mon: more forceful renumbering of legacy ruleset IDs

Previously, the rules were only modified in the trivial case,
so we continued to potentially have CRUSH maps with the
legacy ruleset functionality in use.

In order to ultimately remove rulesets entirely, we need
to do this more aggressively, renumbering all the rules
and then updating any pools as needed.

Signed-off-by: John Spray <john.spray@redhat.com>
This commit is contained in:
John Spray 2017-08-25 11:06:21 +01:00 committed by Sage Weil
parent 675483ecca
commit 71d4b2bed5
3 changed files with 62 additions and 68 deletions

View File

@ -13,7 +13,7 @@
#define dout_subsys ceph_subsys_crush
bool CrushWrapper::has_legacy_rulesets() const
bool CrushWrapper::has_legacy_rule_ids() const
{
for (unsigned i=0; i<crush->max_rules; i++) {
crush_rule *r = crush->rules[i];
@ -25,51 +25,17 @@ bool CrushWrapper::has_legacy_rulesets() const
return false;
}
int CrushWrapper::renumber_rules_by_ruleset()
std::map<int, int> CrushWrapper::renumber_rules()
{
int max_ruleset = 0;
std::map<int, int> result;
for (unsigned i=0; i<crush->max_rules; i++) {
crush_rule *r = crush->rules[i];
if (r && r->mask.ruleset >= max_ruleset) {
max_ruleset = r->mask.ruleset + 1;
if (r && r->mask.ruleset != i) {
result[r->mask.ruleset] = i;
r->mask.ruleset = i;
}
}
struct crush_rule **newrules =
(crush_rule**)calloc(1, max_ruleset * sizeof(crush_rule*));
for (unsigned i=0; i<crush->max_rules; i++) {
crush_rule *r = crush->rules[i];
if (!r)
continue;
if (newrules[r->mask.ruleset]) {
// collision, we can't do it.
free(newrules);
return -EINVAL;
}
newrules[r->mask.ruleset] = r;
}
// success, swap!
free(crush->rules);
crush->rules = newrules;
crush->max_rules = max_ruleset;
return 0;
}
bool CrushWrapper::has_multirule_rulesets() const
{
for (unsigned i=0; i<crush->max_rules; i++) {
crush_rule *r = crush->rules[i];
if (!r)
continue;
for (unsigned j=i+1; j<crush->max_rules; j++) {
crush_rule *s = crush->rules[j];
if (!s)
continue;
if (r->mask.ruleset == s->mask.ruleset)
return true;
}
}
return false;
return result;
}
bool CrushWrapper::has_non_straw2_buckets() const

View File

@ -120,14 +120,25 @@ public:
set_tunables_default();
}
/// true if any rule has a ruleset != the rule id
bool has_legacy_rulesets() const;
/**
* true if any rule has a rule id != its position in the array
*
* These indicate "ruleset" IDs that were created by older versions
* of Ceph. They are cleaned up in renumber_rules so that eventually
* we can remove the code for handling them.
*/
bool has_legacy_rule_ids() const;
/// fix rules whose ruleid != ruleset
int renumber_rules_by_ruleset();
/// true if any ruleset has more than 1 rule
bool has_multirule_rulesets() const;
/**
* fix rules whose ruleid != ruleset
*
* These rules were created in older versions of Ceph. The concept
* of a ruleset no longer exists.
*
* Return a map of old ID -> new ID. Caller must update OSDMap
* to use new IDs.
*/
std::map<int, int> renumber_rules();
/// true if any buckets that aren't straw2
bool has_non_straw2_buckets() const;
@ -1238,7 +1249,7 @@ public:
void finalize() {
assert(crush);
crush_finalize(crush);
have_uniform_rules = !has_legacy_rulesets();
have_uniform_rules = !has_legacy_rule_ids();
}
int update_device_class(int id, const string& class_name, const string& name, ostream *ss);
@ -1315,7 +1326,7 @@ public:
/**
* Return the lowest numbered ruleset of type `type`
*
* @returns a ruleset ID, or -1 if no matching rulesets found.
* @returns a ruleset ID, or -1 if no matching rules found.
*/
int find_first_ruleset(int type) const {
int result = -1;

View File

@ -543,23 +543,6 @@ void OSDMonitor::on_active()
void OSDMonitor::on_restart()
{
last_osd_report.clear();
if (mon->is_leader()) {
// fix ruleset != ruleid
if (osdmap.crush->has_legacy_rulesets() &&
!osdmap.crush->has_multirule_rulesets()) {
CrushWrapper newcrush;
_get_pending_crush(newcrush);
int r = newcrush.renumber_rules_by_ruleset();
if (r >= 0) {
dout(1) << __func__ << " crush map has ruleset != rule id; fixing" << dendl;
pending_inc.crush.clear();
newcrush.encode(pending_inc.crush, mon->get_quorum_con_features());
} else {
dout(10) << __func__ << " unable to renumber rules by ruleset" << dendl;
}
}
}
}
void OSDMonitor::on_shutdown()
@ -622,6 +605,40 @@ void OSDMonitor::create_pending()
<< pending_inc.new_nearfull_ratio << dendl;
}
}
// Rewrite CRUSH rule IDs if they are using legacy "ruleset"
// structure.
if (osdmap.crush->has_legacy_rule_ids()) {
CrushWrapper newcrush;
_get_pending_crush(newcrush);
// First, for all pools, work out which rule they really used
// by resolving ruleset to rule.
for (const auto &i : osdmap.get_pools()) {
const auto pool_id = i.first;
const auto &pool = i.second;
int new_rule_id = newcrush.find_rule(pool.crush_rule,
pool.type, pool.size);
dout(1) << __func__ << " rewriting pool "
<< osdmap.get_pool_name(pool_id) << " crush ruleset "
<< pool.crush_rule << " -> rule id " << new_rule_id << dendl;
if (pending_inc.new_pools.count(pool_id) == 0) {
pending_inc.new_pools[pool_id] = pool;
}
pending_inc.new_pools[pool_id].crush_rule = new_rule_id;
}
// Now, go ahead and renumber all the rules so that their
// rule_id field corresponds to their position in the array
auto old_to_new = newcrush.renumber_rules();
dout(1) << __func__ << " Rewrote " << old_to_new << " crush IDs:" << dendl;
for (const auto &i : old_to_new) {
dout(1) << __func__ << " " << i.first << " -> " << i.second << dendl;
}
pending_inc.crush.clear();
newcrush.encode(pending_inc.crush, mon->get_quorum_con_features());
}
}
creating_pgs_t
@ -7154,7 +7171,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
}
}
if (crush.has_legacy_rulesets()) {
if (crush.has_legacy_rule_ids()) {
err = -EINVAL;
ss << "crush maps with ruleset != ruleid are no longer allowed";
goto reply;