crush: fix preservation of shadow bucket ids

1- a decompiled and recompiled was parsing the class bucket ids but it
wasn't actually using them.
2- rebuild_roots_with_classes() was throwing out the old ids and assigning
new ids when the tree was rebuilt.

Fix by passing in a (potentially partial) class_bucket map into
populate_classes().  Take care to allocate new bucket ids that don't
collide with previously used ids.

Signed-off-by: Sage Weil <sage@redhat.com>
This commit is contained in:
Sage Weil 2017-08-03 18:05:08 -04:00
parent 007095b7ae
commit cb807b0006
5 changed files with 75 additions and 23 deletions

View File

@ -734,7 +734,7 @@ int CrushCompiler::parse_bucket(iter_t const& i)
}
for (auto &i : class_id)
crush.class_bucket[id][i.first] = i.second;
class_bucket[id][i.first] = i.second;
if (verbose) err << "bucket " << name << " (" << id << ") " << size << " items and weight "
<< (float)bucketweight / (float)0x10000 << std::endl;
@ -1084,7 +1084,7 @@ int CrushCompiler::parse_crush(iter_t const& i)
case crush_grammar::_crushrule:
if (!saw_rule) {
saw_rule = true;
crush.populate_classes();
crush.populate_classes(class_bucket);
}
r = parse_rule(p);
break;

View File

@ -53,6 +53,7 @@ class CrushCompiler {
map<int, unsigned> item_weight;
map<string, int> type_id;
map<string, int> rule_id;
std::map<int32_t, map<int32_t, int32_t> > class_bucket; // bucket id -> class id -> shadow bucket id
string string_node(node_t &node);
int int_node(node_t &node);

View File

@ -1398,8 +1398,16 @@ bool CrushWrapper::class_is_in_use(int class_id, ostream *ss)
return true;
}
int CrushWrapper::populate_classes()
int CrushWrapper::populate_classes(
const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket)
{
// build set of previous used shadow ids
set<int32_t> used_ids;
for (auto& p : old_class_bucket) {
for (auto& q : p.second) {
used_ids.insert(q.second);
}
}
set<int> roots;
find_nonshadow_roots(roots);
for (auto &r : roots) {
@ -1407,7 +1415,8 @@ int CrushWrapper::populate_classes()
continue;
for (auto &c : class_name) {
int clone;
int res = device_class_clone(r, c.first, &clone);
int res = device_class_clone(r, c.first, old_class_bucket, used_ids,
&clone);
if (res < 0)
return res;
}
@ -1856,7 +1865,11 @@ int CrushWrapper::remove_device_class(CephContext *cct, int id, ostream *ss)
return 0;
}
int CrushWrapper::device_class_clone(int original_id, int device_class, int *clone)
int CrushWrapper::device_class_clone(
int original_id, int device_class,
const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket,
const std::set<int32_t>& used_ids,
int *clone)
{
const char *item_name = get_item_name(original_id);
if (item_name == NULL)
@ -1870,15 +1883,13 @@ int CrushWrapper::device_class_clone(int original_id, int device_class, int *clo
return 0;
}
crush_bucket *original = get_bucket(original_id);
if (IS_ERR(original))
return -ENOENT;
assert(!IS_ERR(original));
crush_bucket *copy = crush_make_bucket(crush,
original->alg,
original->hash,
original->type,
0, NULL, NULL);
if(copy == NULL)
return -ENOMEM;
assert(copy);
for (unsigned i = 0; i < original->size; i++) {
int item = original->items[i];
int weight = crush_get_bucket_item_weight(original, i);
@ -1890,20 +1901,34 @@ int CrushWrapper::device_class_clone(int original_id, int device_class, int *clo
}
} else {
int child_copy_id;
int res = device_class_clone(item, device_class, &child_copy_id);
int res = device_class_clone(item, device_class, old_class_bucket,
used_ids, &child_copy_id);
if (res < 0)
return res;
crush_bucket *child_copy = get_bucket(child_copy_id);
if (IS_ERR(child_copy))
return -ENOENT;
assert(!IS_ERR(child_copy));
res = bucket_add_item(copy, child_copy_id, child_copy->weight);
if (res)
return res;
}
}
int res = crush_add_bucket(crush, 0, copy, clone);
int bno = 0;
if (old_class_bucket.count(original_id) &&
old_class_bucket.at(original_id).count(device_class)) {
bno = old_class_bucket.at(original_id).at(device_class);
} else {
// pick a new shadow bucket id that is not used by the current map
// *or* any previous shadow buckets.
bno = -1;
while (crush->buckets[-1-bno] ||
used_ids.count(bno)) {
--bno;
}
}
int res = crush_add_bucket(crush, bno, copy, clone);
if (res)
return res;
assert(!bno || bno == *clone);
res = set_item_class(*clone, device_class);
if (res < 0)
return res;
@ -1917,10 +1942,12 @@ int CrushWrapper::device_class_clone(int original_id, int device_class, int *clo
int CrushWrapper::rebuild_roots_with_classes()
{
std::map<int32_t, map<int32_t, int32_t> > old_class_bucket = class_bucket;
int r = trim_roots_with_class(false);
if (r < 0)
return r;
r = populate_classes();
class_bucket.clear();
r = populate_classes(old_class_bucket);
if (r < 0)
return r;
return trim_roots_with_class(true);

View File

@ -65,6 +65,7 @@ public:
std::map<int32_t, string> type_map; /* bucket/device type names */
std::map<int32_t, string> name_map; /* bucket/device names */
std::map<int32_t, string> rule_name_map;
std::map<int32_t, int32_t> class_map; /* item id -> class id */
std::map<int32_t, string> class_name; /* class id -> class name */
std::map<string, int32_t> class_rname; /* class name -> class id */
@ -1208,9 +1209,14 @@ public:
int update_device_class(int id, const string& class_name, const string& name, ostream *ss);
int remove_device_class(CephContext *cct, int id, ostream *ss);
int device_class_clone(int original, int device_class, int *clone);
int device_class_clone(
int original, int device_class,
const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket,
const std::set<int32_t>& used_ids,
int *clone);
bool class_is_in_use(int class_id, ostream *ss = nullptr);
int populate_classes();
int populate_classes(
const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket);
int rebuild_roots_with_classes();
/* remove unused roots generated for class devices */
int trim_roots_with_class(bool unused);

View File

@ -1110,7 +1110,11 @@ TEST(CrushWrapper, trim_roots_with_class) {
int root_id = c.get_item_id("default");
int clone_id;
ASSERT_EQ(c.device_class_clone(root_id, cl, &clone_id), 0);
map<int32_t, map<int32_t, int32_t>> old_class_bucket;
set<int32_t> used_ids;
ASSERT_EQ(c.device_class_clone(root_id, cl, old_class_bucket, used_ids,
&clone_id), 0);
ASSERT_TRUE(c.name_exists("default"));
ASSERT_TRUE(c.name_exists("default~ssd"));
@ -1144,9 +1148,12 @@ TEST(CrushWrapper, device_class_clone) {
c.reweight(g_ceph_context);
map<int32_t, map<int32_t, int32_t>> old_class_bucket;
set<int32_t> used_ids;
int root_id = c.get_item_id("default");
int clone_id;
ASSERT_EQ(c.device_class_clone(root_id, cl, &clone_id), 0);
ASSERT_EQ(c.device_class_clone(root_id, cl, old_class_bucket, used_ids,
&clone_id), 0);
ASSERT_TRUE(c.name_exists("default~ssd"));
ASSERT_EQ(clone_id, c.get_item_id("default~ssd"));
ASSERT_TRUE(c.subtree_contains(clone_id, item));
@ -1156,11 +1163,14 @@ TEST(CrushWrapper, device_class_clone) {
ASSERT_EQ(c.get_item_weightf(clone_id), 1);
// cloning again does nothing and returns the existing one
int other_clone_id;
ASSERT_EQ(c.device_class_clone(root_id, cl, &other_clone_id), 0);
ASSERT_EQ(c.device_class_clone(root_id, cl, old_class_bucket, used_ids,
&other_clone_id), 0);
ASSERT_EQ(clone_id, other_clone_id);
// invalid arguments
ASSERT_EQ(c.device_class_clone(12345, cl, &other_clone_id), -ECHILD);
ASSERT_EQ(c.device_class_clone(root_id, 12345, &other_clone_id), -EBADF);
ASSERT_EQ(c.device_class_clone(12345, cl, old_class_bucket, used_ids,
&other_clone_id), -ECHILD);
ASSERT_EQ(c.device_class_clone(root_id, 12345, old_class_bucket, used_ids,
&other_clone_id), -EBADF);
}
TEST(CrushWrapper, split_id_class) {
@ -1177,9 +1187,12 @@ TEST(CrushWrapper, split_id_class) {
int class_id = c.get_or_create_class_id("ssd");
c.class_map[item] = class_id;
map<int32_t, map<int32_t, int32_t>> old_class_bucket;
set<int32_t> used_ids;
int item_id = c.get_item_id("default");
int clone_id;
ASSERT_EQ(c.device_class_clone(item_id, class_id, &clone_id), 0);
ASSERT_EQ(c.device_class_clone(item_id, class_id, old_class_bucket, used_ids,
&clone_id), 0);
int retrieved_item_id;
int retrieved_class_id;
ASSERT_EQ(c.split_id_class(clone_id, &retrieved_item_id, &retrieved_class_id), 0);
@ -1205,10 +1218,15 @@ TEST(CrushWrapper, populate_and_cleanup_classes) {
int class_id = c.get_or_create_class_id("ssd");
c.class_map[item] = class_id;
ASSERT_EQ(c.populate_classes(), 0);
map<int32_t, map<int32_t, int32_t>> old_class_bucket;
ASSERT_EQ(c.populate_classes(old_class_bucket), 0);
ASSERT_TRUE(c.name_exists("default~ssd"));
old_class_bucket = c.class_bucket;
ASSERT_EQ(c.populate_classes(old_class_bucket), 0);
ASSERT_EQ(old_class_bucket, c.class_bucket);
c.class_bucket.clear();
ASSERT_EQ(c.cleanup_classes(), 0);
ASSERT_FALSE(c.name_exists("default~ssd"));