osd/OSDMap: track newly removed and purged snaps in each epoch

Instead of maintaining a set of snapids that have been removed over
all time, note just the newly removed and newly purged snaps
in each OSDMap epoch.  This is easier to consume for both the Objecter
and OSD.

Also keep the interval of snaps that have been removed but not purged
in each OSDMap.  This is extremely convenient because it frees the OSDs
from having to maintain this information in parallel even when they may
not have PGs belonging to those pools.  These structures will be large
at the moment the upgrade happens and pg_pool_t::removed_snaps gets copied
to the new fields, but in the steady state they will be relatively small,
reflecting only the set of snaps that are currently being removed.

This also provides convenient visibility into the "trimming snaps" set
that the cluster is working on.

Signed-off-by: Sage Weil <sage@redhat.com>
This commit is contained in:
Sage Weil 2017-10-11 14:17:39 -05:00
parent 37c4affa25
commit 553048fbf9
2 changed files with 164 additions and 6 deletions

View File

@ -472,10 +472,13 @@ void OSDMap::Incremental::encode(bufferlist& bl, uint64_t features) const
ENCODE_START(8, 7, bl); ENCODE_START(8, 7, bl);
{ {
uint8_t v = 5; uint8_t v = 6;
if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) { if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) {
v = 3; v = 3;
} }
if (!HAVE_FEATURE(features, SERVER_MIMIC)) {
v = 5;
}
ENCODE_START(v, 1, bl); // client-usable data ENCODE_START(v, 1, bl); // client-usable data
::encode(fsid, bl); ::encode(fsid, bl);
::encode(epoch, bl); ::encode(epoch, bl);
@ -512,6 +515,10 @@ void OSDMap::Incremental::encode(bufferlist& bl, uint64_t features) const
::encode(new_pg_upmap_items, bl); ::encode(new_pg_upmap_items, bl);
::encode(old_pg_upmap_items, bl); ::encode(old_pg_upmap_items, bl);
} }
if (v >= 6) {
::encode(new_removed_snaps, bl);
::encode(new_purged_snaps, bl);
}
ENCODE_FINISH(bl); // client-usable data ENCODE_FINISH(bl); // client-usable data
} }
@ -693,7 +700,7 @@ void OSDMap::Incremental::decode(bufferlist::iterator& bl)
return; return;
} }
{ {
DECODE_START(5, bl); // client-usable data DECODE_START(6, bl); // client-usable data
::decode(fsid, bl); ::decode(fsid, bl);
::decode(epoch, bl); ::decode(epoch, bl);
::decode(modified, bl); ::decode(modified, bl);
@ -736,6 +743,10 @@ void OSDMap::Incremental::decode(bufferlist::iterator& bl)
::decode(new_pg_upmap_items, bl); ::decode(new_pg_upmap_items, bl);
::decode(old_pg_upmap_items, bl); ::decode(old_pg_upmap_items, bl);
} }
if (struct_v >= 6) {
::decode(new_removed_snaps, bl);
::decode(new_purged_snaps, bl);
}
DECODE_FINISH(bl); // client-usable data DECODE_FINISH(bl); // client-usable data
} }
@ -1053,6 +1064,37 @@ void OSDMap::Incremental::dump(Formatter *f) const
f->dump_string("old", erasure_code_profile.c_str()); f->dump_string("old", erasure_code_profile.c_str());
} }
f->close_section(); f->close_section();
f->open_array_section("new_removed_snaps");
for (auto& p : new_removed_snaps) {
f->open_object_section("pool");
f->dump_int("pool", p.first);
f->open_array_section("snaps");
for (auto q = p.second.begin(); q != p.second.end(); ++q) {
f->open_object_section("interval");
f->dump_unsigned("begin", q.get_start());
f->dump_unsigned("length", q.get_len());
f->close_section();
}
f->close_section();
f->close_section();
}
f->close_section();
f->open_array_section("new_purged_snaps");
for (auto& p : new_purged_snaps) {
f->open_object_section("pool");
f->dump_int("pool", p.first);
f->open_array_section("snaps");
for (auto q = p.second.begin(); q != p.second.end(); ++q) {
f->open_object_section("interval");
f->dump_unsigned("begin", q.get_start());
f->dump_unsigned("length", q.get_len());
f->close_section();
}
f->close_section();
f->close_section();
}
f->close_section();
} }
void OSDMap::Incremental::generate_test_instances(list<Incremental*>& o) void OSDMap::Incremental::generate_test_instances(list<Incremental*>& o)
@ -1602,6 +1644,24 @@ int OSDMap::apply_incremental(const Incremental &inc)
pools[pool.first].last_change = epoch; pools[pool.first].last_change = epoch;
} }
new_removed_snaps = inc.new_removed_snaps;
new_purged_snaps = inc.new_purged_snaps;
for (auto p = new_removed_snaps.begin();
p != new_removed_snaps.end();
++p) {
removed_snaps_queue[p->first].union_of(p->second);
}
for (auto p = new_purged_snaps.begin();
p != new_purged_snaps.end();
++p) {
auto q = removed_snaps_queue.find(p->first);
assert(q != removed_snaps_queue.end());
q->second.subtract(p->second);
if (q->second.empty()) {
removed_snaps_queue.erase(q);
}
}
for (const auto &pname : inc.new_pool_names) { for (const auto &pname : inc.new_pool_names) {
auto pool_name_entry = pool_name.find(pname.first); auto pool_name_entry = pool_name.find(pname.first);
if (pool_name_entry != pool_name.end()) { if (pool_name_entry != pool_name.end()) {
@ -2306,10 +2366,13 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const
ENCODE_START(8, 7, bl); ENCODE_START(8, 7, bl);
{ {
uint8_t v = 6; uint8_t v = 7;
if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) { if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) {
v = 3; v = 3;
} }
if (!HAVE_FEATURE(features, SERVER_MIMIC)) {
v = 6;
}
ENCODE_START(v, 1, bl); // client-usable data ENCODE_START(v, 1, bl); // client-usable data
// base // base
::encode(fsid, bl); ::encode(fsid, bl);
@ -2372,14 +2435,21 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const
if (v >= 6) { if (v >= 6) {
::encode(crush_version, bl); ::encode(crush_version, bl);
} }
if (v >= 7) {
::encode(new_removed_snaps, bl);
::encode(new_purged_snaps, bl);
}
ENCODE_FINISH(bl); // client-usable data ENCODE_FINISH(bl); // client-usable data
} }
{ {
uint8_t target_v = 5; uint8_t target_v = 6;
if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) { if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) {
target_v = 1; target_v = 1;
} }
if (!HAVE_FEATURE(features, SERVER_MIMIC)) {
target_v = 5;
}
ENCODE_START(target_v, 1, bl); // extended, osd-only data ENCODE_START(target_v, 1, bl); // extended, osd-only data
::encode(osd_addrs->hb_back_addr, bl, features); ::encode(osd_addrs->hb_back_addr, bl, features);
::encode(osd_info, bl); ::encode(osd_info, bl);
@ -2407,6 +2477,9 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const
::encode(require_min_compat_client, bl); ::encode(require_min_compat_client, bl);
::encode(require_osd_release, bl); ::encode(require_osd_release, bl);
} }
if (target_v >= 6) {
::encode(removed_snaps_queue, bl);
}
ENCODE_FINISH(bl); // osd-only data ENCODE_FINISH(bl); // osd-only data
} }
@ -2582,7 +2655,7 @@ void OSDMap::decode(bufferlist::iterator& bl)
* Since we made it past that hurdle, we can use our normal paths. * Since we made it past that hurdle, we can use our normal paths.
*/ */
{ {
DECODE_START(6, bl); // client-usable data DECODE_START(7, bl); // client-usable data
// base // base
::decode(fsid, bl); ::decode(fsid, bl);
::decode(epoch, bl); ::decode(epoch, bl);
@ -2640,11 +2713,15 @@ void OSDMap::decode(bufferlist::iterator& bl)
if (struct_v >= 6) { if (struct_v >= 6) {
::decode(crush_version, bl); ::decode(crush_version, bl);
} }
if (struct_v >= 7) {
::decode(new_removed_snaps, bl);
::decode(new_purged_snaps, bl);
}
DECODE_FINISH(bl); // client-usable data DECODE_FINISH(bl); // client-usable data
} }
{ {
DECODE_START(5, bl); // extended, osd-only data DECODE_START(6, bl); // extended, osd-only data
::decode(osd_addrs->hb_back_addr, bl); ::decode(osd_addrs->hb_back_addr, bl);
::decode(osd_info, bl); ::decode(osd_info, bl);
::decode(blacklist, bl); ::decode(blacklist, bl);
@ -2693,6 +2770,9 @@ void OSDMap::decode(bufferlist::iterator& bl)
require_osd_release = 0; require_osd_release = 0;
} }
} }
if (struct_v >= 6) {
::decode(removed_snaps_queue, bl);
}
DECODE_FINISH(bl); // osd-only data DECODE_FINISH(bl); // osd-only data
} }
@ -2882,6 +2962,52 @@ void OSDMap::dump(Formatter *f) const
f->close_section(); f->close_section();
dump_erasure_code_profiles(erasure_code_profiles, f); dump_erasure_code_profiles(erasure_code_profiles, f);
f->open_array_section("removed_snaps_queue");
for (auto& p : removed_snaps_queue) {
f->open_object_section("pool");
f->dump_int("pool", p.first);
f->open_array_section("snaps");
for (auto q = p.second.begin(); q != p.second.end(); ++q) {
f->open_object_section("interval");
f->dump_unsigned("begin", q.get_start());
f->dump_unsigned("length", q.get_len());
f->close_section();
}
f->close_section();
f->close_section();
}
f->close_section();
f->open_array_section("new_removed_snaps");
for (auto& p : new_removed_snaps) {
f->open_object_section("pool");
f->dump_int("pool", p.first);
f->open_array_section("snaps");
for (auto q = p.second.begin(); q != p.second.end(); ++q) {
f->open_object_section("interval");
f->dump_unsigned("begin", q.get_start());
f->dump_unsigned("length", q.get_len());
f->close_section();
}
f->close_section();
f->close_section();
}
f->close_section();
f->open_array_section("new_purged_snaps");
for (auto& p : new_purged_snaps) {
f->open_object_section("pool");
f->dump_int("pool", p.first);
f->open_array_section("snaps");
for (auto q = p.second.begin(); q != p.second.end(); ++q) {
f->open_object_section("interval");
f->dump_unsigned("begin", q.get_start());
f->dump_unsigned("length", q.get_len());
f->close_section();
}
f->close_section();
f->close_section();
}
f->close_section();
} }
void OSDMap::generate_test_instances(list<OSDMap*>& o) void OSDMap::generate_test_instances(list<OSDMap*>& o)
@ -2968,6 +3094,10 @@ void OSDMap::print_pools(ostream& out) const
if (!pool.second.removed_snaps.empty()) if (!pool.second.removed_snaps.empty())
out << "\tremoved_snaps " << pool.second.removed_snaps << "\n"; out << "\tremoved_snaps " << pool.second.removed_snaps << "\n";
auto p = removed_snaps_queue.find(pool.first);
if (p != removed_snaps_queue.end()) {
out << "\tremoved_snaps_queue " << p->second << "\n";
}
} }
out << std::endl; out << std::endl;
} }

View File

@ -343,6 +343,10 @@ class OSDMap {
public: public:
MEMPOOL_CLASS_HELPERS(); MEMPOOL_CLASS_HELPERS();
typedef interval_set<
snapid_t,
mempool::osdmap::flat_map<snapid_t,snapid_t>> snap_interval_set_t;
class Incremental { class Incremental {
public: public:
MEMPOOL_CLASS_HELPERS(); MEMPOOL_CLASS_HELPERS();
@ -389,6 +393,8 @@ public:
mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t>> new_pg_upmap; mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t>> new_pg_upmap;
mempool::osdmap::map<pg_t,mempool::osdmap::vector<pair<int32_t,int32_t>>> new_pg_upmap_items; mempool::osdmap::map<pg_t,mempool::osdmap::vector<pair<int32_t,int32_t>>> new_pg_upmap_items;
mempool::osdmap::set<pg_t> old_pg_upmap, old_pg_upmap_items; mempool::osdmap::set<pg_t> old_pg_upmap, old_pg_upmap_items;
mempool::osdmap::map<int64_t, snap_interval_set_t> new_removed_snaps;
mempool::osdmap::map<int64_t, snap_interval_set_t> new_purged_snaps;
string cluster_snapshot; string cluster_snapshot;
@ -523,6 +529,15 @@ private:
mempool::osdmap::unordered_map<entity_addr_t,utime_t> blacklist; mempool::osdmap::unordered_map<entity_addr_t,utime_t> blacklist;
/// queue of snaps to remove
mempool::osdmap::map<int64_t, snap_interval_set_t> removed_snaps_queue;
/// removed_snaps additions this epoch
mempool::osdmap::map<int64_t, snap_interval_set_t> new_removed_snaps;
/// removed_snaps removals this epoch
mempool::osdmap::map<int64_t, snap_interval_set_t> new_purged_snaps;
epoch_t cluster_snapshot_epoch; epoch_t cluster_snapshot_epoch;
string cluster_snapshot; string cluster_snapshot;
bool new_blacklist_entries; bool new_blacklist_entries;
@ -1140,6 +1155,19 @@ public:
return false; return false;
} }
/// Read-only access to removed_snaps_queue: for each pool id, the
/// intervals of snaps that have been removed but not yet purged as of
/// this map.
const mempool::osdmap::map<int64_t,snap_interval_set_t>&
get_removed_snaps_queue() const {
return removed_snaps_queue;
}
/// Read-only access to new_removed_snaps: for each pool id, the
/// intervals of snaps newly removed in this map's epoch.
const mempool::osdmap::map<int64_t,snap_interval_set_t>&
get_new_removed_snaps() const {
return new_removed_snaps;
}
/// Read-only access to new_purged_snaps: for each pool id, the
/// intervals of snaps newly purged in this map's epoch.
const mempool::osdmap::map<int64_t,snap_interval_set_t>&
get_new_purged_snaps() const {
return new_purged_snaps;
}
int64_t lookup_pg_pool_name(const string& name) const { int64_t lookup_pg_pool_name(const string& name) const {
auto p = name_pool.find(name); auto p = name_pool.find(name);
if (p == name_pool.end()) if (p == name_pool.end())