osd/OSDMap: track newly removed and purged snaps in each epoch

Instead of maintaining a set of snapids that have been removed over
all time, note just the newly removed and newly purged snaps
in each OSDMap epoch.  This is easier to consume for both the Objecter
and OSD.

Also keep the interval of snaps that have been removed but not purged
in each OSDMap.  This is extremely convenient because it frees the OSDs
from having to maintain this information in parallel even when they may
not have PGs belonging to those pools.  These structures will be large
right when the upgrade happens and the pg_pool_t::removed_snaps gets copied
to the new fields, but in the steady state they will be relatively small,
reflecting only the set of snaps that are currently being removed.

This also provides convenient visibility into the "trimming snaps" set
that the cluster is working on.

Signed-off-by: Sage Weil <sage@redhat.com>
This commit is contained in:
Sage Weil 2017-10-11 14:17:39 -05:00
parent 37c4affa25
commit 553048fbf9
2 changed files with 164 additions and 6 deletions

View File

@ -472,10 +472,13 @@ void OSDMap::Incremental::encode(bufferlist& bl, uint64_t features) const
ENCODE_START(8, 7, bl);
{
uint8_t v = 5;
uint8_t v = 6;
if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) {
v = 3;
}
if (!HAVE_FEATURE(features, SERVER_MIMIC)) {
v = 5;
}
ENCODE_START(v, 1, bl); // client-usable data
::encode(fsid, bl);
::encode(epoch, bl);
@ -512,6 +515,10 @@ void OSDMap::Incremental::encode(bufferlist& bl, uint64_t features) const
::encode(new_pg_upmap_items, bl);
::encode(old_pg_upmap_items, bl);
}
if (v >= 6) {
::encode(new_removed_snaps, bl);
::encode(new_purged_snaps, bl);
}
ENCODE_FINISH(bl); // client-usable data
}
@ -693,7 +700,7 @@ void OSDMap::Incremental::decode(bufferlist::iterator& bl)
return;
}
{
DECODE_START(5, bl); // client-usable data
DECODE_START(6, bl); // client-usable data
::decode(fsid, bl);
::decode(epoch, bl);
::decode(modified, bl);
@ -736,6 +743,10 @@ void OSDMap::Incremental::decode(bufferlist::iterator& bl)
::decode(new_pg_upmap_items, bl);
::decode(old_pg_upmap_items, bl);
}
if (struct_v >= 6) {
::decode(new_removed_snaps, bl);
::decode(new_purged_snaps, bl);
}
DECODE_FINISH(bl); // client-usable data
}
@ -1053,6 +1064,37 @@ void OSDMap::Incremental::dump(Formatter *f) const
f->dump_string("old", erasure_code_profile.c_str());
}
f->close_section();
f->open_array_section("new_removed_snaps");
for (auto& p : new_removed_snaps) {
f->open_object_section("pool");
f->dump_int("pool", p.first);
f->open_array_section("snaps");
for (auto q = p.second.begin(); q != p.second.end(); ++q) {
f->open_object_section("interval");
f->dump_unsigned("begin", q.get_start());
f->dump_unsigned("length", q.get_len());
f->close_section();
}
f->close_section();
f->close_section();
}
f->close_section();
f->open_array_section("new_purged_snaps");
for (auto& p : new_purged_snaps) {
f->open_object_section("pool");
f->dump_int("pool", p.first);
f->open_array_section("snaps");
for (auto q = p.second.begin(); q != p.second.end(); ++q) {
f->open_object_section("interval");
f->dump_unsigned("begin", q.get_start());
f->dump_unsigned("length", q.get_len());
f->close_section();
}
f->close_section();
f->close_section();
}
f->close_section();
}
void OSDMap::Incremental::generate_test_instances(list<Incremental*>& o)
@ -1602,6 +1644,24 @@ int OSDMap::apply_incremental(const Incremental &inc)
pools[pool.first].last_change = epoch;
}
new_removed_snaps = inc.new_removed_snaps;
new_purged_snaps = inc.new_purged_snaps;
for (auto p = new_removed_snaps.begin();
p != new_removed_snaps.end();
++p) {
removed_snaps_queue[p->first].union_of(p->second);
}
for (auto p = new_purged_snaps.begin();
p != new_purged_snaps.end();
++p) {
auto q = removed_snaps_queue.find(p->first);
assert(q != removed_snaps_queue.end());
q->second.subtract(p->second);
if (q->second.empty()) {
removed_snaps_queue.erase(q);
}
}
for (const auto &pname : inc.new_pool_names) {
auto pool_name_entry = pool_name.find(pname.first);
if (pool_name_entry != pool_name.end()) {
@ -2306,10 +2366,13 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const
ENCODE_START(8, 7, bl);
{
uint8_t v = 6;
uint8_t v = 7;
if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) {
v = 3;
}
if (!HAVE_FEATURE(features, SERVER_MIMIC)) {
v = 6;
}
ENCODE_START(v, 1, bl); // client-usable data
// base
::encode(fsid, bl);
@ -2372,14 +2435,21 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const
if (v >= 6) {
::encode(crush_version, bl);
}
if (v >= 7) {
::encode(new_removed_snaps, bl);
::encode(new_purged_snaps, bl);
}
ENCODE_FINISH(bl); // client-usable data
}
{
uint8_t target_v = 5;
uint8_t target_v = 6;
if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) {
target_v = 1;
}
if (!HAVE_FEATURE(features, SERVER_MIMIC)) {
target_v = 5;
}
ENCODE_START(target_v, 1, bl); // extended, osd-only data
::encode(osd_addrs->hb_back_addr, bl, features);
::encode(osd_info, bl);
@ -2407,6 +2477,9 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const
::encode(require_min_compat_client, bl);
::encode(require_osd_release, bl);
}
if (target_v >= 6) {
::encode(removed_snaps_queue, bl);
}
ENCODE_FINISH(bl); // osd-only data
}
@ -2582,7 +2655,7 @@ void OSDMap::decode(bufferlist::iterator& bl)
* Since we made it past that hurdle, we can use our normal paths.
*/
{
DECODE_START(6, bl); // client-usable data
DECODE_START(7, bl); // client-usable data
// base
::decode(fsid, bl);
::decode(epoch, bl);
@ -2640,11 +2713,15 @@ void OSDMap::decode(bufferlist::iterator& bl)
if (struct_v >= 6) {
::decode(crush_version, bl);
}
if (struct_v >= 7) {
::decode(new_removed_snaps, bl);
::decode(new_purged_snaps, bl);
}
DECODE_FINISH(bl); // client-usable data
}
{
DECODE_START(5, bl); // extended, osd-only data
DECODE_START(6, bl); // extended, osd-only data
::decode(osd_addrs->hb_back_addr, bl);
::decode(osd_info, bl);
::decode(blacklist, bl);
@ -2693,6 +2770,9 @@ void OSDMap::decode(bufferlist::iterator& bl)
require_osd_release = 0;
}
}
if (struct_v >= 6) {
::decode(removed_snaps_queue, bl);
}
DECODE_FINISH(bl); // osd-only data
}
@ -2882,6 +2962,52 @@ void OSDMap::dump(Formatter *f) const
f->close_section();
dump_erasure_code_profiles(erasure_code_profiles, f);
f->open_array_section("removed_snaps_queue");
for (auto& p : removed_snaps_queue) {
f->open_object_section("pool");
f->dump_int("pool", p.first);
f->open_array_section("snaps");
for (auto q = p.second.begin(); q != p.second.end(); ++q) {
f->open_object_section("interval");
f->dump_unsigned("begin", q.get_start());
f->dump_unsigned("length", q.get_len());
f->close_section();
}
f->close_section();
f->close_section();
}
f->close_section();
f->open_array_section("new_removed_snaps");
for (auto& p : new_removed_snaps) {
f->open_object_section("pool");
f->dump_int("pool", p.first);
f->open_array_section("snaps");
for (auto q = p.second.begin(); q != p.second.end(); ++q) {
f->open_object_section("interval");
f->dump_unsigned("begin", q.get_start());
f->dump_unsigned("length", q.get_len());
f->close_section();
}
f->close_section();
f->close_section();
}
f->close_section();
f->open_array_section("new_purged_snaps");
for (auto& p : new_purged_snaps) {
f->open_object_section("pool");
f->dump_int("pool", p.first);
f->open_array_section("snaps");
for (auto q = p.second.begin(); q != p.second.end(); ++q) {
f->open_object_section("interval");
f->dump_unsigned("begin", q.get_start());
f->dump_unsigned("length", q.get_len());
f->close_section();
}
f->close_section();
f->close_section();
}
f->close_section();
}
void OSDMap::generate_test_instances(list<OSDMap*>& o)
@ -2968,6 +3094,10 @@ void OSDMap::print_pools(ostream& out) const
if (!pool.second.removed_snaps.empty())
out << "\tremoved_snaps " << pool.second.removed_snaps << "\n";
auto p = removed_snaps_queue.find(pool.first);
if (p != removed_snaps_queue.end()) {
out << "\tremoved_snaps_queue " << p->second << "\n";
}
}
out << std::endl;
}

View File

@ -343,6 +343,10 @@ class OSDMap {
public:
MEMPOOL_CLASS_HELPERS();
/// Interval set of snapids whose backing map is a flat_map from the
/// osdmap mempool; used for the per-pool removed/purged snap fields.
typedef interval_set<
snapid_t,
mempool::osdmap::flat_map<snapid_t,snapid_t>> snap_interval_set_t;
class Incremental {
public:
MEMPOOL_CLASS_HELPERS();
@ -389,6 +393,8 @@ public:
mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t>> new_pg_upmap;
mempool::osdmap::map<pg_t,mempool::osdmap::vector<pair<int32_t,int32_t>>> new_pg_upmap_items;
mempool::osdmap::set<pg_t> old_pg_upmap, old_pg_upmap_items;
mempool::osdmap::map<int64_t, snap_interval_set_t> new_removed_snaps;
mempool::osdmap::map<int64_t, snap_interval_set_t> new_purged_snaps;
string cluster_snapshot;
@ -523,6 +529,15 @@ private:
mempool::osdmap::unordered_map<entity_addr_t,utime_t> blacklist;
/// queue of snaps to remove
mempool::osdmap::map<int64_t, snap_interval_set_t> removed_snaps_queue;
/// removed_snaps additions this epoch
mempool::osdmap::map<int64_t, snap_interval_set_t> new_removed_snaps;
/// removed_snaps removals this epoch
mempool::osdmap::map<int64_t, snap_interval_set_t> new_purged_snaps;
epoch_t cluster_snapshot_epoch;
string cluster_snapshot;
bool new_blacklist_entries;
@ -1140,6 +1155,19 @@ public:
return false;
}
/// Read-only access to removed_snaps_queue (the "queue of snaps to
/// remove"), a map keyed by int64_t pool id holding snap intervals.
const mempool::osdmap::map<int64_t,snap_interval_set_t>&
get_removed_snaps_queue() const {
return removed_snaps_queue;
}
/// Read-only access to new_removed_snaps (the removed_snaps additions
/// recorded for this epoch), keyed by int64_t pool id.
const mempool::osdmap::map<int64_t,snap_interval_set_t>&
get_new_removed_snaps() const {
return new_removed_snaps;
}
/// Read-only access to new_purged_snaps (the removed_snaps removals
/// recorded for this epoch), keyed by int64_t pool id.
const mempool::osdmap::map<int64_t,snap_interval_set_t>&
get_new_purged_snaps() const {
return new_purged_snaps;
}
int64_t lookup_pg_pool_name(const string& name) const {
auto p = name_pool.find(name);
if (p == name_pool.end())