OSDMap: add a CEPH_FEATURE_OSDMAP_ENC feature, and use new encoding

Bring our OSDMap encoding into the modern Ceph world! :) This is
fairly straightforward, but has a few rough edges:
Previously we had a "struct_v" which went at the beginning of the
OSDMap encoding, and then later on an ev "extended version" which
was used to store the more-frequently-changed OSDMap pieces. There
was no size information stored explicitly to let clients skip this,
but osd maps were always encoded into their own bufferlist before
being sent to clients, which had the same effect.
We now use the modern ENCODE_START three times:
1) for the overall OSDMap encoding,
2) for the client-usable portion of the map,
3) for the "extended" portion of the map

This will let us rev each section independently, which may come in
handy if we want to (for instance) add a "monitor" portion to the
map that the OSDs don't care about. It also makes adding new
client information a lot easier, since older clients will still
be able to decode the map as a whole.

We may want to merge this OSDMAP_ENC feature with one of the others
we are creating during this cycle, since they're all very closely
related. That would also let us protect more naturally against old
clients getting a map they need to understand but can't (because
the upcoming map features are only needed when used with
erasure-coded PGs, etc).

Signed-off-by: Greg Farnum <greg@inktank.com>
This commit is contained in:
Greg Farnum 2013-12-12 15:35:23 -08:00
parent 2646d5edb1
commit 3d7c69fb09
3 changed files with 124 additions and 3 deletions

View File

@ -44,6 +44,7 @@
/*
 * Feature bits: each capability is one bit of an unsigned long long
 * mask. Note that OSD_ERASURE_CODES and OSD_TMAP2OMAP deliberately
 * share bit 38; OSDMAP_ENC takes the next bit, 39.
 */
#define CEPH_FEATURE_EXPORT_PEER (1ULL<<37)
#define CEPH_FEATURE_OSD_ERASURE_CODES (1ULL<<38)
#define CEPH_FEATURE_OSD_TMAP2OMAP (1ULL<<38) /* overlap with EC */
#define CEPH_FEATURE_OSDMAP_ENC (1ULL<<39) /* supports new-style OSDMap encoding */
/*
* The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
@ -110,6 +111,7 @@ static inline unsigned long long ceph_sanitize_features(unsigned long long f) {
CEPH_FEATURE_CRUSH_V2 | \
CEPH_FEATURE_EXPORT_PEER | \
CEPH_FEATURE_OSD_ERASURE_CODES | \
CEPH_FEATURE_OSDMAP_ENC | \
0ULL)
#define CEPH_FEATURES_SUPPORTED_DEFAULT CEPH_FEATURES_ALL

View File

@ -1284,7 +1284,7 @@ void OSDMap::encode_client_old(bufferlist& bl) const
::encode(cbl, bl);
}
void OSDMap::encode(bufferlist& bl, uint64_t features) const
void OSDMap::encode_classic(bufferlist& bl, uint64_t features) const
{
if ((features & CEPH_FEATURE_PGID64) == 0) {
encode_client_old(bl);
@ -1332,13 +1332,67 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const
::encode(osd_addrs->hb_front_addr, bl);
}
/**
 * Encode this OSDMap into bl for a peer with the given feature bits.
 *
 * If features lacks CEPH_FEATURE_OSDMAP_ENC, the map is written by
 * encode_classic() and nothing else is emitted. Otherwise the map is
 * written as three ENCODE_START/ENCODE_FINISH sections:
 *   - an outer wrapper (version 7, compat 7) that contains
 *   - a client-usable section (version 1, compat 1), followed by
 *   - an extended, osd-only section (version 1, compat 1).
 *
 * The order of the ::encode calls below is the on-wire field order;
 * decode() reads the fields back in exactly this order.
 */
void OSDMap::encode(bufferlist& bl, uint64_t features) const
{
// Peers without the new-encoding feature get the classic format.
if ((features & CEPH_FEATURE_OSDMAP_ENC) == 0) {
encode_classic(bl, features);
return;
}
// meta-encoding: how we include client-used and osd-specific data
ENCODE_START(7, 7, bl);
{
ENCODE_START(1, 1, bl); // client-usable data
// base
::encode(fsid, bl);
::encode(epoch, bl);
::encode(created, bl);
::encode(modified, bl);
// pools is the only field here whose encoding takes the feature bits
::encode(pools, bl, features);
::encode(pool_name, bl);
::encode(pool_max, bl);
::encode(flags, bl);
::encode(max_osd, bl);
::encode(osd_state, bl);
::encode(osd_weight, bl);
::encode(osd_addrs->client_addr, bl);
::encode(*pg_temp, bl);
// crush
// The crush map is encoded into its own bufferlist first, and that
// bufferlist is then encoded into bl as a single field.
bufferlist cbl;
crush->encode(cbl);
::encode(cbl, bl);
ENCODE_FINISH(bl); // client-usable data
}
{
ENCODE_START(1, 1, bl); // extended, osd-only data
::encode(osd_addrs->hb_back_addr, bl);
::encode(osd_info, bl);
::encode(blacklist, bl);
::encode(osd_addrs->cluster_addr, bl);
::encode(cluster_snapshot_epoch, bl);
::encode(cluster_snapshot, bl);
::encode(*osd_uuid, bl);
::encode(osd_xinfo, bl);
::encode(osd_addrs->hb_front_addr, bl);
ENCODE_FINISH(bl); // osd-only data
}
ENCODE_FINISH(bl); // meta-encoding wrapper
}
/**
 * Convenience overload: decode the map starting at the beginning of bl
 * by handing an iterator to the iterator-based decode().
 */
void OSDMap::decode(bufferlist& bl)
{
  bufferlist::iterator start = bl.begin();
  decode(start);
}
void OSDMap::decode(bufferlist::iterator& p)
void OSDMap::decode_classic(bufferlist::iterator& p)
{
__u32 n, t;
__u16 v;
@ -1452,7 +1506,70 @@ void OSDMap::decode(bufferlist::iterator& p)
calc_num_osds();
}
/**
 * Decode an OSDMap from the iterator's current position.
 *
 * Both encodings are accepted. A leading struct_v below 7 means the
 * data is in the classic format: the iterator is rewound past the
 * struct_v just read and decode_classic() takes over. Otherwise the
 * new-style layout is read: an outer wrapper containing a
 * client-usable section followed by an extended, osd-only section,
 * with fields in the same order encode() writes them.
 *
 * On the new-style path calc_num_osds() is called once decoding is
 * complete, matching what decode_classic() does at its end.
 */
void OSDMap::decode(bufferlist::iterator& bl)
{
  /**
   * Older encodings of the OSDMap had a single struct_v which
   * covered the whole encoding, and was prior to our modern
   * stuff which includes a compatv and a size. So if we see
   * a struct_v < 7, we must rewind to the beginning and use our
   * classic decoder.
   */
  DECODE_START_LEGACY_COMPAT_LEN(7, 7, 7, bl); // wrapper
  if (struct_v < 7) {
    // Step back over the struct_v the macro just consumed so the
    // classic decoder sees the encoding from its first byte.
    int struct_v_size = sizeof(struct_v);
    bl.advance(-struct_v_size);
    decode_classic(bl);
    return;
  }
  /**
   * Since we made it past that hurdle, we can use our normal paths.
   */
  {
    DECODE_START(1, bl); // client-usable data
    // base
    ::decode(fsid, bl);
    ::decode(epoch, bl);
    ::decode(created, bl);
    ::decode(modified, bl);
    ::decode(pools, bl);
    ::decode(pool_name, bl);
    ::decode(pool_max, bl);
    ::decode(flags, bl);
    ::decode(max_osd, bl);
    ::decode(osd_state, bl);
    ::decode(osd_weight, bl);
    ::decode(osd_addrs->client_addr, bl);
    ::decode(*pg_temp, bl);
    // crush: stored as a nested bufferlist, decoded from its own iterator
    bufferlist cbl;
    ::decode(cbl, bl);
    bufferlist::iterator cblp = cbl.begin();
    crush->decode(cblp);
    DECODE_FINISH(bl); // client-usable data
  }
  {
    DECODE_START(1, bl); // extended, osd-only data
    ::decode(osd_addrs->hb_back_addr, bl);
    ::decode(osd_info, bl);
    ::decode(blacklist, bl);
    ::decode(osd_addrs->cluster_addr, bl);
    ::decode(cluster_snapshot_epoch, bl);
    ::decode(cluster_snapshot, bl);
    ::decode(*osd_uuid, bl);
    ::decode(osd_xinfo, bl);
    ::decode(osd_addrs->hb_front_addr, bl);
    DECODE_FINISH(bl); // osd-only data
  }
  DECODE_FINISH(bl); // wrapper

  // decode_classic() ends with calc_num_osds(); do the same here so
  // both decode paths leave the map in the same post-decode state.
  // NOTE(review): if decode_classic() performs other post-decode
  // bookkeeping before that call, it should be mirrored here too --
  // confirm against the full classic decoder.
  calc_num_osds();
}
void OSDMap::dump_json(ostream& out) const
{

View File

@ -463,10 +463,12 @@ private:
// serialize, unserialize
private:
// Oldest client encoding; encode_classic() uses it when the peer's
// features lack CEPH_FEATURE_PGID64.
void encode_client_old(bufferlist& bl) const;
// Pre-OSDMAP_ENC encoder; encode() delegates here when the peer's
// features lack CEPH_FEATURE_OSDMAP_ENC.
void encode_classic(bufferlist& bl, uint64_t features) const;
// Pre-OSDMAP_ENC decoder; decode() rewinds and delegates here when the
// leading struct_v is below 7.
void decode_classic(bufferlist::iterator& p);
public:
// Encode for a peer with the given feature bits (all features by default).
void encode(bufferlist& bl, uint64_t features=CEPH_FEATURES_ALL) const;
// Decode starting from the beginning of bl.
void decode(bufferlist& bl);
// Decode starting from the iterator's current position.
void decode(bufferlist::iterator& p);
void decode(bufferlist::iterator& bl);
/**** mapping facilities ****/