osd: remove magical tmap -> omap conversion

This is incomplete and unfortunately unusable in its current state:

 - it would only set USES_TMAP for old encoded object_info_t and tmapput,
   but would NOT set it for tmapup
 - a config option (osd_tmapput_sets_uses_tmap) turned the tmapput case off
   by default.

That means that the MDS conversion from tmap -> omap won't be able to use
this because any existing cluster has tmap objects without the USES_TMAP
flag set.  And we don't want to unconditionally try a tmap->omap conversion
on omap operations because there are lots of existing librados users out
there that will be negatively impacted by this.

Instead, the MDS will need to handle this conversion on the client side by
reading either tmap or omap objects and explicitly rewriting the content
with omap (while truncating the tmap data away).

The auto-conversion function was added in v0.44.

Signed-off-by: Sage Weil <sage@inktank.com>
This commit is contained in:
Sage Weil 2013-10-01 14:21:40 -07:00
parent 1db0a572c1
commit a9e5323586
5 changed files with 17 additions and 123 deletions

View File

@ -35,3 +35,19 @@ v0.70
v0.71
~~~~~
* The MDS now disallows snapshots by default as they are not
considered stable. The command 'ceph mds set allow_snaps' will
enable them.
* For clusters that were created before v0.44 (pre-argonaut, Spring
2012) and store radosgw data, the auto-upgrade from TMAP to OMAP
objects has been disabled. Before upgrading, make sure that any
buckets created on pre-argonaut releases have been modified (e.g.,
by PUTing and then DELETEing an object from each bucket). Any
cluster that was created with argonaut (v0.48) or a later release,
or that does not use radosgw, never relied on the automatic
conversion and is not affected by this change.
* Any direct users of the 'tmap' portion of the librados API should be
aware that the automatic tmap -> omap conversion functionality has
been removed.

View File

@ -362,12 +362,6 @@ OPTION(mds_standby_replay, OPT_BOOL, false)
// If true, compact leveldb store on mount
OPTION(osd_compact_leveldb_on_mount, OPT_BOOL, false)
// If true, uses tmap as initial value for omap on old objects
OPTION(osd_auto_upgrade_tmap, OPT_BOOL, true)
// If true, TMAPPUT sets uses_tmap DEBUGGING ONLY
OPTION(osd_tmapput_sets_uses_tmap, OPT_BOOL, false)
// Maximum number of backfills to or from a single osd
OPTION(osd_max_backfills, OPT_U64, 10)

View File

@ -3307,11 +3307,6 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
}
}
if (cct->_conf->osd_tmapput_sets_uses_tmap) {
assert(cct->_conf->osd_auto_upgrade_tmap);
oi.set_flag(object_info_t::FLAG_USES_TMAP);
}
// write it
vector<OSDOp> nops(1);
OSDOp& newop = nops[0];
@ -3357,29 +3352,6 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
}
set<string> out_set;
if (oi.test_flag(object_info_t::FLAG_USES_TMAP) && cct->_conf->osd_auto_upgrade_tmap) {
dout(20) << "CEPH_OSD_OP_OMAPGETKEYS: "
<< " Reading " << oi.soid << " omap from tmap" << dendl;
map<string, bufferlist> vals;
bufferlist header;
int r = _get_tmap(ctx, &vals, &header);
if (r == 0) {
map<string, bufferlist>::iterator iter =
vals.upper_bound(start_after);
for (uint64_t i = 0;
i < max_return && iter != vals.end();
++i, iter++) {
out_set.insert(iter->first);
}
::encode(out_set, osd_op.outdata);
ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10);
ctx->delta_stats.num_rd++;
break;
}
dout(10) << "failed, reading from omap" << dendl;
// No valid tmap, use omap
}
{
ObjectMap::ObjectMapIterator iter = osd->store->get_omap_iterator(
coll, soid
@ -3415,30 +3387,6 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
}
map<string, bufferlist> out_set;
if (oi.test_flag(object_info_t::FLAG_USES_TMAP) && cct->_conf->osd_auto_upgrade_tmap) {
dout(20) << "CEPH_OSD_OP_OMAPGETVALS: "
<< " Reading " << oi.soid << " omap from tmap" << dendl;
map<string, bufferlist> vals;
bufferlist header;
int r = _get_tmap(ctx, &vals, &header);
if (r == 0) {
map<string, bufferlist>::iterator iter = vals.upper_bound(start_after);
if (filter_prefix > start_after) iter = vals.lower_bound(filter_prefix);
for (uint64_t i = 0;
i < max_return && iter != vals.end() &&
iter->first.substr(0, filter_prefix.size()) == filter_prefix;
++i, iter++) {
out_set.insert(*iter);
}
::encode(out_set, osd_op.outdata);
ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10);
ctx->delta_stats.num_rd++;
break;
}
// No valid tmap, use omap
dout(10) << "failed, reading from omap" << dendl;
}
{
ObjectMap::ObjectMapIterator iter = osd->store->get_omap_iterator(
coll, soid
@ -3466,19 +3414,6 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
case CEPH_OSD_OP_OMAPGETHEADER:
++ctx->num_read;
{
if (oi.test_flag(object_info_t::FLAG_USES_TMAP) && cct->_conf->osd_auto_upgrade_tmap) {
dout(20) << "CEPH_OSD_OP_OMAPGETHEADER: "
<< " Reading " << oi.soid << " omap from tmap" << dendl;
map<string, bufferlist> vals;
bufferlist header;
int r = _get_tmap(ctx, &vals, &header);
if (r == 0) {
osd_op.outdata.claim(header);
break;
}
// No valid tmap, fall through to omap
dout(10) << "failed, reading from omap" << dendl;
}
osd->store->omap_get_header(coll, soid, &osd_op.outdata);
ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10);
ctx->delta_stats.num_rd++;
@ -3497,28 +3432,6 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
goto fail;
}
map<string, bufferlist> out;
if (oi.test_flag(object_info_t::FLAG_USES_TMAP) && cct->_conf->osd_auto_upgrade_tmap) {
dout(20) << "CEPH_OSD_OP_OMAPGET: "
<< " Reading " << oi.soid << " omap from tmap" << dendl;
map<string, bufferlist> vals;
bufferlist header;
int r = _get_tmap(ctx, &vals, &header);
if (r == 0) {
for (set<string>::iterator iter = keys_to_get.begin();
iter != keys_to_get.end();
++iter) {
if (vals.count(*iter)) {
out.insert(*(vals.find(*iter)));
}
}
::encode(out, osd_op.outdata);
ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10);
ctx->delta_stats.num_rd++;
break;
}
// No valid tmap, use omap
dout(10) << "failed, reading from omap" << dendl;
}
osd->store->omap_get_values(coll, soid, keys_to_get, &out);
::encode(out, osd_op.outdata);
ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10);
@ -3596,9 +3509,6 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
case CEPH_OSD_OP_OMAPSETVALS:
++ctx->num_write;
{
if (oi.test_flag(object_info_t::FLAG_USES_TMAP) && cct->_conf->osd_auto_upgrade_tmap) {
_copy_up_tmap(ctx);
}
if (!obs.exists) {
ctx->delta_stats.num_objects++;
obs.exists = true;
@ -3626,9 +3536,6 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
case CEPH_OSD_OP_OMAPSETHEADER:
++ctx->num_write;
{
if (oi.test_flag(object_info_t::FLAG_USES_TMAP) && cct->_conf->osd_auto_upgrade_tmap) {
_copy_up_tmap(ctx);
}
if (!obs.exists) {
ctx->delta_stats.num_objects++;
obs.exists = true;
@ -3646,9 +3553,6 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
result = -ENOENT;
break;
}
if (oi.test_flag(object_info_t::FLAG_USES_TMAP) && cct->_conf->osd_auto_upgrade_tmap) {
_copy_up_tmap(ctx);
}
t.touch(coll, soid);
t.omap_clear(coll, soid);
ctx->delta_stats.num_wr++;
@ -3662,9 +3566,6 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
result = -ENOENT;
break;
}
if (oi.test_flag(object_info_t::FLAG_USES_TMAP) && cct->_conf->osd_auto_upgrade_tmap) {
_copy_up_tmap(ctx);
}
t.touch(coll, soid);
set<string> to_rm;
try {
@ -3838,22 +3739,6 @@ int ReplicatedPG::_get_tmap(OpContext *ctx,
return 0;
}
// Copy an object's tmap contents up into its omap as part of the op in ctx.
//
// Clears FLAG_USES_TMAP on ctx->new_obs.oi, reads the tmap key/value pairs
// and header through _get_tmap(), and queues omap_setkeys/omap_setheader on
// ctx->op_t for the same object.
//
// Always returns 0. If _get_tmap() reports an error (r < 0), nothing is
// queued on ctx->op_t, but the flag has already been cleared by then.
int ReplicatedPG::_copy_up_tmap(OpContext *ctx)
{
dout(20) << "copying up tmap for " << ctx->new_obs.oi.soid << dendl;
// The flag is cleared before the read, so it is dropped even when the
// _get_tmap() call below fails.
ctx->new_obs.oi.clear_flag(object_info_t::FLAG_USES_TMAP);
map<string, bufferlist> vals;
bufferlist header;
int r = _get_tmap(ctx, &vals, &header);
// A failed tmap read is not reported to the caller; the copy is skipped.
if (r < 0)
return 0;
// Queue the tmap contents as omap keys and header on the op's transaction.
ctx->op_t.omap_setkeys(coll, ctx->new_obs.oi.soid,
vals);
ctx->op_t.omap_setheader(coll, ctx->new_obs.oi.soid,
header);
return 0;
}
inline int ReplicatedPG::_delete_head(OpContext *ctx)
{
SnapSet& snapset = ctx->new_snapset;

View File

@ -856,7 +856,6 @@ private:
int _get_tmap(OpContext *ctx, map<string, bufferlist> *out,
bufferlist *header);
int _copy_up_tmap(OpContext *ctx);
int _delete_head(OpContext *ctx);
int _rollback_to(OpContext *ctx, ceph_osd_op& op);
public:

View File

@ -2101,7 +2101,7 @@ struct object_info_t {
FLAG_WHITEOUT = 1<<1, // object logically does not exist
FLAG_DIRTY = 1<<2, // object has been modified since last flushed or undirtied
// ...
FLAG_USES_TMAP = 1<<8,
FLAG_USES_TMAP = 1<<8, // deprecated; no longer used.
} flag_t;
flag_t flags;