Merge pull request #12059 from liewegas/wip-osd-omap-limits

osd: set server-side limits on omap get operations

Reviewed-by: xie xingguo <xie.xingguo@zte.com.cn>
Reviewed-by: Kefu Chai <kchai@redhat.com>
Reviewed-by: Samuel Just <sjust@redhat.com>
This commit is contained in:
Yuri Weinstein 2016-11-22 16:34:54 -08:00 committed by GitHub
commit 0b6169612b
3 changed files with 39 additions and 12 deletions

View File

@ -98,3 +98,12 @@ OSD: change the prefix from fs_* to os_*, because commit_latency and …
jerasure_neon, shec_generic, shec_sse3, shec_sse4, and shec_neon. If you use any
of these plugins directly you will see a warning in the mon log file.
Please switch to using just 'jerasure' or 'shec'.
* The librados omap get_keys and get_vals operations include a start key and a
limit on the number of keys to return. The OSD now imposes a configurable
limit on the number of keys and number of total bytes it will respond with,
which means that a librados user might get fewer keys than they asked for.
This is necessary to prevent careless users from requesting an unreasonable
amount of data from the cluster in a single operation. The new limits are
configured with `osd_max_omap_entries_per_request`, defaulting to 131,072, and
'osd_max_omap_bytes_per_request', defaulting to 4MB.

View File

@ -929,6 +929,9 @@ OPTION(osd_max_object_namespace_len, OPT_U32, 256) // max rados object namespace
OPTION(osd_max_attr_name_len, OPT_U32, 100) // max rados attr name len; cannot go higher than 100 chars for file system backends
OPTION(osd_max_attr_size, OPT_U64, 0)
OPTION(osd_max_omap_entries_per_request, OPT_U64, 131072)
OPTION(osd_max_omap_bytes_per_request, OPT_U64, 4<<20)
OPTION(osd_objectstore, OPT_STR, "filestore") // ObjectStore backend type
OPTION(osd_objectstore_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled
// Override maintaining compatibility with older OSDs

View File

@ -5755,22 +5755,29 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
tracepoint(osd, do_osd_op_pre_omapgetkeys, soid.oid.name.c_str(), soid.snap.val, "???", 0);
goto fail;
}
if (max_return > g_conf->osd_max_omap_entries_per_request) {
max_return = g_conf->osd_max_omap_entries_per_request;
}
tracepoint(osd, do_osd_op_pre_omapgetkeys, soid.oid.name.c_str(), soid.snap.val, start_after.c_str(), max_return);
set<string> out_set;
bufferlist bl;
uint32_t num = 0;
if (oi.is_omap()) {
ObjectMap::ObjectMapIterator iter = osd->store->get_omap_iterator(
coll, ghobject_t(soid)
);
assert(iter);
iter->upper_bound(start_after);
for (uint64_t i = 0;
i < max_return && iter->valid();
++i, iter->next(false)) {
out_set.insert(iter->key());
for (num = 0;
num < max_return &&
bl.length() < g_conf->osd_max_omap_bytes_per_request &&
iter->valid();
++num, iter->next(false)) {
::encode(iter->key(), bl);
}
} // else return empty out_set
::encode(out_set, osd_op.outdata);
::encode(num, osd_op.outdata);
osd_op.outdata.claim_append(bl);
ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10);
ctx->delta_stats.num_rd++;
}
@ -5792,9 +5799,13 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
tracepoint(osd, do_osd_op_pre_omapgetvals, soid.oid.name.c_str(), soid.snap.val, "???", 0, "???");
goto fail;
}
if (max_return > g_conf->osd_max_omap_entries_per_request) {
max_return = g_conf->osd_max_omap_entries_per_request;
}
tracepoint(osd, do_osd_op_pre_omapgetvals, soid.oid.name.c_str(), soid.snap.val, start_after.c_str(), max_return, filter_prefix.c_str());
map<string, bufferlist> out_set;
uint32_t num = 0;
bufferlist bl;
if (oi.is_omap()) {
ObjectMap::ObjectMapIterator iter = osd->store->get_omap_iterator(
coll, ghobject_t(soid)
@ -5805,15 +5816,19 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
}
iter->upper_bound(start_after);
if (filter_prefix > start_after) iter->lower_bound(filter_prefix);
for (uint64_t i = 0;
i < max_return && iter->valid() &&
for (num = 0;
num < max_return &&
bl.length() < g_conf->osd_max_omap_bytes_per_request &&
iter->valid() &&
iter->key().substr(0, filter_prefix.size()) == filter_prefix;
++i, iter->next(false)) {
++num, iter->next(false)) {
dout(20) << "Found key " << iter->key() << dendl;
out_set.insert(make_pair(iter->key(), iter->value()));
::encode(iter->key(), bl);
::encode(iter->value(), bl);
}
} // else return empty out_set
::encode(out_set, osd_op.outdata);
::encode(num, osd_op.outdata);
osd_op.outdata.claim_append(bl);
ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10);
ctx->delta_stats.num_rd++;
}