Merge pull request #32636 from ivancich/wip-avoid-0-length-ordered-list-result

rgw: address 0-length listing results when non-vis entries dominate

Reviewed-by: Matt Benjamin <mbenjamin@redhat.com>
This commit is contained in:
J. Eric Ivancich 2020-02-13 14:04:05 -05:00 committed by GitHub
commit d5788d66a8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 124 additions and 34 deletions

View File

@ -6749,21 +6749,30 @@ next:
formatter->open_object_section("result");
formatter->dump_string("bucket", bucket_name);
formatter->open_array_section("objects");
constexpr uint32_t NUM_ENTRIES = 1000;
uint16_t expansion_factor = 1;
while (is_truncated) {
RGWRados::ent_map_t result;
int r = store->getRados()->cls_bucket_list_ordered(
bucket_info, RGW_NO_SHARD,
marker, empty_prefix, empty_delimiter,
1000, true,
NUM_ENTRIES, true, expansion_factor,
result, &is_truncated, &cls_filtered, &marker,
null_yield,
rgw_bucket_object_check_filter);
if (r < 0 && r != -ENOENT) {
cerr << "ERROR: failed operation r=" << r << std::endl;
} else if (r == -ENOENT) {
break;
}
if (r == -ENOENT)
break;
if (result.size() < NUM_ENTRIES / 8) {
++expansion_factor;
} else if (result.size() > NUM_ENTRIES * 7 / 8 &&
expansion_factor > 1) {
--expansion_factor;
}
for (auto iter = result.begin(); iter != result.end(); ++iter) {
rgw_obj_key key = iter->second.key;

View File

@ -57,6 +57,10 @@
#define BUCKET_TAG_TIMEOUT 30
// default number of entries to list with each bucket listing call
// (use marker to bridge between calls)
static constexpr size_t listing_max_entries = 1000;
/*
* The tenant_name is always returned on purpose. May be empty, of course.
@ -1015,20 +1019,29 @@ int RGWBucket::check_object_index(RGWBucketAdminOpState& op_state,
Formatter *formatter = flusher.get_formatter();
formatter->open_object_section("objects");
uint16_t expansion_factor = 1;
while (is_truncated) {
RGWRados::ent_map_t result;
result.reserve(listing_max_entries);
int r = store->getRados()->cls_bucket_list_ordered(
bucket_info, RGW_NO_SHARD, marker, prefix, empty_delimiter,
listing_max_entries, true, result, &is_truncated, &cls_filtered,
&marker, y, rgw_bucket_object_check_filter);
listing_max_entries, true, expansion_factor,
result, &is_truncated, &cls_filtered, &marker,
y, rgw_bucket_object_check_filter);
if (r == -ENOENT) {
break;
} else if (r < 0 && r != -ENOENT) {
set_err_msg(err_msg, "ERROR: failed operation r=" + cpp_strerror(-r));
}
if (result.size() < listing_max_entries / 8) {
++expansion_factor;
} else if (result.size() > listing_max_entries * 7 / 8 &&
expansion_factor > 1) {
--expansion_factor;
}
dump_bucket_index(result, formatter);
flusher.flush();
}

View File

@ -24,9 +24,6 @@
#include "services/svc_bucket_sync.h"
static constexpr size_t listing_max_entries = 1000;
// define as static when RGWBucket implementation completes
extern void rgw_get_buckets_obj(const rgw_user& user_id, string& buckets_obj_id);

View File

@ -5,6 +5,8 @@
#include <errno.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sstream>
#include <boost/algorithm/string.hpp>
#include <string_view>
@ -1774,10 +1776,25 @@ int RGWRados::Bucket::List::list_objects_ordered(
}
}
constexpr int allowed_read_attempts = 2;
for (int attempt = 0; attempt < allowed_read_attempts; ++attempt) {
rgw_obj_index_key prev_marker;
uint16_t attempt = 0;
while (true) {
ldout(cct, 20) << "RGWRados::Bucket::List::" << __func__ <<
" beginning attempt=" << ++attempt << dendl;
// this loop is generally expected only to have a single
// iteration; see bottom of loop for early exit
// iteration; the standard exit is at the bottom of the loop, but
// there's an error condition emergency exit as well
if (attempt > 1 && !(prev_marker < cur_marker)) {
// we've failed to make forward progress
ldout(cct, 0) << "RGWRados::Bucket::List::" << __func__ <<
": ERROR marker failed to make forward progress; attempt=" << attempt <<
", prev_marker=" << prev_marker <<
", cur_marker=" << cur_marker << dendl;
break;
}
prev_marker = cur_marker;
ent_map_t ent_map;
ent_map.reserve(read_ahead);
@ -1788,6 +1805,7 @@ int RGWRados::Bucket::List::list_objects_ordered(
params.delim,
read_ahead + 1 - count,
params.list_versions,
attempt,
ent_map,
&truncated,
&cls_filtered,
@ -1800,9 +1818,11 @@ int RGWRados::Bucket::List::list_objects_ordered(
for (auto eiter = ent_map.begin(); eiter != ent_map.end(); ++eiter) {
rgw_bucket_dir_entry& entry = eiter->second;
rgw_obj_index_key index_key = entry.key;
rgw_obj_key obj(index_key);
ldout(cct, 20) << "RGWRados::Bucket::List::" << __func__ <<
" considering entry " << entry.key << dendl;
/* note that parse_raw_oid() here will not set the correct
* object's instance, as rgw_obj_index_key encodes that
* separately. We don't need to set the instance because it's
@ -1816,12 +1836,12 @@ int RGWRados::Bucket::List::list_objects_ordered(
continue;
}
bool check_ns = (obj.ns == params.ns);
bool matched_ns = (obj.ns == params.ns);
if (!params.list_versions && !entry.is_visible()) {
continue;
}
if (params.enforce_ns && !check_ns) {
if (params.enforce_ns && !matched_ns) {
if (!params.ns.empty()) {
/* we've iterated past the namespace we're searching -- done now */
truncated = false;
@ -1914,6 +1934,9 @@ int RGWRados::Bucket::List::list_objects_ordered(
goto done;
}
ldout(cct, 20) << "RGWRados::Bucket::List::" << __func__ <<
" adding entry " << entry.key << " to result" << dendl;
result->emplace_back(std::move(entry));
count++;
} // eiter for loop
@ -1942,15 +1965,23 @@ int RGWRados::Bucket::List::list_objects_ordered(
}
} // if older osd didn't do delimiter filtering
// if we finished listing, or if we're returning at least half the
// requested entries, that's enough; S3 and swift protocols allow
// returning fewer than max entries
if (!truncated || count >= max / 2) {
ldout(cct, 20) << "RGWRados::Bucket::List::" << __func__ <<
" INFO end of outer loop, truncated=" << truncated <<
", count=" << count << ", attempt=" << attempt << dendl;
if (!truncated || count >= (max + 1) / 2) {
// if we finished listing, or if we're returning at least half the
// requested entries, that's enough; S3 and swift protocols allow
// returning fewer than max entries
break;
} else if (attempt > 8 && count >= 1) {
// if we've made more than 8 attempts and we have some, but very
// few, results, return with what we have
break;
}
ldout(cct, 1) << "RGWRados::Bucket::List::" << __func__ <<
" INFO ordered bucket listing requires read #" << (2 + attempt) <<
" INFO ordered bucket listing requires read #" << (1 + attempt) <<
dendl;
} // read attempt loop
@ -8136,12 +8167,13 @@ uint32_t RGWRados::calc_ordered_bucket_list_per_shard(uint32_t num_entries,
int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
int shard_id,
const int shard_id,
const rgw_obj_index_key& start_after,
const string& prefix,
const string& delimiter,
uint32_t num_entries,
bool list_versions,
const uint32_t num_entries,
const bool list_versions,
const uint16_t expansion_factor,
ent_map_t& m,
bool* is_truncated,
bool* cls_filtered,
@ -8149,9 +8181,18 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
optional_yield y,
check_filter_t force_check_filter)
{
ldout(cct, 10) << "cls_bucket_list_ordered " << bucket_info.bucket <<
" start_after " << start_after.name << "[" << start_after.instance <<
"] num_entries " << num_entries << dendl;
/* expansion_factor allows the number of entries to read to grow
* exponentially; this is used when earlier reads are producing too
* few results, perhaps due to filtering or to a series of
* namespaced entries */
ldout(cct, 10) << "RGWRados::" << __func__ << ": " << bucket_info.bucket <<
" start_after=\"" << start_after.name <<
"[" << start_after.instance <<
"]\", prefix=\"" << prefix <<
"\" num_entries=" << num_entries <<
", list_versions=" << list_versions <<
", expansion_factor=" << expansion_factor << dendl;
RGWSI_RADOS::Pool index_pool;
// key - oid (for different shards if there is any)
@ -8165,10 +8206,22 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
}
const uint32_t shard_count = oids.size();
const uint32_t num_entries_per_shard =
calc_ordered_bucket_list_per_shard(num_entries, shard_count);
uint32_t num_entries_per_shard;
if (expansion_factor == 0) {
num_entries_per_shard =
calc_ordered_bucket_list_per_shard(num_entries, shard_count);
} else if (expansion_factor <= 11) {
// we'll max out the exponential multiplication factor at 1024 (1<<10)
num_entries_per_shard =
std::min(num_entries,
(uint32_t(1 << (expansion_factor - 1)) *
calc_ordered_bucket_list_per_shard(num_entries, shard_count)));
} else {
num_entries_per_shard = num_entries;
}
ldout(cct, 10) << __func__ << " request from each of " << shard_count <<
ldout(cct, 10) << "RGWRados::" << __func__ <<
" request from each of " << shard_count <<
" shard(s) for " << num_entries_per_shard << " entries to get " <<
num_entries << " total entries" << dendl;
@ -8226,6 +8279,9 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
const string& name = vcurrents[pos]->first;
struct rgw_bucket_dir_entry& dirent = vcurrents[pos]->second;
ldout(cct, 20) << "RGWRados::" << __func__ << " currently processing " <<
dirent.key << " from shard " << pos << dendl;
bool force_check =
force_check_filter && force_check_filter(dirent.key.name);
@ -8247,11 +8303,15 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
} else {
r = 0;
}
if (r >= 0) {
ldout(cct, 10) << "RGWRados::cls_bucket_list_ordered: got " <<
ldout(cct, 10) << "RGWRados::" << __func__ << ": got " <<
dirent.key.name << "[" << dirent.key.instance << "]" << dendl;
m[name] = std::move(dirent);
++count;
} else {
ldout(cct, 10) << "RGWRados::" << __func__ << ": skipping " <<
dirent.key.name << "[" << dirent.key.instance << "]" << dendl;
}
// refresh the candidates map
@ -8267,7 +8327,7 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
}
} // while we haven't provided requested # of result entries
// suggest updates if there is any
// suggest updates if there are any
for (auto& miter : updates) {
if (miter.second.length()) {
ObjectWriteOperation o;
@ -8289,14 +8349,24 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
}
}
ldout(cct, 20) << "RGWRados::" << __func__ <<
": returning, count=" << count << ", is_truncated=" << *is_truncated <<
dendl;
if (*is_truncated && count < num_entries) {
ldout(cct, 10) << "RGWRados::" << __func__ <<
": INFO requested " << num_entries << " entries but returning " <<
count << ", which is truncated" << dendl;
}
if (pos >= 0)
if (pos >= 0) {
*last_entry = std::move((--vcurrents[pos])->first);
ldout(cct, 20) << "RGWRados::" << __func__ <<
": returning, last_entry=" << *last_entry << dendl;
} else {
ldout(cct, 20) << "RGWRados::" << __func__ <<
": returning, last_entry NOT SET" << dendl;
}
return 0;
}

View File

@ -1378,12 +1378,13 @@ public:
using check_filter_t = bool (*)(const std::string&);
int cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
int shard_id,
const int shard_id,
const rgw_obj_index_key& start_after,
const string& prefix,
const string& delimiter,
uint32_t num_entries,
bool list_versions,
const uint32_t num_entries,
const bool list_versions,
const uint16_t exp_factor, // 0 means ignore
ent_map_t& m,
bool* is_truncated,
bool* cls_filtered,