mirror of
https://github.com/ceph/ceph
synced 2024-12-27 14:03:25 +00:00
Merge pull request #32636 from ivancich/wip-avoid-0-length-ordered-list-result
rgw: address 0-length listing results when non-vis entries dominate Reviewed-by: Matt Benjamin <mbenjamin@redhat.com>
This commit is contained in:
commit
d5788d66a8
@ -6749,21 +6749,30 @@ next:
|
||||
formatter->open_object_section("result");
|
||||
formatter->dump_string("bucket", bucket_name);
|
||||
formatter->open_array_section("objects");
|
||||
|
||||
constexpr uint32_t NUM_ENTRIES = 1000;
|
||||
uint16_t expansion_factor = 1;
|
||||
while (is_truncated) {
|
||||
RGWRados::ent_map_t result;
|
||||
int r = store->getRados()->cls_bucket_list_ordered(
|
||||
bucket_info, RGW_NO_SHARD,
|
||||
marker, empty_prefix, empty_delimiter,
|
||||
1000, true,
|
||||
NUM_ENTRIES, true, expansion_factor,
|
||||
result, &is_truncated, &cls_filtered, &marker,
|
||||
null_yield,
|
||||
rgw_bucket_object_check_filter);
|
||||
if (r < 0 && r != -ENOENT) {
|
||||
cerr << "ERROR: failed operation r=" << r << std::endl;
|
||||
} else if (r == -ENOENT) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (r == -ENOENT)
|
||||
break;
|
||||
if (result.size() < NUM_ENTRIES / 8) {
|
||||
++expansion_factor;
|
||||
} else if (result.size() > NUM_ENTRIES * 7 / 8 &&
|
||||
expansion_factor > 1) {
|
||||
--expansion_factor;
|
||||
}
|
||||
|
||||
for (auto iter = result.begin(); iter != result.end(); ++iter) {
|
||||
rgw_obj_key key = iter->second.key;
|
||||
|
@ -57,6 +57,10 @@
|
||||
|
||||
#define BUCKET_TAG_TIMEOUT 30
|
||||
|
||||
// default number of entries to list with each bucket listing call
|
||||
// (use marker to bridge between calls)
|
||||
static constexpr size_t listing_max_entries = 1000;
|
||||
|
||||
|
||||
/*
|
||||
* The tenant_name is always returned on purpose. May be empty, of course.
|
||||
@ -1015,20 +1019,29 @@ int RGWBucket::check_object_index(RGWBucketAdminOpState& op_state,
|
||||
|
||||
Formatter *formatter = flusher.get_formatter();
|
||||
formatter->open_object_section("objects");
|
||||
uint16_t expansion_factor = 1;
|
||||
while (is_truncated) {
|
||||
RGWRados::ent_map_t result;
|
||||
result.reserve(listing_max_entries);
|
||||
|
||||
int r = store->getRados()->cls_bucket_list_ordered(
|
||||
bucket_info, RGW_NO_SHARD, marker, prefix, empty_delimiter,
|
||||
listing_max_entries, true, result, &is_truncated, &cls_filtered,
|
||||
&marker, y, rgw_bucket_object_check_filter);
|
||||
listing_max_entries, true, expansion_factor,
|
||||
result, &is_truncated, &cls_filtered, &marker,
|
||||
y, rgw_bucket_object_check_filter);
|
||||
if (r == -ENOENT) {
|
||||
break;
|
||||
} else if (r < 0 && r != -ENOENT) {
|
||||
set_err_msg(err_msg, "ERROR: failed operation r=" + cpp_strerror(-r));
|
||||
}
|
||||
|
||||
if (result.size() < listing_max_entries / 8) {
|
||||
++expansion_factor;
|
||||
} else if (result.size() > listing_max_entries * 7 / 8 &&
|
||||
expansion_factor > 1) {
|
||||
--expansion_factor;
|
||||
}
|
||||
|
||||
dump_bucket_index(result, formatter);
|
||||
flusher.flush();
|
||||
}
|
||||
|
@ -24,9 +24,6 @@
|
||||
#include "services/svc_bucket_sync.h"
|
||||
|
||||
|
||||
static constexpr size_t listing_max_entries = 1000;
|
||||
|
||||
|
||||
// define as static when RGWBucket implementation completes
|
||||
extern void rgw_get_buckets_obj(const rgw_user& user_id, string& buckets_obj_id);
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/types.h>
|
||||
#include <sstream>
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <string_view>
|
||||
|
||||
@ -1774,10 +1776,25 @@ int RGWRados::Bucket::List::list_objects_ordered(
|
||||
}
|
||||
}
|
||||
|
||||
constexpr int allowed_read_attempts = 2;
|
||||
for (int attempt = 0; attempt < allowed_read_attempts; ++attempt) {
|
||||
rgw_obj_index_key prev_marker;
|
||||
uint16_t attempt = 0;
|
||||
while (true) {
|
||||
ldout(cct, 20) << "RGWRados::Bucket::List::" << __func__ <<
|
||||
" beginning attempt=" << ++attempt << dendl;
|
||||
|
||||
// this loop is generally expected only to have a single
|
||||
// iteration; see bottom of loop for early exit
|
||||
// iteration; the standard exit is at the bottom of the loop, but
|
||||
// there's an error condition emergency exit as well
|
||||
|
||||
if (attempt > 1 && !(prev_marker < cur_marker)) {
|
||||
// we've failed to make forward progress
|
||||
ldout(cct, 0) << "RGWRados::Bucket::List::" << __func__ <<
|
||||
": ERROR marker failed to make forward progress; attempt=" << attempt <<
|
||||
", prev_marker=" << prev_marker <<
|
||||
", cur_marker=" << cur_marker << dendl;
|
||||
break;
|
||||
}
|
||||
prev_marker = cur_marker;
|
||||
|
||||
ent_map_t ent_map;
|
||||
ent_map.reserve(read_ahead);
|
||||
@ -1788,6 +1805,7 @@ int RGWRados::Bucket::List::list_objects_ordered(
|
||||
params.delim,
|
||||
read_ahead + 1 - count,
|
||||
params.list_versions,
|
||||
attempt,
|
||||
ent_map,
|
||||
&truncated,
|
||||
&cls_filtered,
|
||||
@ -1800,9 +1818,11 @@ int RGWRados::Bucket::List::list_objects_ordered(
|
||||
for (auto eiter = ent_map.begin(); eiter != ent_map.end(); ++eiter) {
|
||||
rgw_bucket_dir_entry& entry = eiter->second;
|
||||
rgw_obj_index_key index_key = entry.key;
|
||||
|
||||
rgw_obj_key obj(index_key);
|
||||
|
||||
ldout(cct, 20) << "RGWRados::Bucket::List::" << __func__ <<
|
||||
" considering entry " << entry.key << dendl;
|
||||
|
||||
/* note that parse_raw_oid() here will not set the correct
|
||||
* object's instance, as rgw_obj_index_key encodes that
|
||||
* separately. We don't need to set the instance because it's
|
||||
@ -1816,12 +1836,12 @@ int RGWRados::Bucket::List::list_objects_ordered(
|
||||
continue;
|
||||
}
|
||||
|
||||
bool check_ns = (obj.ns == params.ns);
|
||||
bool matched_ns = (obj.ns == params.ns);
|
||||
if (!params.list_versions && !entry.is_visible()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (params.enforce_ns && !check_ns) {
|
||||
if (params.enforce_ns && !matched_ns) {
|
||||
if (!params.ns.empty()) {
|
||||
/* we've iterated past the namespace we're searching -- done now */
|
||||
truncated = false;
|
||||
@ -1914,6 +1934,9 @@ int RGWRados::Bucket::List::list_objects_ordered(
|
||||
goto done;
|
||||
}
|
||||
|
||||
ldout(cct, 20) << "RGWRados::Bucket::List::" << __func__ <<
|
||||
" adding entry " << entry.key << " to result" << dendl;
|
||||
|
||||
result->emplace_back(std::move(entry));
|
||||
count++;
|
||||
} // eiter for loop
|
||||
@ -1942,15 +1965,23 @@ int RGWRados::Bucket::List::list_objects_ordered(
|
||||
}
|
||||
} // if older osd didn't do delimiter filtering
|
||||
|
||||
// if we finished listing, or if we're returning at least half the
|
||||
// requested entries, that's enough; S3 and swift protocols allow
|
||||
// returning fewer than max entries
|
||||
if (!truncated || count >= max / 2) {
|
||||
ldout(cct, 20) << "RGWRados::Bucket::List::" << __func__ <<
|
||||
" INFO end of outer loop, truncated=" << truncated <<
|
||||
", count=" << count << ", attempt=" << attempt << dendl;
|
||||
|
||||
if (!truncated || count >= (max + 1) / 2) {
|
||||
// if we finished listing, or if we're returning at least half the
|
||||
// requested entries, that's enough; S3 and swift protocols allow
|
||||
// returning fewer than max entries
|
||||
break;
|
||||
} else if (attempt > 8 && count >= 1) {
|
||||
// if we've made at least 8 attempts and we have some, but very
|
||||
// few, results, return with what we have
|
||||
break;
|
||||
}
|
||||
|
||||
ldout(cct, 1) << "RGWRados::Bucket::List::" << __func__ <<
|
||||
" INFO ordered bucket listing requires read #" << (2 + attempt) <<
|
||||
" INFO ordered bucket listing requires read #" << (1 + attempt) <<
|
||||
dendl;
|
||||
} // read attempt loop
|
||||
|
||||
@ -8136,12 +8167,13 @@ uint32_t RGWRados::calc_ordered_bucket_list_per_shard(uint32_t num_entries,
|
||||
|
||||
|
||||
int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
|
||||
int shard_id,
|
||||
const int shard_id,
|
||||
const rgw_obj_index_key& start_after,
|
||||
const string& prefix,
|
||||
const string& delimiter,
|
||||
uint32_t num_entries,
|
||||
bool list_versions,
|
||||
const uint32_t num_entries,
|
||||
const bool list_versions,
|
||||
const uint16_t expansion_factor,
|
||||
ent_map_t& m,
|
||||
bool* is_truncated,
|
||||
bool* cls_filtered,
|
||||
@ -8149,9 +8181,18 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
|
||||
optional_yield y,
|
||||
check_filter_t force_check_filter)
|
||||
{
|
||||
ldout(cct, 10) << "cls_bucket_list_ordered " << bucket_info.bucket <<
|
||||
" start_after " << start_after.name << "[" << start_after.instance <<
|
||||
"] num_entries " << num_entries << dendl;
|
||||
/* expansion_factor allows the number of entries to read to grow
|
||||
* exponentially; this is used when earlier reads are producing too
|
||||
* few results, perhaps due to filtering or to a series of
|
||||
* namespaced entries */
|
||||
|
||||
ldout(cct, 10) << "RGWRados::" << __func__ << ": " << bucket_info.bucket <<
|
||||
" start_after=\"" << start_after.name <<
|
||||
"[" << start_after.instance <<
|
||||
"]\", prefix=\"" << prefix <<
|
||||
"\" num_entries=" << num_entries <<
|
||||
", list_versions=" << list_versions <<
|
||||
", expansion_factor=" << expansion_factor << dendl;
|
||||
|
||||
RGWSI_RADOS::Pool index_pool;
|
||||
// key - oid (for different shards if there is any)
|
||||
@ -8165,10 +8206,22 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
|
||||
}
|
||||
|
||||
const uint32_t shard_count = oids.size();
|
||||
const uint32_t num_entries_per_shard =
|
||||
calc_ordered_bucket_list_per_shard(num_entries, shard_count);
|
||||
uint32_t num_entries_per_shard;
|
||||
if (expansion_factor == 0) {
|
||||
num_entries_per_shard =
|
||||
calc_ordered_bucket_list_per_shard(num_entries, shard_count);
|
||||
} else if (expansion_factor <= 11) {
|
||||
// we'll max out the exponential multiplication factor at 1024 (2<<10)
|
||||
num_entries_per_shard =
|
||||
std::min(num_entries,
|
||||
(uint32_t(1 << (expansion_factor - 1)) *
|
||||
calc_ordered_bucket_list_per_shard(num_entries, shard_count)));
|
||||
} else {
|
||||
num_entries_per_shard = num_entries;
|
||||
}
|
||||
|
||||
ldout(cct, 10) << __func__ << " request from each of " << shard_count <<
|
||||
ldout(cct, 10) << "RGWRados::" << __func__ <<
|
||||
" request from each of " << shard_count <<
|
||||
" shard(s) for " << num_entries_per_shard << " entries to get " <<
|
||||
num_entries << " total entries" << dendl;
|
||||
|
||||
@ -8226,6 +8279,9 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
|
||||
const string& name = vcurrents[pos]->first;
|
||||
struct rgw_bucket_dir_entry& dirent = vcurrents[pos]->second;
|
||||
|
||||
ldout(cct, 20) << "RGWRados::" << __func__ << " currently processing " <<
|
||||
dirent.key << " from shard " << pos << dendl;
|
||||
|
||||
bool force_check =
|
||||
force_check_filter && force_check_filter(dirent.key.name);
|
||||
|
||||
@ -8247,11 +8303,15 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
|
||||
} else {
|
||||
r = 0;
|
||||
}
|
||||
|
||||
if (r >= 0) {
|
||||
ldout(cct, 10) << "RGWRados::cls_bucket_list_ordered: got " <<
|
||||
ldout(cct, 10) << "RGWRados::" << __func__ << ": got " <<
|
||||
dirent.key.name << "[" << dirent.key.instance << "]" << dendl;
|
||||
m[name] = std::move(dirent);
|
||||
++count;
|
||||
} else {
|
||||
ldout(cct, 10) << "RGWRados::" << __func__ << ": skipping " <<
|
||||
dirent.key.name << "[" << dirent.key.instance << "]" << dendl;
|
||||
}
|
||||
|
||||
// refresh the candidates map
|
||||
@ -8267,7 +8327,7 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
|
||||
}
|
||||
} // while we haven't provided requested # of result entries
|
||||
|
||||
// suggest updates if there is any
|
||||
// suggest updates if there are any
|
||||
for (auto& miter : updates) {
|
||||
if (miter.second.length()) {
|
||||
ObjectWriteOperation o;
|
||||
@ -8289,14 +8349,24 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
|
||||
}
|
||||
}
|
||||
|
||||
ldout(cct, 20) << "RGWRados::" << __func__ <<
|
||||
": returning, count=" << count << ", is_truncated=" << *is_truncated <<
|
||||
dendl;
|
||||
|
||||
if (*is_truncated && count < num_entries) {
|
||||
ldout(cct, 10) << "RGWRados::" << __func__ <<
|
||||
": INFO requested " << num_entries << " entries but returning " <<
|
||||
count << ", which is truncated" << dendl;
|
||||
}
|
||||
|
||||
if (pos >= 0)
|
||||
if (pos >= 0) {
|
||||
*last_entry = std::move((--vcurrents[pos])->first);
|
||||
ldout(cct, 20) << "RGWRados::" << __func__ <<
|
||||
": returning, last_entry=" << *last_entry << dendl;
|
||||
} else {
|
||||
ldout(cct, 20) << "RGWRados::" << __func__ <<
|
||||
": returning, last_entry NOT SET" << dendl;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1378,12 +1378,13 @@ public:
|
||||
using check_filter_t = bool (*)(const std::string&);
|
||||
|
||||
int cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
|
||||
int shard_id,
|
||||
const int shard_id,
|
||||
const rgw_obj_index_key& start_after,
|
||||
const string& prefix,
|
||||
const string& delimiter,
|
||||
uint32_t num_entries,
|
||||
bool list_versions,
|
||||
const uint32_t num_entries,
|
||||
const bool list_versions,
|
||||
const uint16_t exp_factor, // 0 means ignore
|
||||
ent_map_t& m,
|
||||
bool* is_truncated,
|
||||
bool* cls_filtered,
|
||||
|
Loading…
Reference in New Issue
Block a user