Merge pull request #26564 from theanalyst/wip-reshard-lc-fixes

rgw: LC: handle resharded buckets

Reviewed-by: Casey Bodley <cbodley@redhat.com>
This commit is contained in:
Casey Bodley 2019-03-18 15:13:06 -04:00 committed by GitHub
commit cab4169677
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 328 additions and 39 deletions

View File

@ -95,3 +95,45 @@ Manual bucket resharding
::
# radosgw-admin bucket reshard --bucket <bucket_name> --num-shards <new number of shards>
Troubleshooting
===============
Clusters prior to Luminous 12.2.11 and Mimic 13.2.5 left behind stale bucket
instance entries, which weren't automatically cleaned up. The issue also affected
lifecycle policies, which were no longer applied to resharded buckets. Both of
these issues can be worked around using a couple of radosgw-admin commands.
Stale Instance Management
-------------------------
::
# radosgw-admin reshard stale-instances list
This lists the stale instances in a cluster that are ready to be cleaned up.
Please note that the cleanup of these instances should be done only on a
single-site cluster. The cleanup can be done with the following command:
::
# radosgw-admin reshard stale-instances rm
Lifecycle fixes
---------------
For clusters which had resharded instances, it is highly likely that the old
lifecycle processes would've flagged and deleted the lifecycle processing
entries, as the bucket instance changed during a reshard. While this is fixed
for newer clusters (from 13.2.6 and 12.2.12), older buckets which had lifecycle
policies and have undergone a reshard will have to be fixed manually by issuing
the following command:
::
# radosgw-admin lc reshard fix --bucket {bucketname}
As a convenience wrapper, if the ``--bucket`` argument is omitted, this
command will try to fix the LC policies of all the buckets in the cluster.

View File

@ -756,7 +756,8 @@ static void log_entry(const char *func, const char *str, rgw_bucket_olh_entry *e
}
template <class T>
static int read_index_entry(cls_method_context_t hctx, string& name, T *entry)
static int read_omap_entry(cls_method_context_t hctx, const std::string& name,
T* entry)
{
bufferlist current_entry;
int rc = cls_cxx_map_get_val(hctx, name, &current_entry);
@ -768,9 +769,19 @@ static int read_index_entry(cls_method_context_t hctx, string& name, T *entry)
try {
decode(*entry, cur_iter);
} catch (buffer::error& err) {
CLS_LOG(1, "ERROR: read_index_entry(): failed to decode entry\n");
CLS_LOG(1, "ERROR: %s(): failed to decode entry\n", __func__);
return -EIO;
}
return 0;
}
template <class T>
static int read_index_entry(cls_method_context_t hctx, string& name, T* entry)
{
int ret = read_omap_entry(hctx, name, entry);
if (ret < 0) {
return ret;
}
log_entry(__func__, "existing entry", entry);
return 0;
@ -3168,18 +3179,10 @@ static int gc_omap_get(cls_method_context_t hctx, int type, const string& key, c
string index;
prepend_index_prefix(key, type, &index);
bufferlist bl;
int ret = cls_cxx_map_get_val(hctx, index, &bl);
int ret = read_omap_entry(hctx, index, info);
if (ret < 0)
return ret;
try {
auto iter = bl.cbegin();
decode(*info, iter);
} catch (buffer::error& err) {
CLS_LOG(0, "ERROR: rgw_cls_gc_omap_get(): failed to decode index=%s\n", index.c_str());
}
return 0;
}
@ -3494,6 +3497,29 @@ static int rgw_cls_gc_remove(cls_method_context_t hctx, bufferlist *in, bufferli
return gc_remove(hctx, op.tags);
}
/**
 * Class method: fetch a single lifecycle entry from this object's omap.
 *
 * Decodes a cls_rgw_lc_get_entry_op from `in`, reads the omap value stored
 * under `op.marker` via read_omap_entry(), and encodes the result into `out`
 * as a cls_rgw_lc_get_entry_ret.
 *
 * @param hctx  method context of the object being operated on
 * @param in    encoded cls_rgw_lc_get_entry_op
 * @param out   receives the encoded cls_rgw_lc_get_entry_ret on success
 * @return 0 on success, -EINVAL if the request cannot be decoded, or the
 *         negative error returned by read_omap_entry()
 */
static int rgw_cls_lc_get_entry(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
{
  auto in_iter = in->cbegin();

  cls_rgw_lc_get_entry_op op;
  try {
    decode(op, in_iter);
  } catch (buffer::error& err) {
    // use __func__ so the message always names this method
    CLS_LOG(1, "ERROR: %s(): failed to decode entry\n", __func__);
    return -EINVAL;
  }

  rgw_lc_entry_t lc_entry;
  int ret = read_omap_entry(hctx, op.marker, &lc_entry);
  if (ret < 0)
    return ret;

  cls_rgw_lc_get_entry_ret op_ret(std::move(lc_entry));
  encode(op_ret, *out);
  return 0;
}
static int rgw_cls_lc_set_entry(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
{
auto in_iter = in->cbegin();
@ -3705,22 +3731,6 @@ static int rgw_reshard_list(cls_method_context_t hctx, bufferlist *in, bufferlis
return 0;
}
/**
 * Read the omap value stored under `key` and decode it into `*entry`.
 *
 * @return 0 on success, the negative error from cls_cxx_map_get_val() if the
 *         lookup fails, or -EIO if the stored value cannot be decoded
 */
static int get_reshard_entry(cls_method_context_t hctx, const string& key, cls_rgw_reshard_entry *entry)
{
  bufferlist raw;
  if (int r = cls_cxx_map_get_val(hctx, key, &raw); r < 0) {
    return r;
  }

  auto pos = raw.cbegin();
  try {
    decode(*entry, pos);
    return 0;
  } catch (buffer::error& decode_err) {
    CLS_LOG(0, "ERROR: %s : failed to decode entry %s\n", __func__, decode_err.what());
  }
  return -EIO;
}
static int rgw_reshard_get(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
{
auto in_iter = in->cbegin();
@ -3736,7 +3746,7 @@ static int rgw_reshard_get(cls_method_context_t hctx, bufferlist *in, bufferlis
string key;
cls_rgw_reshard_entry entry;
op.entry.get_key(&key);
int ret = get_reshard_entry(hctx, key, &entry);
int ret = read_omap_entry(hctx, key, &entry);
if (ret < 0) {
return ret;
}
@ -3762,7 +3772,7 @@ static int rgw_reshard_remove(cls_method_context_t hctx, bufferlist *in, bufferl
string key;
cls_rgw_reshard_entry entry;
cls_rgw_reshard_entry::generate_key(op.tenant, op.bucket_name, &key);
int ret = get_reshard_entry(hctx, key, &entry);
int ret = read_omap_entry(hctx, key, &entry);
if (ret < 0) {
return ret;
}
@ -3917,6 +3927,7 @@ CLS_INIT(rgw)
cls_method_handle_t h_rgw_gc_set_entry;
cls_method_handle_t h_rgw_gc_list;
cls_method_handle_t h_rgw_gc_remove;
cls_method_handle_t h_rgw_lc_get_entry;
cls_method_handle_t h_rgw_lc_set_entry;
cls_method_handle_t h_rgw_lc_rm_entry;
cls_method_handle_t h_rgw_lc_get_next_entry;
@ -3979,6 +3990,7 @@ CLS_INIT(rgw)
cls_register_cxx_method(h_class, RGW_GC_REMOVE, CLS_METHOD_RD | CLS_METHOD_WR, rgw_cls_gc_remove, &h_rgw_gc_remove);
/* lifecycle bucket list */
cls_register_cxx_method(h_class, RGW_LC_GET_ENTRY, CLS_METHOD_RD, rgw_cls_lc_get_entry, &h_rgw_lc_get_entry);
cls_register_cxx_method(h_class, RGW_LC_SET_ENTRY, CLS_METHOD_RD | CLS_METHOD_WR, rgw_cls_lc_set_entry, &h_rgw_lc_set_entry);
cls_register_cxx_method(h_class, RGW_LC_RM_ENTRY, CLS_METHOD_RD | CLS_METHOD_WR, rgw_cls_lc_rm_entry, &h_rgw_lc_rm_entry);
cls_register_cxx_method(h_class, RGW_LC_GET_NEXT_ENTRY, CLS_METHOD_RD, rgw_cls_lc_get_next_entry, &h_rgw_lc_get_next_entry);

View File

@ -816,6 +816,29 @@ int cls_rgw_lc_set_entry(IoCtx& io_ctx, const string& oid, const pair<string, in
return r;
}
/**
 * Client-side wrapper for the RGW_LC_GET_ENTRY class method.
 *
 * Encodes `marker` into a cls_rgw_lc_get_entry_op, executes the method on
 * object `oid`, and decodes the reply into `entry`.
 *
 * @param io_ctx  IoCtx used to execute the class method
 * @param oid     object the method is executed on
 * @param marker  key of the lifecycle entry to look up
 * @param entry   receives the decoded entry on success; untouched on error
 * @return the (non-negative) result of exec() on success, the negative error
 *         from exec() on failure, or -EIO if the reply cannot be decoded
 */
int cls_rgw_lc_get_entry(IoCtx& io_ctx, const string& oid, const std::string& marker, rgw_lc_entry_t& entry)
{
  bufferlist in, out;
  cls_rgw_lc_get_entry_op call{marker};
  encode(call, in);
  int r = io_ctx.exec(oid, RGW_CLASS, RGW_LC_GET_ENTRY, in, out);
  if (r < 0) {
    return r;
  }

  cls_rgw_lc_get_entry_ret ret;
  try {
    auto iter = out.cbegin();
    decode(ret, iter);
  } catch (buffer::error& err) {
    return -EIO;
  }

  entry = std::move(ret.entry);
  return r;
}
int cls_rgw_lc_list(IoCtx& io_ctx, const string& oid,
const string& marker,
uint32_t max_entries,

View File

@ -553,6 +553,7 @@ int cls_rgw_lc_put_head(librados::IoCtx& io_ctx, const string& oid, cls_rgw_lc_o
int cls_rgw_lc_get_next_entry(librados::IoCtx& io_ctx, const string& oid, string& marker, pair<string, int>& entry);
int cls_rgw_lc_rm_entry(librados::IoCtx& io_ctx, const string& oid, const pair<string, int>& entry);
int cls_rgw_lc_set_entry(librados::IoCtx& io_ctx, const string& oid, const pair<string, int>& entry);
int cls_rgw_lc_get_entry(librados::IoCtx& io_ctx, const string& oid, const std::string& marker, rgw_lc_entry_t& entry);
int cls_rgw_lc_list(librados::IoCtx& io_ctx, const string& oid,
const string& marker,
uint32_t max_entries,

View File

@ -52,6 +52,7 @@
#define RGW_GC_REMOVE "gc_remove"
/* lifecycle bucket list */
#define RGW_LC_GET_ENTRY "lc_get_entry"
#define RGW_LC_SET_ENTRY "lc_set_entry"
#define RGW_LC_RM_ENTRY "lc_rm_entry"
#define RGW_LC_GET_NEXT_ENTRY "lc_get_next_entry"

View File

@ -1010,9 +1010,10 @@ struct cls_rgw_lc_get_next_entry_op {
};
WRITE_CLASS_ENCODER(cls_rgw_lc_get_next_entry_op)
struct cls_rgw_lc_get_next_entry_ret {
pair<string, int> entry;
using rgw_lc_entry_t = std::pair<std::string, int>;
struct cls_rgw_lc_get_next_entry_ret {
rgw_lc_entry_t entry;
cls_rgw_lc_get_next_entry_ret() {}
void encode(bufferlist& bl) const {
@ -1030,8 +1031,48 @@ struct cls_rgw_lc_get_next_entry_ret {
};
WRITE_CLASS_ENCODER(cls_rgw_lc_get_next_entry_ret)
// Request payload for the RGW_LC_GET_ENTRY ("lc_get_entry") class method.
struct cls_rgw_lc_get_entry_op {
// key of the lifecycle entry to fetch; the class method uses it as the omap key
string marker;
cls_rgw_lc_get_entry_op() {}
cls_rgw_lc_get_entry_op(const std::string& _marker) : marker(_marker) {}
// serialize `marker` inside an ENCODE_START(1, 1, ...) / ENCODE_FINISH envelope
void encode(bufferlist& bl) const {
ENCODE_START(1, 1, bl);
encode(marker, bl);
ENCODE_FINISH(bl);
}
// inverse of encode(): reads `marker` back from the buffer
void decode(bufferlist::const_iterator& bl) {
DECODE_START(1, bl);
decode(marker, bl);
DECODE_FINISH(bl);
}
};
WRITE_CLASS_ENCODER(cls_rgw_lc_get_entry_op)
// Reply payload for the RGW_LC_GET_ENTRY ("lc_get_entry") class method.
struct cls_rgw_lc_get_entry_ret {
// the lifecycle entry that was looked up
rgw_lc_entry_t entry;
cls_rgw_lc_get_entry_ret() {}
// takes ownership of an already-read entry (moved in, not copied)
cls_rgw_lc_get_entry_ret(rgw_lc_entry_t&& _entry) : entry(std::move(_entry)) {}
// serialize `entry` inside an ENCODE_START(1, 1, ...) / ENCODE_FINISH envelope
void encode(bufferlist& bl) const {
ENCODE_START(1, 1, bl);
encode(entry, bl);
ENCODE_FINISH(bl);
}
// inverse of encode(): reads `entry` back from the buffer
void decode(bufferlist::const_iterator& bl) {
DECODE_START(1, bl);
decode(entry, bl);
DECODE_FINISH(bl);
}
};
WRITE_CLASS_ENCODER(cls_rgw_lc_get_entry_ret)
struct cls_rgw_lc_rm_entry_op {
pair<string, int> entry;
rgw_lc_entry_t entry;
cls_rgw_lc_rm_entry_op() {}
void encode(bufferlist& bl) const {
@ -1049,7 +1090,7 @@ struct cls_rgw_lc_rm_entry_op {
WRITE_CLASS_ENCODER(cls_rgw_lc_rm_entry_op)
struct cls_rgw_lc_set_entry_op {
pair<string, int> entry;
rgw_lc_entry_t entry;
cls_rgw_lc_set_entry_op() {}
void encode(bufferlist& bl) const {

View File

@ -193,6 +193,7 @@ void usage()
cout << " lc list list all bucket lifecycle progress\n";
cout << " lc get get a lifecycle bucket configuration\n";
cout << " lc process manually process lifecycle\n";
cout << " lc reshard fix fix LC for a resharded bucket\n";
cout << " metadata get get metadata info\n";
cout << " metadata put put metadata info\n";
cout << " metadata rm remove metadata info\n";
@ -435,6 +436,7 @@ enum {
OPT_LC_LIST,
OPT_LC_GET,
OPT_LC_PROCESS,
OPT_LC_RESHARD_FIX,
OPT_ORPHANS_FIND,
OPT_ORPHANS_FINISH,
OPT_ORPHANS_LIST_JOBS,
@ -888,6 +890,10 @@ static int get_cmd(const char *cmd, const char *prev_cmd, const char *prev_prev_
return OPT_LC_GET;
if (strcmp(cmd, "process") == 0)
return OPT_LC_PROCESS;
} else if ((prev_prev_cmd && strcmp(prev_prev_cmd, "lc") == 0) &&
strcmp(prev_cmd, "reshard") == 0) {
if (strcmp(cmd, "fix") == 0)
return OPT_LC_RESHARD_FIX;
} else if (strcmp(prev_cmd, "orphans") == 0) {
if (strcmp(cmd, "find") == 0)
return OPT_ORPHANS_FIND;
@ -6578,6 +6584,15 @@ next:
}
}
if (opt_cmd == OPT_LC_RESHARD_FIX) {
  // "lc reshard fix": repair lifecycle shard entries; per-bucket results are
  // written through the formatter by fix_lc_shards() itself.
  ret = RGWBucketAdminOp::fix_lc_shards(store, bucket_op,f);
  if (ret < 0) {
    // report the operation that actually failed, with a separator before the
    // strerror text
    cerr << "ERROR: fixing lc shards: " << cpp_strerror(-ret) << std::endl;
  }
}
if (opt_cmd == OPT_ORPHANS_FIND) {
if (!yes_i_really_mean_it) {
cerr << "accidental removal of active objects can not be reversed; "

View File

@ -31,6 +31,7 @@
// until everything is moved from rgw_common
#include "rgw_common.h"
#include "rgw_reshard.h"
#include "rgw_lc.h"
#include "cls/user/cls_user_types.h"
#define dout_context g_ceph_context
@ -1877,6 +1878,94 @@ int RGWBucketAdminOp::clear_stale_instances(RGWRados *store,
return process_stale_instances(store, op_state, flusher, process_f);
}
/**
 * Load the bucket info and attributes for tenant_name/bucket_name and hand
 * them to rgw::lc::fix_lc_shard_entry().
 *
 * @return the negative error from get_bucket_info() if the lookup fails,
 *         otherwise whatever fix_lc_shard_entry() returns
 */
static int fix_single_bucket_lc(RGWRados *store,
                                const std::string& tenant_name,
                                const std::string& bucket_name)
{
  auto sysobj_ctx = store->svc.sysobj->init_obj_ctx();
  RGWBucketInfo info;
  map <std::string, bufferlist> attrs;

  const int r = store->get_bucket_info(sysobj_ctx, tenant_name, bucket_name,
                                       info, nullptr, &attrs);
  if (r >= 0) {
    return rgw::lc::fix_lc_shard_entry(store, info, attrs);
  }

  // TODO: Should we handle the case where the bucket could've been removed between
  // listing and fetching?
  return r;
}
/**
 * Emit one "bucket_entry" object describing the outcome for a bucket.
 *
 * The "bucket" field is the bucket name, prefixed with "<tenant>/" when a
 * tenant is given; "status" is dumped as passed in.
 */
static void format_lc_status(Formatter* formatter,
                             const std::string& tenant_name,
                             const std::string& bucket_name,
                             int status)
{
  formatter->open_object_section("bucket_entry");

  std::string display_name;
  if (tenant_name.empty()) {
    display_name = bucket_name;
  } else {
    display_name = tenant_name + "/" + bucket_name;
  }

  formatter->dump_string("bucket", display_name);
  formatter->dump_int("status", status);
  formatter->close_section(); // bucket_entry
}
/**
 * Run the LC shard fix for one bucket and report its result.
 *
 * The return code of fix_single_bucket_lc() is negated before being dumped,
 * so a failure shows up as a positive errno value in the "status" field.
 */
static void process_single_lc_entry(RGWRados *store, Formatter *formatter,
                                    const std::string& tenant_name,
                                    const std::string& bucket_name)
{
  const int fix_result = fix_single_bucket_lc(store, tenant_name, bucket_name);
  const int reported_status = -fix_result;
  format_lc_status(formatter, tenant_name, bucket_name, reported_status);
}
/**
 * Fix lifecycle shard entries for one bucket or for every bucket.
 *
 * If op_state carries a bucket name, only that bucket (in the tenant of the
 * op_state user id) is processed. Otherwise all keys of the "bucket" metadata
 * section are listed in batches of default_max_keys and each one is processed;
 * results are emitted inside an "lc_fix_status" array.
 *
 * Per-bucket failures are only reported through the formatter (see
 * process_single_lc_entry()); they do not affect the return value.
 *
 * @return 0 on completion, or the negative error from list_keys_init() /
 *         list_keys_next() if the bucket listing itself fails
 */
int RGWBucketAdminOp::fix_lc_shards(RGWRados *store,
                                    RGWBucketAdminOpState& op_state,
                                    RGWFormatterFlusher& flusher)
{
  std::string marker;
  void *handle = nullptr;
  Formatter *formatter = flusher.get_formatter();
  static constexpr auto default_max_keys = 1000;

  // Must start out defined: the loop condition below reads it even when
  // list_keys_next() returned -ENOENT, in which case it may not have been
  // written.
  bool truncated = false;
  if (const std::string& bucket_name = op_state.get_bucket_name();
      ! bucket_name.empty()) {
    const rgw_user user_id = op_state.get_user_id();
    process_single_lc_entry(store, formatter, user_id.tenant, bucket_name);
    formatter->flush(cout);
  } else {
    int ret = store->meta_mgr->list_keys_init("bucket", marker, &handle);
    if (ret < 0) {
      std::cerr << "ERROR: can't get key: " << cpp_strerror(-ret) << std::endl;
      return ret;
    }

    {
      formatter->open_array_section("lc_fix_status");
      // Runs on every exit from this scope (including the early error return):
      // releases the listing handle and closes/flushes the output array.
      auto sg = make_scope_guard([&store, &handle, &formatter](){
        store->meta_mgr->list_keys_complete(handle);
        formatter->close_section(); // lc_fix_status
        formatter->flush(cout);
      });
      do {
        list<std::string> keys;
        ret = store->meta_mgr->list_keys_next(handle, default_max_keys, keys, &truncated);
        if (ret < 0 && ret != -ENOENT) {
          std::cerr << "ERROR: list_keys_next(): " << cpp_strerror(-ret) << std::endl;
          return ret;
        }
        if (ret != -ENOENT) {
          for (const auto &key:keys) {
            auto [tenant_name, bucket_name] = split_tenant(key);
            process_single_lc_entry(store, formatter, tenant_name, bucket_name);
          }
        }
        formatter->flush(cout); // regularly flush every 1k entries
      } while (truncated);
    }

  }
  return 0;
}
void rgw_data_change::dump(Formatter *f) const
{
string type;

View File

@ -365,6 +365,8 @@ public:
static int clear_stale_instances(RGWRados *store, RGWBucketAdminOpState& op_state,
RGWFormatterFlusher& flusher);
static int fix_lc_shards(RGWRados *store, RGWBucketAdminOpState& op_state,
RGWFormatterFlusher& flusher);
};

View File

@ -16,6 +16,7 @@
#include "rgw_common.h"
#include "rgw_bucket.h"
#include "rgw_lc.h"
#include "rgw_string.h"
#include "services/svc_sys_obj.h"
@ -952,16 +953,17 @@ int RGWLC::bucket_lc_process(string& shard_id)
boost::split(result, shard_id, boost::is_any_of(":"));
string bucket_tenant = result[0];
string bucket_name = result[1];
string bucket_id = result[2];
string bucket_marker = result[2];
int ret = store->get_bucket_info(obj_ctx, bucket_tenant, bucket_name, bucket_info, NULL, &bucket_attrs);
if (ret < 0) {
ldpp_dout(this, 0) << "LC:get_bucket_info for " << bucket_name << " failed" << dendl;
return ret;
}
ret = bucket_info.bucket.bucket_id.compare(bucket_id) ;
if (ret != 0) {
ldpp_dout(this, 0) << "LC:old bucket id found. " << bucket_name << " should be deleted" << dendl;
if (bucket_info.bucket.marker != bucket_marker) {
ldpp_dout(this, 1) << "LC: deleting stale entry found for bucket=" << bucket_tenant
<< ":" << bucket_name << " cur_marker=" << bucket_info.bucket.marker
<< " orig_marker=" << bucket_marker << dendl;
return -ENOENT;
}
@ -1289,7 +1291,7 @@ void RGWLifecycleConfiguration::generate_test_instances(list<RGWLifecycleConfigu
o.push_back(new RGWLifecycleConfiguration);
}
static void get_lc_oid(CephContext *cct, const string& shard_id, string *oid)
void get_lc_oid(CephContext *cct, const string& shard_id, string *oid)
{
int max_objs = (cct->_conf->rgw_lc_max_objs > HASH_PRIME ? HASH_PRIME : cct->_conf->rgw_lc_max_objs);
int index = ceph_str_hash_linux(shard_id.c_str(), shard_id.size()) % HASH_PRIME % max_objs;
@ -1300,11 +1302,17 @@ static void get_lc_oid(CephContext *cct, const string& shard_id, string *oid)
return;
}
// Build the LC shard entry name for a bucket: its tenant, name and marker
// joined with ':' (i.e. "tenant:name:marker").
static std::string get_lc_shard_name(const rgw_bucket& bucket)
{
  const char delim = ':';
  return string_join_reserve(delim, bucket.tenant, bucket.name, bucket.marker);
}
template<typename F>
static int guard_lc_modify(RGWRados* store, const rgw_bucket& bucket, const string& cookie, const F& f) {
CephContext *cct = store->ctx();
string shard_id = bucket.tenant + ':' + bucket.name + ':' + bucket.bucket_id;
string shard_id = get_lc_shard_name(bucket);
string oid;
get_lc_oid(cct, shard_id, &oid);
@ -1390,3 +1398,52 @@ int RGWLC::remove_bucket_config(RGWBucketInfo& bucket_info,
return ret;
}
namespace rgw::lc {

/**
 * Make sure a bucket that has a lifecycle attribute also has an LC shard
 * entry keyed by its current shard name (see get_lc_shard_name()).
 *
 * Situations handled:
 *  1. the entry exists under the marker-based name (plain buckets and newly
 *     resharded buckets): nothing to do
 *  2. no entry exists (typically the reshard happened before the update and a
 *     later LC run already dropped the entry): a new entry is created
 *  3. an entry exists only under the post-reshard bucket id: a marker-based
 *     entry is created; the old one is not removed here, it is left for the
 *     next LC processing run to catch
 *
 * @param store        RGW store used for the LC pool and logging context
 * @param bucket_info  bucket whose LC entry is checked
 * @param battrs       bucket attributes; RGW_ATTR_LC must be present for any
 *                     work to be done
 * @return 0 if there is no LC attribute or the entry already exists; the
 *         result of guard_lc_modify() if the entry had to be created; or the
 *         error from cls_rgw_lc_get_entry() for any failure other than -ENOENT
 */
int fix_lc_shard_entry(RGWRados* store, const RGWBucketInfo& bucket_info,
                       const map<std::string,bufferlist>& battrs)
{
  if (battrs.find(RGW_ATTR_LC) == battrs.end()) {
    return 0; // No entry, nothing to fix
  }

  const auto shard_name = get_lc_shard_name(bucket_info.bucket);
  std::string lc_oid;
  get_lc_oid(store->ctx(), shard_name, &lc_oid);

  rgw_lc_entry_t entry;
  auto lc_pool_ctx = store->get_lc_pool_ctx();
  int ret = cls_rgw_lc_get_entry(*lc_pool_ctx, lc_oid, shard_name, entry);
  if (ret == 0) {
    ldout(store->ctx(), 5) << "Entry already exists, nothing to do" << dendl;
    return ret; // entry is already existing correctly set to marker
  }

  ldout(store->ctx(), 5) << "cls_rgw_lc_get_entry errored ret code=" << ret << dendl;
  if (ret != -ENOENT) {
    return ret; // any other lookup failure is passed straight to the caller
  }

  ldout(store->ctx(), 1) << "No entry for bucket=" << bucket_info.bucket.name
                         << " creating " << dendl;

  // TODO: we have too many ppl making cookies like this!
  char cookie_buf[COOKIE_LEN + 1];
  gen_rand_alphanumeric(store->ctx(), cookie_buf, sizeof(cookie_buf) - 1);
  const std::string cookie(cookie_buf);

  // The callback writes through the captured pool context and oid rather than
  // the ones handed to it by guard_lc_modify().
  auto set_entry = [&lc_pool_ctx, &lc_oid](librados::IoCtx *,
                                           const string&,
                                           const pair<string, int>& lc_entry) {
    return cls_rgw_lc_set_entry(*lc_pool_ctx, lc_oid, lc_entry);
  };
  return guard_lc_modify(store, bucket_info.bucket, cookie, set_entry);
}

} // namespace rgw::lc

View File

@ -502,6 +502,11 @@ class RGWLC : public DoutPrefixProvider {
int handle_multipart_expiration(RGWRados::Bucket *target, const map<string, lc_op>& prefix_map);
};
namespace rgw::lc {
int fix_lc_shard_entry(RGWRados *store, const RGWBucketInfo& bucket_info,
const map<std::string,bufferlist>& battrs);
} // namespace rgw::lc
#endif

View File

@ -115,6 +115,7 @@
lc list list all bucket lifecycle progress
lc get get a lifecycle bucket configuration
lc process manually process lifecycle
lc reshard fix fix LC for a resharded bucket
metadata get get metadata info
metadata put put metadata info
metadata rm remove metadata info