rgw: reshard improvements

Improve the error log message emitted when an attempt to renew an expired reshard lock fails.

Add two new configurable options to manage resharding:
* rgw_reshard_batch_size: number of reshard entries to batch together before sending the operations to the CLS back-end.
* rgw_reshard_max_aio: maximum number of outstanding asynchronous I/O operations to allow at a time.

Alter the rgw_reshard_bucket_lock_duration default from 2 minutes to 6 minutes.

Add documentation, minimum values, tags, and service to a few rgw reshard configuration options.

Change some rgw_reshard_* options from LEVEL_DEV to LEVEL_ADVANCED.

Signed-off-by: J. Eric Ivancich <ivancich@redhat.com>
2025-01-01 08:32:24 +00:00 · 2018-11-08 19:40:48 -05:00 · 2018-11-08 19:40:48 -05:00 · b713bb77a1
commit b713bb77a1
parent 5552971a8b
2 changed files with 46 additions and 15 deletions
--- a/src/common/options.cc
+++ b/src/common/options.cc
@ -6261,13 +6261,32 @@ std::vector<Option> get_rgw_options() {
    .set_default(true)
    .set_description("Enable stats on bucket listing in Swift"),

-    Option("rgw_reshard_num_logs", Option::TYPE_INT, Option::LEVEL_DEV)
+    Option("rgw_reshard_num_logs", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
    .set_default(16)
-    .set_description(""),
+    .set_min(1)
+    .set_description("")
+    .add_service("rgw"),

-    Option("rgw_reshard_bucket_lock_duration", Option::TYPE_INT, Option::LEVEL_DEV)
-    .set_default(120)
-    .set_description(""),
+    Option("rgw_reshard_bucket_lock_duration", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(360)
+    .set_min(30)
+    .set_description("Number of seconds the timeout on the reshard locks (bucket reshard lock and reshard log lock) are set to. As a reshard proceeds these locks can be renewed/extended. If too short, reshards cannot complete and will fail, causing a future reshard attempt. If too long a hung or crashed reshard attempt will keep the bucket locked for an extended period, not allowing RGW to detect the failed reshard attempt and recover.")
+    .add_tag("performance")
+    .add_service("rgw"),
+    
+    Option("rgw_reshard_batch_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(64)
+    .set_min(8)
+    .set_description("Number of reshard entries to batch together before sending the operations to the CLS back-end")
+    .add_tag("performance")
+    .add_service("rgw"),
+
+    Option("rgw_reshard_max_aio", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(128)
+    .set_min(16)
+    .set_description("Maximum number of outstanding asynchronous I/O operations to allow at a time during resharding")
+    .add_tag("performance")
+    .add_service("rgw"),

    Option("rgw_trust_forwarded_https", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
    .set_default(false)
@ -6357,7 +6376,8 @@ std::vector<Option> get_rgw_options() {

    Option("rgw_reshard_thread_interval", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
    .set_default(10_min)
-    .set_description(""),
+    .set_min(10_min)
+    .set_description("Number of seconds between processing of reshard log entries"),

    Option("rgw_cache_expiry_interval", Option::TYPE_UINT,
 	   Option::LEVEL_ADVANCED)
--- a/src/rgw/rgw_reshard.cc
+++ b/src/rgw/rgw_reshard.cc
@ -2,6 +2,7 @@
 // vim: ts=8 sw=2 smarttab

 #include <limits>
+#include <sstream>

 #include "rgw_rados.h"
 #include "rgw_bucket.h"
@ -21,10 +22,6 @@ const string reshard_lock_name = "reshard_process";
 const string bucket_instance_lock_name = "bucket_instance_lock";


-#define RESHARD_SHARD_WINDOW 64
-#define RESHARD_MAX_AIO 128
-
-
 class BucketReshardShard {
  RGWRados *store;
  const RGWBucketInfo& bucket_info;
@ -33,6 +30,8 @@ class BucketReshardShard {
  vector<rgw_cls_bi_entry> entries;
  map<uint8_t, rgw_bucket_category_stats> stats;
  deque<librados::AioCompletion *>& aio_completions;
+  uint64_t max_aio_completions;
+  uint64_t reshard_shard_batch_size;

  int wait_next_completion() {
    librados::AioCompletion *c = aio_completions.front();
@ -52,7 +51,7 @@ class BucketReshardShard {
  }

  int get_completion(librados::AioCompletion **c) {
-    if (aio_completions.size() >= RESHARD_MAX_AIO) {
+    if (aio_completions.size() >= max_aio_completions) {
      int ret = wait_next_completion();
      if (ret < 0) {
        return ret;
@ -74,6 +73,11 @@ public:
  {
    num_shard = (bucket_info.num_shards > 0 ? _num_shard : -1);
    bs.init(bucket_info.bucket, num_shard, nullptr /* no RGWBucketInfo */);
+
+    max_aio_completions =
+      store->ctx()->_conf.get_val<uint64_t>("rgw_reshard_max_aio");
+    reshard_shard_batch_size =
+      store->ctx()->_conf.get_val<uint64_t>("rgw_reshard_batch_size");
  }

  int get_num_shard() {
@ -90,7 +94,7 @@ public:
      target.total_size_rounded += entry_stats.total_size_rounded;
      target.actual_size += entry_stats.actual_size;
    }
-    if (entries.size() >= RESHARD_SHARD_WINDOW) {
+    if (entries.size() >= reshard_shard_batch_size) {
      int ret = flush();
      if (ret < 0) {
        return ret;
@ -401,7 +405,8 @@ RGWBucketReshardLock::RGWBucketReshardLock(RGWRados* _store,
  ephemeral(_ephemeral),
  internal_lock(reshard_lock_name)
 {
-  const int lock_dur_secs = store->ctx()->_conf->rgw_reshard_bucket_lock_duration;
+  const int lock_dur_secs = store->ctx()->_conf.get_val<uint64_t>(
+    "rgw_reshard_bucket_lock_duration");
  duration = std::chrono::seconds(lock_dur_secs);

 #define COOKIE_LEN 16
@ -450,8 +455,14 @@ int RGWBucketReshardLock::renew(const Clock::time_point& now) {
    ret = internal_lock.lock_exclusive(&store->reshard_pool_ctx, lock_oid);
  }
  if (ret < 0) { /* expired or already locked by another processor */
+    std::stringstream error_s;
+    if (-ENOENT == ret) {
+      error_s << "ENOENT (lock expired or never initially locked)";
+    } else {
+      error_s << ret << " (" << cpp_strerror(-ret) << ")";
+    }
    ldout(store->ctx(), 5) << __func__ << "(): failed to renew lock on " <<
-      lock_oid << " with " << cpp_strerror(-ret) << dendl;
+      lock_oid << " with error " << error_s.str() << dendl;
    return ret;
  }
  internal_lock.set_must_renew(false);
@ -1093,7 +1104,7 @@ void *RGWReshard::ReshardWorker::entry() {

    utime_t end = ceph_clock_now();
    end -= start;
-    int secs = cct->_conf->rgw_reshard_thread_interval;
+    int secs = cct->_conf.get_val<uint64_t>("rgw_reshard_thread_interval");

    if (secs <= end.sec())
      continue; // next round