From 3d3ecb2a1eef8fd900a56034ce90d4b90ba1e633 Mon Sep 17 00:00:00 2001 From: Shilpa Jagannath Date: Tue, 14 Jul 2020 17:56:05 +0530 Subject: [PATCH] rgw: failing to reshard, restore old indexes Signed-off-by: Shilpa Jagannath --- src/rgw/rgw_reshard.cc | 41 ++++++++++++++++++++++++++++------------- src/rgw/rgw_reshard.h | 1 + 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/src/rgw/rgw_reshard.cc b/src/rgw/rgw_reshard.cc index 5c1e28018a0..a809d791b21 100644 --- a/src/rgw/rgw_reshard.cc +++ b/src/rgw/rgw_reshard.cc @@ -659,7 +659,6 @@ int RGWBucketReshard::do_reshard(int num_shards, } //overwrite current_index for the next reshard process - const auto prev_index = bucket_info.layout.current_index; bucket_info.layout.current_index = *bucket_info.layout.target_index; bucket_info.layout.target_index = std::nullopt; // target_layout doesn't need to exist after reshard bucket_info.layout.resharding = rgw::BucketReshardState::None; @@ -674,18 +673,6 @@ int RGWBucketReshard::do_reshard(int num_shards, return ret; } - // resharding successful, so remove old bucket index shards; use - // best effort and don't report out an error; the lock isn't needed - // at this point since all we're using a best effor to to remove old - // shard objects - - ret = store->svc()->bi->clean_index(dpp, bucket_info, prev_index); - if (ret < 0) { - ldpp_dout(dpp, -1) << "Error: " << __func__ << - " failed to clean up old shards; " << - "RGWRados::clean_bucket_index returned " << ret << dendl; - } - return 0; // NB: some error clean-up is done by ~BucketInfoReshardUpdate } // RGWBucketReshard::do_reshard @@ -719,6 +706,9 @@ int RGWBucketReshard::execute(int num_shards, int max_op_entries, } } + // keep a copy of old index layout + prev_index = bucket_info.layout.current_index; + ret = do_reshard(num_shards, max_op_entries, verbose, out, formatter, dpp); @@ -731,6 +721,18 @@ int RGWBucketReshard::execute(int num_shards, int max_op_entries, reshard_lock.unlock(); + // resharding 
successful, so remove old bucket index shards; use + // best effort and don't report out an error; the lock isn't needed + // at this point since we're only making a best effort to remove old + // shard objects + + ret = store->svc()->bi->clean_index(dpp, bucket_info, prev_index); + if (ret < 0) { + ldpp_dout(dpp, -1) << __func__ << "Error: " << __func__ << + " failed to clean up old shards; " << + "RGWRados::clean_bucket_index returned " << ret << dendl; + } + ldpp_dout(dpp, 1) << __func__ << " INFO: reshard of bucket \"" << bucket_info.bucket.name << "\" completed successfully" << dendl; @@ -752,6 +754,19 @@ error_out: "RGWRados::clean_bucket_index returned " << ret2 << dendl; } + // restore old index if reshard fails + bucket_info.layout.current_index = prev_index; + ret = store->getRados()->put_bucket_instance_info(bucket_info, false, real_time(), &bucket_attrs, dpp); + if (ret < 0) { + lderr(store->ctx()) << "ERROR: failed writing bucket instance info: " << dendl; + return ret; + } + + ret = store->svc()->bi->init_index(dpp, bucket_info, bucket_info.layout.current_index); + if (ret < 0) { + return ret; + } + return ret; } // execute diff --git a/src/rgw/rgw_reshard.h b/src/rgw/rgw_reshard.h index c75dd169f87..ed1325d5adf 100644 --- a/src/rgw/rgw_reshard.h +++ b/src/rgw/rgw_reshard.h @@ -76,6 +76,7 @@ private: rgw::sal::RadosStore* store; RGWBucketInfo bucket_info; std::map bucket_attrs; + rgw::bucket_index_layout_generation prev_index; RGWBucketReshardLock reshard_lock; RGWBucketReshardLock* outer_reshard_lock;