From e84148a609fe938971cfd0a2c21798706fa5d78a Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Thu, 21 Jul 2016 13:26:09 -0700 Subject: [PATCH] rgw: add configurables for {data,meta} sync error injection Signed-off-by: Yehuda Sadeh --- src/common/config_opts.h | 4 ++++ src/rgw/rgw_data_sync.cc | 12 ++++++++++-- src/rgw/rgw_sync.cc | 6 ++++++ src/rgw/rgw_sync.h | 3 +++ 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 04ac61832e5..39406bc183b 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -1454,6 +1454,10 @@ OPTION(rgw_md_notify_interval_msec, OPT_INT, 200) // metadata changes notificati OPTION(rgw_run_sync_thread, OPT_BOOL, true) // whether radosgw (not radosgw-admin) spawns the sync thread OPTION(rgw_sync_lease_period, OPT_INT, 120) // time in second for lease that rgw takes on a specific log (or log shard) +OPTION(rgw_sync_data_inject_err_probability, OPT_DOUBLE, 0) // range [0, 1] +OPTION(rgw_sync_meta_inject_err_probability, OPT_DOUBLE, 0) // range [0, 1] + + OPTION(rgw_realm_reconfigure_delay, OPT_DOUBLE, 2) // seconds to wait before reloading realm configuration OPTION(rgw_period_push_interval, OPT_DOUBLE, 2) // seconds to wait before retrying "period push" OPTION(rgw_period_push_interval_max, OPT_DOUBLE, 30) // maximum interval after exponential backoff diff --git a/src/rgw/rgw_data_sync.cc b/src/rgw/rgw_data_sync.cc index f22775b5d62..43c770aec09 100644 --- a/src/rgw/rgw_data_sync.cc +++ b/src/rgw/rgw_data_sync.cc @@ -2068,6 +2068,8 @@ class RGWBucketSyncSingleEntryCR : public RGWCoroutine { RGWDataSyncDebugLogger logger; + bool error_injection; + public: RGWBucketSyncSingleEntryCR(RGWDataSyncEnv *_sync_env, @@ -2094,6 +2096,8 @@ public: set_status("init"); logger.init(sync_env, "Object", ss.str()); + + error_injection = (sync_env->cct->_conf->rgw_sync_data_inject_err_probability > 0); } int operate() { @@ -2111,8 +2115,12 @@ public: ldout(sync_env->cct, 0) << "ERROR: " << __func__ << "(): entry with empty obj name, skipping" << dendl; goto done; } - if (op == CLS_RGW_OP_ADD || - op == CLS_RGW_OP_LINK_OLH) { + if (error_injection && + rand() % 10000 < cct->_conf->rgw_sync_data_inject_err_probability * 10000.0) { + ldout(sync_env->cct, 0) << __func__ << ": injecting data sync error on key=" << key.name << dendl; + retcode = -EIO; + } else if (op == CLS_RGW_OP_ADD || + op == CLS_RGW_OP_LINK_OLH) { if (op == CLS_RGW_OP_ADD && !key.instance.empty() && key.instance != "null") { set_status("skipping entry"); ldout(sync_env->cct, 10) << "bucket skipping sync obj: " << sync_env->source_zone << "/" << bucket_info->bucket << "/" << key << "[" << versioned_epoch << "]: versioned object will be synced on link_olh" << dendl; diff --git a/src/rgw/rgw_sync.cc b/src/rgw/rgw_sync.cc index 474e2c558d8..f7b419be77f 100644 --- a/src/rgw/rgw_sync.cc +++ b/src/rgw/rgw_sync.cc @@ -1108,6 +1108,12 @@ int RGWMetaSyncSingleEntryCR::operate() { reenter(this) { #define NUM_TRANSIENT_ERROR_RETRIES 10 + if (error_injection && + rand() % 10000 < cct->_conf->rgw_sync_meta_inject_err_probability * 10000.0) { + ldout(sync_env->cct, 0) << __FILE__ << ":" << __LINE__ << ": injecting meta sync error on key=" << raw_key << dendl; + return set_cr_error(-EIO); + } + if (op_status != MDLOG_STATUS_COMPLETE) { ldout(sync_env->cct, 20) << "skipping pending operation" << dendl; yield call(marker_tracker->finish(entry_marker)); diff --git a/src/rgw/rgw_sync.h b/src/rgw/rgw_sync.h index 59f6b426d07..9fc9430acf5 100644 --- a/src/rgw/rgw_sync.h +++ b/src/rgw/rgw_sync.h @@ -421,6 +421,8 @@ class RGWMetaSyncSingleEntryCR : public RGWCoroutine { int tries; + bool error_injection; + public: RGWMetaSyncSingleEntryCR(RGWMetaSyncEnv *_sync_env, const string& _raw_key, const string& _entry_marker, @@ -431,6 +433,7 @@ public: op_status(_op_status), pos(0), sync_status(0), marker_tracker(_marker_tracker), tries(0) { + error_injection = (sync_env->cct->_conf->rgw_sync_meta_inject_err_probability > 0); } int operate();