mirror of
https://github.com/ceph/ceph
synced 2025-02-23 11:07:35 +00:00
Merge pull request #52711 from cbodley/wip-62250
rgw: retry metadata cache notifications with INVALIDATE_OBJ Reviewed-by: Adam Emerson <aemerson@redhat.com>
This commit is contained in:
commit
b6383e9e0b
@ -3258,7 +3258,7 @@ options:
|
||||
is very heavily loaded. Beware that increasing this value may cause some operations
|
||||
to take longer in exceptional cases and thus may, rarely, cause clients to time
|
||||
out.
|
||||
default: 3
|
||||
default: 10
|
||||
tags:
|
||||
- error recovery
|
||||
services:
|
||||
|
@ -1351,13 +1351,7 @@ int RGWRados::init_ctl(const DoutPrefixProvider *dpp)
|
||||
*/
|
||||
int RGWRados::init_begin(const DoutPrefixProvider *dpp)
|
||||
{
|
||||
int ret;
|
||||
|
||||
inject_notify_timeout_probability =
|
||||
cct->_conf.get_val<double>("rgw_inject_notify_timeout_probability");
|
||||
max_notify_retries = cct->_conf.get_val<uint64_t>("rgw_max_notify_retries");
|
||||
|
||||
ret = init_svc(false, dpp);
|
||||
int ret = init_svc(false, dpp);
|
||||
if (ret < 0) {
|
||||
ldpp_dout(dpp, 0) << "ERROR: failed to init services (ret=" << cpp_strerror(-ret) << ")" << dendl;
|
||||
return ret;
|
||||
|
@ -390,11 +390,6 @@ class RGWRados
|
||||
|
||||
librados::IoCtx root_pool_ctx; // .rgw
|
||||
|
||||
double inject_notify_timeout_probability{0.0};
|
||||
unsigned max_notify_retries{0};
|
||||
|
||||
friend class RGWWatcher;
|
||||
|
||||
ceph::mutex bucket_id_lock{ceph::make_mutex("rados_bucket_id")};
|
||||
|
||||
// This field represents the number of bucket index object shards
|
||||
|
@ -278,6 +278,10 @@ int RGWSI_Notify::do_start(optional_yield y, const DoutPrefixProvider *dpp)
|
||||
return r;
|
||||
}
|
||||
|
||||
inject_notify_timeout_probability =
|
||||
cct->_conf.get_val<double>("rgw_inject_notify_timeout_probability");
|
||||
max_notify_retries = cct->_conf.get_val<uint64_t>("rgw_max_notify_retries");
|
||||
|
||||
control_pool = zone_svc->get_zone_params().control_pool;
|
||||
|
||||
int ret = init_watch(dpp, y);
|
||||
@ -396,19 +400,69 @@ int RGWSI_Notify::distribute(const DoutPrefixProvider *dpp, const string& key,
|
||||
return 0;
|
||||
}
|
||||
|
||||
namespace librados {
|
||||
|
||||
static std::ostream& operator<<(std::ostream& out, const notify_timeout_t& t)
|
||||
{
|
||||
return out << t.notifier_id << ':' << t.cookie;
|
||||
}
|
||||
|
||||
} // namespace librados
|
||||
|
||||
using timeout_vector = std::vector<librados::notify_timeout_t>;
|
||||
|
||||
static timeout_vector decode_timeouts(const bufferlist& bl)
|
||||
{
|
||||
using ceph::decode;
|
||||
auto p = bl.begin();
|
||||
|
||||
// decode and discard the acks
|
||||
uint32_t num_acks;
|
||||
decode(num_acks, p);
|
||||
for (auto i = 0u; i < num_acks; ++i) {
|
||||
std::pair<uint64_t, uint64_t> id;
|
||||
decode(id, p);
|
||||
// discard the payload
|
||||
uint32_t blen;
|
||||
decode(blen, p);
|
||||
p += blen;
|
||||
}
|
||||
|
||||
// decode and return the timeouts
|
||||
uint32_t num_timeouts;
|
||||
decode(num_timeouts, p);
|
||||
|
||||
timeout_vector timeouts;
|
||||
for (auto i = 0u; i < num_timeouts; ++i) {
|
||||
std::pair<uint64_t, uint64_t> id;
|
||||
decode(id, p);
|
||||
timeouts.push_back({id.first, id.second});
|
||||
}
|
||||
return timeouts;
|
||||
}
|
||||
|
||||
int RGWSI_Notify::robust_notify(const DoutPrefixProvider *dpp,
|
||||
RGWSI_RADOS::Obj& notify_obj,
|
||||
const RGWCacheNotifyInfo& cni,
|
||||
optional_yield y)
|
||||
{
|
||||
bufferlist bl;
|
||||
bufferlist bl, rbl;
|
||||
encode(cni, bl);
|
||||
|
||||
// First, try to send, without being fancy about it.
|
||||
auto r = notify_obj.notify(dpp, bl, 0, nullptr, y);
|
||||
auto r = notify_obj.notify(dpp, bl, 0, &rbl, y);
|
||||
|
||||
if (r < 0) {
|
||||
timeout_vector timeouts;
|
||||
try {
|
||||
timeouts = decode_timeouts(rbl);
|
||||
} catch (const buffer::error& e) {
|
||||
ldpp_dout(dpp, 0) << "robust_notify failed to decode notify response: "
|
||||
<< e.what() << dendl;
|
||||
}
|
||||
|
||||
ldpp_dout(dpp, 1) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||||
<< " Watchers " << timeouts << " did not respond."
|
||||
<< " Notify failed on object " << cni.obj << ": "
|
||||
<< cpp_strerror(-r) << dendl;
|
||||
}
|
||||
@ -427,10 +481,19 @@ int RGWSI_Notify::robust_notify(const DoutPrefixProvider *dpp,
|
||||
ldpp_dout(dpp, 1) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||||
<< " Invalidating obj=" << info.obj << " tries="
|
||||
<< tries << dendl;
|
||||
r = notify_obj.notify(dpp, bl, 0, nullptr, y);
|
||||
r = notify_obj.notify(dpp, retrybl, 0, &rbl, y);
|
||||
if (r < 0) {
|
||||
timeout_vector timeouts;
|
||||
try {
|
||||
timeouts = decode_timeouts(rbl);
|
||||
} catch (const buffer::error& e) {
|
||||
ldpp_dout(dpp, 0) << "robust_notify failed to decode notify response: "
|
||||
<< e.what() << dendl;
|
||||
}
|
||||
|
||||
ldpp_dout(dpp, 1) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||||
<< " invalidation attempt " << tries << " failed: "
|
||||
<< " Watchers " << timeouts << " did not respond."
|
||||
<< " Invalidation attempt " << tries << " failed: "
|
||||
<< cpp_strerror(-r) << dendl;
|
||||
}
|
||||
}
|
||||
|
@ -42,7 +42,7 @@ private:
|
||||
bool enabled{false};
|
||||
|
||||
double inject_notify_timeout_probability{0};
|
||||
static constexpr unsigned max_notify_retries = 10;
|
||||
uint64_t max_notify_retries = 10;
|
||||
|
||||
std::string get_control_oid(int i);
|
||||
RGWSI_RADOS::Obj pick_control_obj(const std::string& key);
|
||||
|
Loading…
Reference in New Issue
Block a user