mirror of
https://github.com/ceph/ceph
synced 2025-02-04 01:14:24 +00:00
Merge PR #45669 into main
* refs/pull/45669/head: client: switch to use 32 bits ext_num_fwd; client: switch to use 32 bits ext_num_retry; ceph_fs.h: add 32 bits extended num_retry and num_fwd support; ceph_fs.h: switch to use its own encode/decode helpers. Reviewed-by: Venky Shankar <vshankar@redhat.com>
This commit is contained in:
commit
8ce318517d
@ -2477,7 +2477,7 @@ void Client::send_request(MetaRequest *request, MetaSession *session,
|
||||
mds_rank_t mds = session->mds_num;
|
||||
ldout(cct, 10) << __func__ << " rebuilding request " << request->get_tid()
|
||||
<< " for mds." << mds << dendl;
|
||||
auto r = build_client_request(request);
|
||||
auto r = build_client_request(request, mds);
|
||||
if (!r)
|
||||
return;
|
||||
|
||||
@ -2522,31 +2522,31 @@ void Client::send_request(MetaRequest *request, MetaSession *session,
|
||||
session->con->send_message2(std::move(r));
|
||||
}
|
||||
|
||||
ref_t<MClientRequest> Client::build_client_request(MetaRequest *request)
|
||||
ref_t<MClientRequest> Client::build_client_request(MetaRequest *request, mds_rank_t mds)
|
||||
{
|
||||
auto session = mds_sessions.at(mds);
|
||||
bool old_version = !session->mds_features.test(CEPHFS_FEATURE_32BITS_RETRY_FWD);
|
||||
|
||||
/*
|
||||
* The type of 'retry_attempt' in 'MetaRequest' is 'int',
|
||||
* while in 'ceph_mds_request_head' the type of 'num_retry'
|
||||
* is '__u8'. So in case the request retries exceeding 256
|
||||
* times, the MDS will receive an incorrect retry seq.
|
||||
* Avoid infinite retrying after overflow.
|
||||
*
|
||||
* In this case it's usually a bug in MDS and continuing
|
||||
* retrying the request makes no sense.
|
||||
*
|
||||
* In future this could be fixed in ceph code, so avoid
|
||||
* using the hardcode here.
|
||||
* The client increments the retry count on each resend; if the MDS
|
||||
* is an old version, retries are limited to at most 256.
|
||||
*/
|
||||
int max_retry = sizeof(((struct ceph_mds_request_head*)0)->num_retry);
|
||||
max_retry = 1 << (max_retry * CHAR_BIT);
|
||||
if (request->retry_attempt >= max_retry) {
|
||||
request->abort(-CEPHFS_EMULTIHOP);
|
||||
request->caller_cond->notify_all();
|
||||
ldout(cct, 1) << __func__ << " request tid " << request->tid
|
||||
<< " seq overflow" << ", abort it" << dendl;
|
||||
return nullptr;
|
||||
if (request->retry_attempt) {
|
||||
int old_max_retry = sizeof(((struct ceph_mds_request_head*)0)->num_retry);
|
||||
old_max_retry = 1 << (old_max_retry * CHAR_BIT);
|
||||
if ((old_version && request->retry_attempt >= old_max_retry) ||
|
||||
(uint32_t)request->retry_attempt >= UINT32_MAX) {
|
||||
request->abort(-CEPHFS_EMULTIHOP);
|
||||
request->caller_cond->notify_all();
|
||||
ldout(cct, 1) << __func__ << " request tid " << request->tid
|
||||
<< " retry seq overflow" << ", abort it" << dendl;
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
auto req = make_message<MClientRequest>(request->get_op());
|
||||
auto req = make_message<MClientRequest>(request->get_op(), old_version);
|
||||
req->set_tid(request->tid);
|
||||
req->set_stamp(request->op_stamp);
|
||||
memcpy(&req->head, &request->head, sizeof(ceph_mds_request_head));
|
||||
@ -2578,7 +2578,7 @@ ref_t<MClientRequest> Client::build_client_request(MetaRequest *request)
|
||||
req->fscrypt_auth = request->fscrypt_auth;
|
||||
req->fscrypt_file = request->fscrypt_file;
|
||||
req->set_retry_attempt(request->retry_attempt++);
|
||||
req->head.num_fwd = request->num_fwd;
|
||||
req->head.ext_num_fwd = request->num_fwd;
|
||||
const gid_t *_gids;
|
||||
int gid_count = request->perms.get_gids(&_gids);
|
||||
req->set_gid_list(gid_count, _gids);
|
||||
@ -2607,32 +2607,20 @@ void Client::handle_client_request_forward(const MConstRef<MClientRequestForward
|
||||
ceph_assert(request);
|
||||
|
||||
/*
|
||||
* The type of 'num_fwd' in ceph 'MClientRequestForward'
|
||||
* is 'int32_t', while in 'ceph_mds_request_head' the
|
||||
* type is '__u8'. So in case the request bounces between
|
||||
* MDSes exceeding 256 times, the client will get stuck.
|
||||
* Avoid infinite retrying after overflow.
|
||||
*
|
||||
* In this case it's usually a bug in MDS and continuing
|
||||
* bouncing the request makes no sense.
|
||||
*
|
||||
* In future this could be fixed in ceph code, so avoid
|
||||
* using the hardcode here.
|
||||
* The MDS increments the fwd count on each forward. On the
|
||||
* client side, if the received num_fwd is not greater than the
|
||||
* one saved in the request, the MDS is an old version whose
|
||||
* 8-bit counter has overflowed.
|
||||
*/
|
||||
int max_fwd = sizeof(((struct ceph_mds_request_head*)0)->num_fwd);
|
||||
max_fwd = (1 << (max_fwd * CHAR_BIT)) - 1;
|
||||
auto num_fwd = fwd->get_num_fwd();
|
||||
if (num_fwd <= request->num_fwd || num_fwd >= max_fwd) {
|
||||
if (request->num_fwd >= max_fwd || num_fwd >= max_fwd) {
|
||||
request->abort(-CEPHFS_EMULTIHOP);
|
||||
request->caller_cond->notify_all();
|
||||
ldout(cct, 1) << __func__ << " tid " << tid << " seq overflow"
|
||||
<< ", abort it" << dendl;
|
||||
} else {
|
||||
ldout(cct, 10) << __func__ << " tid " << tid
|
||||
<< " old fwd seq " << fwd->get_num_fwd()
|
||||
<< " <= req fwd " << request->num_fwd
|
||||
<< ", ignore it" << dendl;
|
||||
}
|
||||
if (num_fwd <= request->num_fwd || (uint32_t)num_fwd >= UINT32_MAX) {
|
||||
request->abort(-CEPHFS_EMULTIHOP);
|
||||
request->caller_cond->notify_all();
|
||||
ldout(cct, 0) << __func__ << " request tid " << tid << " new num_fwd "
|
||||
<< num_fwd << " old num_fwd " << request->num_fwd << ", fwd seq overflow"
|
||||
<< ", abort it" << dendl;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -956,7 +956,7 @@ protected:
|
||||
void connect_mds_targets(mds_rank_t mds);
|
||||
void send_request(MetaRequest *request, MetaSession *session,
|
||||
bool drop_cap_releases=false);
|
||||
MRef<MClientRequest> build_client_request(MetaRequest *request);
|
||||
MRef<MClientRequest> build_client_request(MetaRequest *request, mds_rank_t mds);
|
||||
void kick_requests(MetaSession *session);
|
||||
void kick_requests_closed(MetaSession *session);
|
||||
void handle_client_request_forward(const MConstRef<MClientRequestForward>& reply);
|
||||
|
@ -46,8 +46,8 @@ void MetaRequest::dump(Formatter *f) const
|
||||
f->dump_unsigned("oldest_client_tid", head.oldest_client_tid);
|
||||
f->dump_unsigned("mdsmap_epoch", head.mdsmap_epoch);
|
||||
f->dump_unsigned("flags", head.flags);
|
||||
f->dump_unsigned("num_retry", head.num_retry);
|
||||
f->dump_unsigned("num_fwd", head.num_fwd);
|
||||
f->dump_unsigned("num_retry", head.ext_num_retry);
|
||||
f->dump_unsigned("num_fwd", head.ext_num_fwd);
|
||||
f->dump_unsigned("num_releases", head.num_releases);
|
||||
|
||||
f->dump_int("abort_rc", abort_rc);
|
||||
|
@ -156,8 +156,8 @@ public:
|
||||
// normal fields
|
||||
void set_tid(ceph_tid_t t) { tid = t; }
|
||||
void set_oldest_client_tid(ceph_tid_t t) { head.oldest_client_tid = t; }
|
||||
void inc_num_fwd() { head.num_fwd = head.num_fwd + 1; }
|
||||
void set_retry_attempt(int a) { head.num_retry = a; }
|
||||
void inc_num_fwd() { head.ext_num_fwd = head.ext_num_fwd + 1; }
|
||||
void set_retry_attempt(int a) { head.ext_num_retry = a; }
|
||||
void set_filepath(const filepath& fp) { path = fp; }
|
||||
void set_filepath2(const filepath& fp) { path2 = fp; }
|
||||
void set_alternate_name(std::string an) { alternate_name = an; }
|
||||
|
@ -14,6 +14,8 @@
|
||||
|
||||
#include "msgr.h"
|
||||
#include "rados.h"
|
||||
#include "include/encoding.h"
|
||||
#include "include/denc.h"
|
||||
|
||||
/*
|
||||
* The data structures defined here are shared between Linux kernel and
|
||||
@ -619,7 +621,7 @@ union ceph_mds_request_args {
|
||||
} __attribute__ ((packed)) lookupino;
|
||||
} __attribute__ ((packed));
|
||||
|
||||
#define CEPH_MDS_REQUEST_HEAD_VERSION 1
|
||||
#define CEPH_MDS_REQUEST_HEAD_VERSION 2
|
||||
|
||||
/*
|
||||
* Note that any change to this structure must ensure that it is compatible
|
||||
@ -630,15 +632,68 @@ struct ceph_mds_request_head {
|
||||
__le64 oldest_client_tid;
|
||||
__le32 mdsmap_epoch; /* on client */
|
||||
__le32 flags; /* CEPH_MDS_FLAG_* */
|
||||
__u8 num_retry, num_fwd; /* count retry, fwd attempts */
|
||||
__u8 num_retry, num_fwd; /* legacy count retry and fwd attempts */
|
||||
__le16 num_releases; /* # include cap/lease release records */
|
||||
__le32 op; /* mds op code */
|
||||
__le32 caller_uid, caller_gid;
|
||||
__le64 ino; /* use this ino for openc, mkdir, mknod,
|
||||
etc. (if replaying) */
|
||||
union ceph_mds_request_args args;
|
||||
|
||||
__le32 ext_num_retry; /* new count retry attempts */
|
||||
__le32 ext_num_fwd; /* new count fwd attempts */
|
||||
} __attribute__ ((packed));
|
||||
|
||||
void inline encode(const struct ceph_mds_request_head& h, ceph::buffer::list& bl, bool old_version) {
|
||||
using ceph::encode;
|
||||
encode(h.version, bl);
|
||||
encode(h.oldest_client_tid, bl);
|
||||
encode(h.mdsmap_epoch, bl);
|
||||
encode(h.flags, bl);
|
||||
|
||||
// For old MDS daemons
|
||||
__u8 num_retry = __u32(h.ext_num_retry);
|
||||
__u8 num_fwd = __u32(h.ext_num_fwd);
|
||||
encode(num_retry, bl);
|
||||
encode(num_fwd, bl);
|
||||
|
||||
encode(h.num_releases, bl);
|
||||
encode(h.op, bl);
|
||||
encode(h.caller_uid, bl);
|
||||
encode(h.caller_gid, bl);
|
||||
encode(h.ino, bl);
|
||||
bl.append((char*)&h.args, sizeof(h.args));
|
||||
|
||||
if (!old_version) {
|
||||
encode(h.ext_num_retry, bl);
|
||||
encode(h.ext_num_fwd, bl);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Deserialize a ceph_mds_request_head from 'bl', field by field, in the
 * same order the matching encode() writes them.
 *
 * For heads with version >= 2 the 32-bit ext_num_retry/ext_num_fwd
 * members are read from the buffer after 'args'.  For older heads they
 * are not present, so they are filled in from the legacy 8-bit
 * num_retry/num_fwd values that were just decoded.
 */
void inline decode(struct ceph_mds_request_head& h, ceph::buffer::list::const_iterator& bl) {
  using ceph::decode;

  decode(h.version, bl);
  decode(h.oldest_client_tid, bl);
  decode(h.mdsmap_epoch, bl);
  decode(h.flags, bl);
  decode(h.num_retry, bl);
  decode(h.num_fwd, bl);
  decode(h.num_releases, bl);
  decode(h.op, bl);
  decode(h.caller_uid, bl);
  decode(h.caller_gid, bl);
  decode(h.ino, bl);

  // The args union is copied out as raw bytes.
  bl.copy(sizeof(h.args), reinterpret_cast<char*>(&(h.args)));

  if (h.version < 2) {
    // Pre-v2 head: no extended counters on the wire, mirror the legacy ones.
    h.ext_num_retry = h.num_retry;
    h.ext_num_fwd = h.num_fwd;
    return;
  }

  decode(h.ext_num_retry, bl);
  decode(h.ext_num_fwd, bl);
}
|
||||
|
||||
/* cap/lease release record */
|
||||
struct ceph_mds_request_release {
|
||||
__le64 ino, cap_id; /* ino and unique cap id */
|
||||
|
@ -320,7 +320,6 @@ WRITE_RAW_ENCODER(ceph_file_layout)
|
||||
WRITE_RAW_ENCODER(ceph_dir_layout)
|
||||
WRITE_RAW_ENCODER(ceph_mds_session_head)
|
||||
WRITE_RAW_ENCODER(ceph_mds_request_head_legacy)
|
||||
WRITE_RAW_ENCODER(ceph_mds_request_head)
|
||||
WRITE_RAW_ENCODER(ceph_mds_request_release)
|
||||
WRITE_RAW_ENCODER(ceph_filelock)
|
||||
WRITE_RAW_ENCODER(ceph_mds_caps_head)
|
||||
|
@ -29,6 +29,7 @@ static const std::array feature_names
|
||||
"alternate_name",
|
||||
"notify_session_state",
|
||||
"op_getvxattr",
|
||||
"32bits_retry_fwd",
|
||||
};
|
||||
static_assert(feature_names.size() == CEPHFS_FEATURE_MAX + 1);
|
||||
|
||||
|
@ -45,7 +45,8 @@ namespace ceph {
|
||||
#define CEPHFS_FEATURE_ALTERNATE_NAME 15
|
||||
#define CEPHFS_FEATURE_NOTIFY_SESSION_STATE 16
|
||||
#define CEPHFS_FEATURE_OP_GETVXATTR 17
|
||||
#define CEPHFS_FEATURE_MAX 17
|
||||
#define CEPHFS_FEATURE_32BITS_RETRY_FWD 18
|
||||
#define CEPHFS_FEATURE_MAX 18
|
||||
|
||||
#define CEPHFS_FEATURES_ALL { \
|
||||
0, 1, 2, 3, 4, \
|
||||
@ -64,6 +65,7 @@ namespace ceph {
|
||||
CEPHFS_FEATURE_ALTERNATE_NAME, \
|
||||
CEPHFS_FEATURE_NOTIFY_SESSION_STATE, \
|
||||
CEPHFS_FEATURE_OP_GETVXATTR, \
|
||||
CEPHFS_FEATURE_32BITS_RETRY_FWD, \
|
||||
}
|
||||
|
||||
#define CEPHFS_METRIC_FEATURES_ALL { \
|
||||
|
@ -73,6 +73,7 @@ private:
|
||||
public:
|
||||
mutable struct ceph_mds_request_head head; /* XXX HACK! */
|
||||
utime_t stamp;
|
||||
bool peer_old_version = false;
|
||||
|
||||
struct Release {
|
||||
mutable ceph_mds_request_release item;
|
||||
@ -111,10 +112,11 @@ protected:
|
||||
// cons
|
||||
MClientRequest()
|
||||
: MMDSOp(CEPH_MSG_CLIENT_REQUEST, HEAD_VERSION, COMPAT_VERSION) {}
|
||||
MClientRequest(int op)
|
||||
MClientRequest(int op, bool over=true)
|
||||
: MMDSOp(CEPH_MSG_CLIENT_REQUEST, HEAD_VERSION, COMPAT_VERSION) {
|
||||
memset(&head, 0, sizeof(head));
|
||||
head.op = op;
|
||||
peer_old_version = over;
|
||||
}
|
||||
~MClientRequest() final {}
|
||||
|
||||
@ -160,8 +162,8 @@ public:
|
||||
// normal fields
|
||||
void set_stamp(utime_t t) { stamp = t; }
|
||||
void set_oldest_client_tid(ceph_tid_t t) { head.oldest_client_tid = t; }
|
||||
void inc_num_fwd() { head.num_fwd = head.num_fwd + 1; }
|
||||
void set_retry_attempt(int a) { head.num_retry = a; }
|
||||
void inc_num_fwd() { head.ext_num_fwd = head.ext_num_fwd + 1; }
|
||||
void set_retry_attempt(int a) { head.ext_num_retry = a; }
|
||||
void set_filepath(const filepath& fp) { path = fp; }
|
||||
void set_filepath2(const filepath& fp) { path2 = fp; }
|
||||
void set_string2(const char *s) { path2.set_path(std::string_view(s), 0); }
|
||||
@ -192,8 +194,8 @@ public:
|
||||
|
||||
utime_t get_stamp() const { return stamp; }
|
||||
ceph_tid_t get_oldest_client_tid() const { return head.oldest_client_tid; }
|
||||
int get_num_fwd() const { return head.num_fwd; }
|
||||
int get_retry_attempt() const { return head.num_retry; }
|
||||
int get_num_fwd() const { return head.ext_num_fwd; }
|
||||
int get_retry_attempt() const { return head.ext_num_retry; }
|
||||
int get_op() const { return head.op; }
|
||||
unsigned get_caller_uid() const { return head.caller_uid; }
|
||||
unsigned get_caller_gid() const { return head.caller_gid; }
|
||||
@ -252,10 +254,20 @@ public:
|
||||
void encode_payload(uint64_t features) override {
|
||||
using ceph::encode;
|
||||
head.num_releases = releases.size();
|
||||
head.version = CEPH_MDS_REQUEST_HEAD_VERSION;
|
||||
/*
|
||||
* If the peer is old version, we must skip all the
|
||||
* new members, because the old version of MDS or
|
||||
* client will just copy the 'head' memory and isn't
|
||||
* that smart to skip them.
|
||||
*/
|
||||
if (peer_old_version) {
|
||||
head.version = 1;
|
||||
} else {
|
||||
head.version = CEPH_MDS_REQUEST_HEAD_VERSION;
|
||||
}
|
||||
|
||||
if (features & CEPH_FEATURE_FS_BTIME) {
|
||||
encode(head, payload);
|
||||
encode(head, payload, peer_old_version);
|
||||
} else {
|
||||
struct ceph_mds_request_head_legacy old_mds_head;
|
||||
|
||||
@ -312,8 +324,10 @@ public:
|
||||
out << " " << get_filepath2();
|
||||
if (stamp != utime_t())
|
||||
out << " " << stamp;
|
||||
if (head.num_retry)
|
||||
out << " RETRY=" << (int)head.num_retry;
|
||||
if (head.ext_num_fwd)
|
||||
out << " FWD=" << (int)head.ext_num_fwd;
|
||||
if (head.ext_num_retry)
|
||||
out << " RETRY=" << (int)head.ext_num_retry;
|
||||
if (is_async())
|
||||
out << " ASYNC";
|
||||
if (is_replay())
|
||||
|
Loading…
Reference in New Issue
Block a user