diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 5a6f54f7f20..6b7a90e5005 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -224,7 +224,7 @@ OPTION(ms_async_rdma_receive_buffers, OPT_U32, 10240) OPTION(ms_async_rdma_port_num, OPT_U32, 1) OPTION(ms_async_rdma_polling_us, OPT_U32, 1000) OPTION(ms_async_rdma_local_gid, OPT_STR, "") // GID format: "fe80:0000:0000:0000:7efe:90ff:fe72:6efe", no zero folding -OPTION(ms_async_rdma_roce_ver, OPT_INT, 2) // 2=RoCEv2, 1=RoCEv1.5, 0=RoCEv1 +OPTION(ms_async_rdma_roce_ver, OPT_INT, 1) // 0=RoCEv1, 1=RoCEv2, 2=RoCEv1.5 OPTION(ms_async_rdma_sl, OPT_INT, 3) // in RoCE, this means PCP OPTION(ms_dpdk_port_id, OPT_INT, 0) diff --git a/src/msg/async/rdma/Infiniband.cc b/src/msg/async/rdma/Infiniband.cc index 2913aa1ecc4..d81c338d2a2 100644 --- a/src/msg/async/rdma/Infiniband.cc +++ b/src/msg/async/rdma/Infiniband.cc @@ -49,7 +49,8 @@ Device::Device(CephContext *cct, ibv_device* d): device(d), device_attr(new ibv_ Port::Port(CephContext *cct, struct ibv_context* ictxt, uint8_t ipn): ctxt(ictxt), port_num(ipn), port_attr(new ibv_port_attr) { union ibv_gid cgid; struct ibv_exp_gid_attr gid_attr; - + bool malformed = false; + int r = ibv_query_port(ctxt, port_num, port_attr); if (r == -1) { lderr(cct) << __func__ << " query port failed " << cpp_strerror(errno) << dendl; @@ -61,7 +62,7 @@ Port::Port(CephContext *cct, struct ibv_context* ictxt, uint8_t ipn): ctxt(ictxt // search for requested GID in GIDs table ldout(cct, 1) << __func__ << " looking for local GID " << (cct->_conf->ms_async_rdma_local_gid) << " of type " << (cct->_conf->ms_async_rdma_roce_ver) << dendl; - sscanf(cct->_conf->ms_async_rdma_local_gid.c_str(), + r = sscanf(cct->_conf->ms_async_rdma_local_gid.c_str(), "%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx" ":%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx", &cgid.raw[ 0], &cgid.raw[ 1], @@ -73,6 +74,11 @@ Port::Port(CephContext *cct, struct ibv_context* ictxt, uint8_t ipn): ctxt(ictxt &cgid.raw[12], &cgid.raw[13], &cgid.raw[14], &cgid.raw[15]); + if (r != 16) { + ldout(cct, 1) << __func__ << " malformed or no GID supplied, using GID index 0" << dendl; + malformed = true; + } + gid_attr.comp_mask = IBV_EXP_QUERY_GID_ATTR_TYPE; for (gid_idx = 0; gid_idx < port_attr->gid_tbl_len; gid_idx++) { @@ -86,6 +92,8 @@ Port::Port(CephContext *cct, struct ibv_context* ictxt, uint8_t ipn): ctxt(ictxt lderr(cct) << __func__ << " query gid attributes of port " << port_num << " index " << gid_idx << " failed " << cpp_strerror(errno) << dendl; ceph_abort(); } + + if (malformed) break; // stay with gid_idx=0 if ( (gid_attr.type == cct->_conf->ms_async_rdma_roce_ver) && (memcmp(&gid, &cgid, 16) == 0) ) { ldout(cct, 1) << __func__ << " found at index " << gid_idx << dendl;