Merge PR #31274 into master

* refs/pull/31274/head:
	osd/OSD: enhance osd numa affinity compatibility

Reviewed-by: Sage Weil <sage@redhat.com>
This commit is contained in:
Sage Weil 2019-12-02 12:56:05 -06:00
commit 983b872da0
3 changed files with 76 additions and 14 deletions

View File

@ -25,6 +25,9 @@
#include "common/numa.h"
#include <netdb.h>
#include <string>
#include <string.h>
#include <vector>
#define dout_subsys ceph_subsys_
@ -508,15 +511,29 @@ int get_iface_numa_node(
const std::string& iface,
int *node)
{
string fn = std::string("/sys/class/net/") + iface + "/device/numa_node";
int ifatype = IFACE_DEFAULT;
string ifa = iface;
int pos = ifa.find(":");
if (pos != string::npos) {
ifa.erase(pos);
}
string fn = std::string("/sys/class/net/") + ifa + "/device/numa_node";
int fd = ::open(fn.c_str(), O_RDONLY);
if (fd < 0) {
fn = std::string("/sys/class/net/") + ifa + "/bonding/slaves";
fd = ::open(fn.c_str(), O_RDONLY);
if (fd < 0) {
return -errno;
}
ifatype = IFACE_BOND_PORT;
} else {
ifatype = IFACE_PHY_PORT;
}
int r = 0;
char buf[1024];
char *endptr = 0;
int fd = ::open(fn.c_str(), O_RDONLY);
if (fd < 0) {
return -errno;
}
int bond_node = -1;
r = safe_read(fd, &buf, sizeof(buf));
if (r < 0) {
goto out;
@ -525,13 +542,43 @@ int get_iface_numa_node(
while (r > 0 && ::isspace(buf[--r])) {
buf[r] = 0;
}
*node = strtoll(buf, &endptr, 10);
if (endptr != buf + strlen(buf)) {
r = -EINVAL;
goto out;
switch (ifatype) {
case IFACE_PHY_PORT:
*node = strtoll(buf, &endptr, 10);
if (endptr != buf + strlen(buf)) {
r = -EINVAL;
goto out;
}
r = 0;
break;
case IFACE_BOND_PORT:
std::vector<std::string> sv;
char *q, *p = strtok_r(buf, " ", &q);
while (p != NULL) {
sv.push_back(p);
p = strtok_r(NULL, " ", &q);
}
for (auto& iter : sv) {
int bn = -1;
r = get_iface_numa_node(iter, &bn);
if (r >= 0) {
if (bond_node == -1 || bn == bond_node) {
bond_node = bn;
} else {
*node = -2;
goto out;
}
} else {
goto out;
}
}
*node = bond_node;
break;
}
r = 0;
out:
out:
::close(fd);
return r;
}

View File

@ -20,6 +20,12 @@ class entity_addrvec_t;
#define CEPH_PICK_ADDRESS_PREFER_IPV4 0x40
#define CEPH_PICK_ADDRESS_DEFAULT_MON_PORTS 0x80
// Kind of network interface, as classified by get_iface_numa_node() from
// which sysfs file it manages to open for the interface.
enum IfaceType {
// Initial value, before either sysfs file has been opened.
IFACE_DEFAULT = 0,
// /sys/class/net/<iface>/device/numa_node could be opened; its content is
// parsed directly as the NUMA node number.
IFACE_PHY_PORT = 1,
// numa_node could not be opened but /sys/class/net/<iface>/bonding/slaves
// could; the NUMA node is derived from the listed slave interfaces.
IFACE_BOND_PORT = 2
};
#ifndef WITH_SEASTAR
/*
Pick addresses based on subnets if needed.

View File

@ -2299,11 +2299,11 @@ int OSD::set_numa_affinity()
cct,
cluster_messenger->get_myaddrs().front().get_sockaddr_storage());
int r = get_iface_numa_node(front_iface, &front_node);
if (r >= 0) {
if (r >= 0 && front_node >= 0) {
dout(1) << __func__ << " public network " << front_iface << " numa node "
<< front_node << dendl;
<< front_node << dendl;
r = get_iface_numa_node(back_iface, &back_node);
if (r >= 0) {
if (r >= 0 && back_node >= 0) {
dout(1) << __func__ << " cluster network " << back_iface << " numa node "
<< back_node << dendl;
if (front_node == back_node &&
@ -2312,14 +2312,23 @@ int OSD::set_numa_affinity()
if (g_conf().get_val<bool>("osd_numa_auto_affinity")) {
numa_node = front_node;
}
} else if (front_node != back_node) {
dout(1) << __func__ << " public and cluster network numa nodes do not match"
<< dendl;
} else {
dout(1) << __func__ << " objectstore and network numa nodes do not match"
<< dendl;
}
} else if (back_node == -2) {
dout(1) << __func__ << " cluster network " << back_iface
<< " ports numa nodes do not match" << dendl;
} else {
derr << __func__ << " unable to identify cluster interface '" << back_iface
<< "' numa node: " << cpp_strerror(r) << dendl;
}
} else if (front_node == -2) {
dout(1) << __func__ << " public network " << front_iface
<< " ports numa nodes do not match" << dendl;
} else {
derr << __func__ << " unable to identify public interface '" << front_iface
<< "' numa node: " << cpp_strerror(r) << dendl;