cpu: Support processor-less (memory-only) NUMA nodes (#734)
* cpu: Support processor-less (memory-only) NUMA nodes

  Processor-less (memory-only) NUMA nodes exist e.g. in systems that use Intel Optane drives for RAM expansion using Intel Memory Drive Technology (IMDT).

  IMDT RAM expansion supports two modes:
  * "Unify Remote Memory domains": present a processor-less (memory-only) NUMA domain, which is the default
  * "Expand local memory domains": to expand each processor’s memory domain with a portion of the memory made available by Optane and IMDT

  This commit fixes a crash in the first case (when "cpulist" is empty).

  Here's an example of such a system:

  $ numastat -m|head -n5
  Per-node system memory usage (in MBs):
                            Node 0          Node 1          Node 2           Total
                   --------------- --------------- --------------- ---------------
  MemTotal               118239.56       130816.00       464384.00       713439.56

  $ for i in {0..2}; do echo -n "$i: " ; cat /sys/bus/node/devices/node$i/cpulist ; done
  0: 0-7,16-23
  1: 8-15,24-31
  2:

  $ /opt/vsmp/bin/vsmpversion -vvv
  Memory Drive Technology: 8.2.1455.74 (Sep 28 2017 13:09:59)
  System configuration:
     Boards: 3
        1 x Proc. + I/O + Memory
        2 x NVM devices (Intel SSDPED1K375GAQ)
     Processors: 2, Cores: 16, Threads: 32
        Intel(R) Xeon(R) CPU E5-2667 v4 @ 3.20GHz Stepping 01
     Memory (MB): 713472 (of 977450), Cache: 251416, Private: 12562
        1 x 249088MB [262036/ 678/12270]
        1 x 232192MB [357707/125369/ 146] 82:00.0#1
        1 x 232192MB [357707/125369/ 146] 83:00.0#1

* cpu: rename some variables (pkg => node)

* cpu: Use %v not %q in log.Debugf() format strings
This commit is contained in:
parent
f6f9c8d6cc
commit
a8d7d1101a
|
@ -111,7 +111,7 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
|
|||
_, cpuname := filepath.Split(cpu)
|
||||
|
||||
if _, err := os.Stat(filepath.Join(cpu, "cpufreq")); os.IsNotExist(err) {
|
||||
log.Debugf("CPU %q is missing cpufreq", cpu)
|
||||
log.Debugf("CPU %v is missing cpufreq", cpu)
|
||||
} else {
|
||||
// sysfs cpufreq values are kHz, thus multiply by 1000 to export base units (hz).
|
||||
// See https://www.kernel.org/doc/Documentation/cpu-freq/user-guide.txt
|
||||
|
@ -132,7 +132,7 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
|
|||
}
|
||||
|
||||
if _, err := os.Stat(filepath.Join(cpu, "thermal_throttle")); os.IsNotExist(err) {
|
||||
log.Debugf("CPU %q is missing thermal_throttle", cpu)
|
||||
log.Debugf("CPU %v is missing thermal_throttle", cpu)
|
||||
continue
|
||||
}
|
||||
if value, err = readUintFromFile(filepath.Join(cpu, "thermal_throttle", "core_throttle_count")); err != nil {
|
||||
|
@ -141,36 +141,43 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
|
|||
ch <- prometheus.MustNewConstMetric(c.cpuCoreThrottle, prometheus.CounterValue, float64(value), cpuname)
|
||||
}
|
||||
|
||||
pkgs, err := filepath.Glob(sysFilePath("bus/node/devices/node[0-9]*"))
|
||||
nodes, err := filepath.Glob(sysFilePath("bus/node/devices/node[0-9]*"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// package/node loop
|
||||
for _, pkg := range pkgs {
|
||||
if _, err := os.Stat(filepath.Join(pkg, "cpulist")); os.IsNotExist(err) {
|
||||
log.Debugf("package %q is missing cpulist", pkg)
|
||||
// package / NUMA node loop
|
||||
for _, node := range nodes {
|
||||
if _, err := os.Stat(filepath.Join(node, "cpulist")); os.IsNotExist(err) {
|
||||
log.Debugf("NUMA node %v is missing cpulist", node)
|
||||
continue
|
||||
}
|
||||
cpulist, err := ioutil.ReadFile(filepath.Join(pkg, "cpulist"))
|
||||
cpulist, err := ioutil.ReadFile(filepath.Join(node, "cpulist"))
|
||||
if err != nil {
|
||||
log.Debugf("could not read cpulist of package %q", pkg)
|
||||
log.Debugf("could not read cpulist of NUMA node %v", node)
|
||||
return err
|
||||
}
|
||||
// cpulist example of one package/node with HT: "0-11,24-35"
|
||||
line := strings.Split(string(cpulist), "\n")[0]
|
||||
if line == "" {
|
||||
// Skip processor-less (memory-only) NUMA nodes.
|
||||
// E.g. RAM expansion with Intel Optane Drive(s) using
|
||||
// Intel Memory Drive Technology (IMDT).
|
||||
log.Debugf("skipping processor-less (memory-only) NUMA node %v", node)
|
||||
continue
|
||||
}
|
||||
firstCPU := strings.FieldsFunc(line, func(r rune) bool {
|
||||
return r == '-' || r == ','
|
||||
})[0]
|
||||
if _, err := os.Stat(filepath.Join(pkg, "cpu"+firstCPU, "thermal_throttle", "package_throttle_count")); os.IsNotExist(err) {
|
||||
log.Debugf("Package %q CPU %q is missing package_throttle", pkg, firstCPU)
|
||||
if _, err := os.Stat(filepath.Join(node, "cpu"+firstCPU, "thermal_throttle", "package_throttle_count")); os.IsNotExist(err) {
|
||||
log.Debugf("Node %v CPU %v is missing package_throttle", node, firstCPU)
|
||||
continue
|
||||
}
|
||||
if value, err = readUintFromFile(filepath.Join(pkg, "cpu"+firstCPU, "thermal_throttle", "package_throttle_count")); err != nil {
|
||||
if value, err = readUintFromFile(filepath.Join(node, "cpu"+firstCPU, "thermal_throttle", "package_throttle_count")); err != nil {
|
||||
return err
|
||||
}
|
||||
pkgno := digitRegexp.FindAllString(pkg, 1)[0]
|
||||
ch <- prometheus.MustNewConstMetric(c.cpuPackageThrottle, prometheus.CounterValue, float64(value), pkgno)
|
||||
nodeno := digitRegexp.FindAllString(node, 1)[0]
|
||||
ch <- prometheus.MustNewConstMetric(c.cpuPackageThrottle, prometheus.CounterValue, float64(value), nodeno)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
|
|
@ -150,6 +150,14 @@ Mode: 644
|
|||
Path: sys/bus/node/devices/node0/cpulist
|
||||
Lines: 1
|
||||
0-3
|
||||
Mode: 644
|
||||
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
Directory: sys/bus/node/devices/node1
|
||||
Mode: 755
|
||||
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
Path: sys/bus/node/devices/node1/cpulist
|
||||
Lines: 1
|
||||
|
||||
Mode: 644
|
||||
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
Directory: sys/class
|
||||
|
|
Loading…
Reference in New Issue