Fix CPU stats when some CPUs are offline
Signed-off-by: Jia Xin <alexjx@gmail.com>
This commit is contained in:
parent
a3bd2e1305
commit
39b4556b5b
|
@ -43,7 +43,7 @@ type cpuCollector struct {
|
|||
cpuPackageThrottle *prometheus.Desc
|
||||
cpuIsolated *prometheus.Desc
|
||||
logger log.Logger
|
||||
cpuStats []procfs.CPUStat
|
||||
cpuStats map[int64]procfs.CPUStat
|
||||
cpuStatsMutex sync.Mutex
|
||||
isolatedCpus []uint16
|
||||
|
||||
|
@ -126,6 +126,7 @@ func NewCPUCollector(logger log.Logger) (Collector, error) {
|
|||
),
|
||||
logger: logger,
|
||||
isolatedCpus: isolcpus,
|
||||
cpuStats: make(map[int64]procfs.CPUStat),
|
||||
}
|
||||
err = c.compileIncludeFlags(flagsInclude, bugsInclude)
|
||||
if err != nil {
|
||||
|
@ -324,7 +325,7 @@ func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error {
|
|||
c.cpuStatsMutex.Lock()
|
||||
defer c.cpuStatsMutex.Unlock()
|
||||
for cpuID, cpuStat := range c.cpuStats {
|
||||
cpuNum := strconv.Itoa(cpuID)
|
||||
cpuNum := strconv.Itoa(int(cpuID))
|
||||
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.User, cpuNum, "user")
|
||||
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Nice, cpuNum, "nice")
|
||||
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.System, cpuNum, "system")
|
||||
|
@ -345,82 +346,82 @@ func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error {
|
|||
}
|
||||
|
||||
// updateCPUStats updates the internal cache of CPU stats.
|
||||
func (c *cpuCollector) updateCPUStats(newStats []procfs.CPUStat) {
|
||||
func (c *cpuCollector) updateCPUStats(newStats map[int64]procfs.CPUStat) {
|
||||
|
||||
// Acquire a lock to update the stats.
|
||||
c.cpuStatsMutex.Lock()
|
||||
defer c.cpuStatsMutex.Unlock()
|
||||
|
||||
// Reset the cache if the list of CPUs has changed.
|
||||
if len(c.cpuStats) != len(newStats) {
|
||||
c.cpuStats = make([]procfs.CPUStat, len(newStats))
|
||||
}
|
||||
|
||||
for i, n := range newStats {
|
||||
cpuStats := c.cpuStats[i]
|
||||
|
||||
// If idle jumps backwards by more than X seconds, assume we had a hotplug event and reset the stats for this CPU.
|
||||
if (c.cpuStats[i].Idle - n.Idle) >= jumpBackSeconds {
|
||||
level.Debug(c.logger).Log("msg", jumpBackDebugMessage, "cpu", i, "old_value", c.cpuStats[i].Idle, "new_value", n.Idle)
|
||||
c.cpuStats[i] = procfs.CPUStat{}
|
||||
if (cpuStats.Idle - n.Idle) >= jumpBackSeconds {
|
||||
level.Debug(c.logger).Log("msg", jumpBackDebugMessage, "cpu", i, "old_value", cpuStats.Idle, "new_value", n.Idle)
|
||||
cpuStats = procfs.CPUStat{}
|
||||
}
|
||||
|
||||
if n.Idle >= c.cpuStats[i].Idle {
|
||||
c.cpuStats[i].Idle = n.Idle
|
||||
if n.Idle >= cpuStats.Idle {
|
||||
cpuStats.Idle = n.Idle
|
||||
} else {
|
||||
level.Debug(c.logger).Log("msg", "CPU Idle counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].Idle, "new_value", n.Idle)
|
||||
level.Debug(c.logger).Log("msg", "CPU Idle counter jumped backwards", "cpu", i, "old_value", cpuStats.Idle, "new_value", n.Idle)
|
||||
}
|
||||
|
||||
if n.User >= c.cpuStats[i].User {
|
||||
c.cpuStats[i].User = n.User
|
||||
if n.User >= cpuStats.User {
|
||||
cpuStats.User = n.User
|
||||
} else {
|
||||
level.Debug(c.logger).Log("msg", "CPU User counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].User, "new_value", n.User)
|
||||
level.Debug(c.logger).Log("msg", "CPU User counter jumped backwards", "cpu", i, "old_value", cpuStats.User, "new_value", n.User)
|
||||
}
|
||||
|
||||
if n.Nice >= c.cpuStats[i].Nice {
|
||||
c.cpuStats[i].Nice = n.Nice
|
||||
if n.Nice >= cpuStats.Nice {
|
||||
cpuStats.Nice = n.Nice
|
||||
} else {
|
||||
level.Debug(c.logger).Log("msg", "CPU Nice counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].Nice, "new_value", n.Nice)
|
||||
level.Debug(c.logger).Log("msg", "CPU Nice counter jumped backwards", "cpu", i, "old_value", cpuStats.Nice, "new_value", n.Nice)
|
||||
}
|
||||
|
||||
if n.System >= c.cpuStats[i].System {
|
||||
c.cpuStats[i].System = n.System
|
||||
if n.System >= cpuStats.System {
|
||||
cpuStats.System = n.System
|
||||
} else {
|
||||
level.Debug(c.logger).Log("msg", "CPU System counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].System, "new_value", n.System)
|
||||
level.Debug(c.logger).Log("msg", "CPU System counter jumped backwards", "cpu", i, "old_value", cpuStats.System, "new_value", n.System)
|
||||
}
|
||||
|
||||
if n.Iowait >= c.cpuStats[i].Iowait {
|
||||
c.cpuStats[i].Iowait = n.Iowait
|
||||
if n.Iowait >= cpuStats.Iowait {
|
||||
cpuStats.Iowait = n.Iowait
|
||||
} else {
|
||||
level.Debug(c.logger).Log("msg", "CPU Iowait counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].Iowait, "new_value", n.Iowait)
|
||||
level.Debug(c.logger).Log("msg", "CPU Iowait counter jumped backwards", "cpu", i, "old_value", cpuStats.Iowait, "new_value", n.Iowait)
|
||||
}
|
||||
|
||||
if n.IRQ >= c.cpuStats[i].IRQ {
|
||||
c.cpuStats[i].IRQ = n.IRQ
|
||||
if n.IRQ >= cpuStats.IRQ {
|
||||
cpuStats.IRQ = n.IRQ
|
||||
} else {
|
||||
level.Debug(c.logger).Log("msg", "CPU IRQ counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].IRQ, "new_value", n.IRQ)
|
||||
level.Debug(c.logger).Log("msg", "CPU IRQ counter jumped backwards", "cpu", i, "old_value", cpuStats.IRQ, "new_value", n.IRQ)
|
||||
}
|
||||
|
||||
if n.SoftIRQ >= c.cpuStats[i].SoftIRQ {
|
||||
c.cpuStats[i].SoftIRQ = n.SoftIRQ
|
||||
if n.SoftIRQ >= cpuStats.SoftIRQ {
|
||||
cpuStats.SoftIRQ = n.SoftIRQ
|
||||
} else {
|
||||
level.Debug(c.logger).Log("msg", "CPU SoftIRQ counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].SoftIRQ, "new_value", n.SoftIRQ)
|
||||
level.Debug(c.logger).Log("msg", "CPU SoftIRQ counter jumped backwards", "cpu", i, "old_value", cpuStats.SoftIRQ, "new_value", n.SoftIRQ)
|
||||
}
|
||||
|
||||
if n.Steal >= c.cpuStats[i].Steal {
|
||||
c.cpuStats[i].Steal = n.Steal
|
||||
if n.Steal >= cpuStats.Steal {
|
||||
cpuStats.Steal = n.Steal
|
||||
} else {
|
||||
level.Debug(c.logger).Log("msg", "CPU Steal counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].Steal, "new_value", n.Steal)
|
||||
level.Debug(c.logger).Log("msg", "CPU Steal counter jumped backwards", "cpu", i, "old_value", cpuStats.Steal, "new_value", n.Steal)
|
||||
}
|
||||
|
||||
if n.Guest >= c.cpuStats[i].Guest {
|
||||
c.cpuStats[i].Guest = n.Guest
|
||||
if n.Guest >= cpuStats.Guest {
|
||||
cpuStats.Guest = n.Guest
|
||||
} else {
|
||||
level.Debug(c.logger).Log("msg", "CPU Guest counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].Guest, "new_value", n.Guest)
|
||||
level.Debug(c.logger).Log("msg", "CPU Guest counter jumped backwards", "cpu", i, "old_value", cpuStats.Guest, "new_value", n.Guest)
|
||||
}
|
||||
|
||||
if n.GuestNice >= c.cpuStats[i].GuestNice {
|
||||
c.cpuStats[i].GuestNice = n.GuestNice
|
||||
if n.GuestNice >= cpuStats.GuestNice {
|
||||
cpuStats.GuestNice = n.GuestNice
|
||||
} else {
|
||||
level.Debug(c.logger).Log("msg", "CPU GuestNice counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].GuestNice, "new_value", n.GuestNice)
|
||||
level.Debug(c.logger).Log("msg", "CPU GuestNice counter jumped backwards", "cpu", i, "old_value", cpuStats.GuestNice, "new_value", n.GuestNice)
|
||||
}
|
||||
|
||||
c.cpuStats[i] = cpuStats
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,9 +24,16 @@ import (
|
|||
"github.com/prometheus/procfs"
|
||||
)
|
||||
|
||||
func makeTestCPUCollector(s []procfs.CPUStat) *cpuCollector {
|
||||
dup := make([]procfs.CPUStat, len(s))
|
||||
copy(dup, s)
|
||||
func copyStats(d, s map[int64]procfs.CPUStat) {
|
||||
for k := range s {
|
||||
v := s[k]
|
||||
d[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
func makeTestCPUCollector(s map[int64]procfs.CPUStat) *cpuCollector {
|
||||
dup := make(map[int64]procfs.CPUStat, len(s))
|
||||
copyStats(dup, s)
|
||||
return &cpuCollector{
|
||||
logger: log.NewNopLogger(),
|
||||
cpuStats: dup,
|
||||
|
@ -34,32 +41,34 @@ func makeTestCPUCollector(s []procfs.CPUStat) *cpuCollector {
|
|||
}
|
||||
|
||||
func TestCPU(t *testing.T) {
|
||||
firstCPUStat := []procfs.CPUStat{{
|
||||
User: 100.0,
|
||||
Nice: 100.0,
|
||||
System: 100.0,
|
||||
Idle: 100.0,
|
||||
Iowait: 100.0,
|
||||
IRQ: 100.0,
|
||||
SoftIRQ: 100.0,
|
||||
Steal: 100.0,
|
||||
Guest: 100.0,
|
||||
GuestNice: 100.0,
|
||||
}}
|
||||
firstCPUStat := map[int64]procfs.CPUStat{
|
||||
0: {
|
||||
User: 100.0,
|
||||
Nice: 100.0,
|
||||
System: 100.0,
|
||||
Idle: 100.0,
|
||||
Iowait: 100.0,
|
||||
IRQ: 100.0,
|
||||
SoftIRQ: 100.0,
|
||||
Steal: 100.0,
|
||||
Guest: 100.0,
|
||||
GuestNice: 100.0,
|
||||
}}
|
||||
|
||||
c := makeTestCPUCollector(firstCPUStat)
|
||||
want := []procfs.CPUStat{{
|
||||
User: 101.0,
|
||||
Nice: 101.0,
|
||||
System: 101.0,
|
||||
Idle: 101.0,
|
||||
Iowait: 101.0,
|
||||
IRQ: 101.0,
|
||||
SoftIRQ: 101.0,
|
||||
Steal: 101.0,
|
||||
Guest: 101.0,
|
||||
GuestNice: 101.0,
|
||||
}}
|
||||
want := map[int64]procfs.CPUStat{
|
||||
0: {
|
||||
User: 101.0,
|
||||
Nice: 101.0,
|
||||
System: 101.0,
|
||||
Idle: 101.0,
|
||||
Iowait: 101.0,
|
||||
IRQ: 101.0,
|
||||
SoftIRQ: 101.0,
|
||||
Steal: 101.0,
|
||||
Guest: 101.0,
|
||||
GuestNice: 101.0,
|
||||
}}
|
||||
c.updateCPUStats(want)
|
||||
got := c.cpuStats
|
||||
if !reflect.DeepEqual(want, got) {
|
||||
|
@ -67,18 +76,19 @@ func TestCPU(t *testing.T) {
|
|||
}
|
||||
|
||||
c = makeTestCPUCollector(firstCPUStat)
|
||||
jumpBack := []procfs.CPUStat{{
|
||||
User: 99.9,
|
||||
Nice: 99.9,
|
||||
System: 99.9,
|
||||
Idle: 99.9,
|
||||
Iowait: 99.9,
|
||||
IRQ: 99.9,
|
||||
SoftIRQ: 99.9,
|
||||
Steal: 99.9,
|
||||
Guest: 99.9,
|
||||
GuestNice: 99.9,
|
||||
}}
|
||||
jumpBack := map[int64]procfs.CPUStat{
|
||||
0: {
|
||||
User: 99.9,
|
||||
Nice: 99.9,
|
||||
System: 99.9,
|
||||
Idle: 99.9,
|
||||
Iowait: 99.9,
|
||||
IRQ: 99.9,
|
||||
SoftIRQ: 99.9,
|
||||
Steal: 99.9,
|
||||
Guest: 99.9,
|
||||
GuestNice: 99.9,
|
||||
}}
|
||||
c.updateCPUStats(jumpBack)
|
||||
got = c.cpuStats
|
||||
if reflect.DeepEqual(jumpBack, got) {
|
||||
|
@ -86,18 +96,19 @@ func TestCPU(t *testing.T) {
|
|||
}
|
||||
|
||||
c = makeTestCPUCollector(firstCPUStat)
|
||||
resetIdle := []procfs.CPUStat{{
|
||||
User: 102.0,
|
||||
Nice: 102.0,
|
||||
System: 102.0,
|
||||
Idle: 1.0,
|
||||
Iowait: 102.0,
|
||||
IRQ: 102.0,
|
||||
SoftIRQ: 102.0,
|
||||
Steal: 102.0,
|
||||
Guest: 102.0,
|
||||
GuestNice: 102.0,
|
||||
}}
|
||||
resetIdle := map[int64]procfs.CPUStat{
|
||||
0: {
|
||||
User: 102.0,
|
||||
Nice: 102.0,
|
||||
System: 102.0,
|
||||
Idle: 1.0,
|
||||
Iowait: 102.0,
|
||||
IRQ: 102.0,
|
||||
SoftIRQ: 102.0,
|
||||
Steal: 102.0,
|
||||
Guest: 102.0,
|
||||
GuestNice: 102.0,
|
||||
}}
|
||||
c.updateCPUStats(resetIdle)
|
||||
got = c.cpuStats
|
||||
if !reflect.DeepEqual(resetIdle, got) {
|
||||
|
|
4
go.mod
4
go.mod
|
@ -24,10 +24,10 @@ require (
|
|||
github.com/prometheus/client_model v0.3.0
|
||||
github.com/prometheus/common v0.37.0
|
||||
github.com/prometheus/exporter-toolkit v0.8.2
|
||||
github.com/prometheus/procfs v0.8.0
|
||||
github.com/prometheus/procfs v0.9.0
|
||||
github.com/safchain/ethtool v0.2.0
|
||||
github.com/soundcloud/go-runit v0.0.0-20150630195641-06ad41a06c4a
|
||||
golang.org/x/sys v0.2.0
|
||||
golang.org/x/sys v0.4.0
|
||||
gopkg.in/alecthomas/kingpin.v2 v2.2.6
|
||||
)
|
||||
|
||||
|
|
8
go.sum
8
go.sum
|
@ -246,8 +246,8 @@ github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsT
|
|||
github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
|
||||
github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=
|
||||
github.com/prometheus/procfs v0.7.3/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=
|
||||
github.com/prometheus/procfs v0.8.0 h1:ODq8ZFEaYeCaZOJlZZdJA2AbQR98dSHSM1KW/You5mo=
|
||||
github.com/prometheus/procfs v0.8.0/go.mod h1:z7EfXMXOkbkqb9IINtpCn86r/to3BnA0uaxHdg830/4=
|
||||
github.com/prometheus/procfs v0.9.0 h1:wzCHvIvM5SxWqYvwgVL7yJY8Lz3PKn49KQtpgMYJfhI=
|
||||
github.com/prometheus/procfs v0.9.0/go.mod h1:+pB4zwohETzFnmlpe6yd2lSc+0/46IYZRB/chUwxUZY=
|
||||
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
|
||||
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
|
||||
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
|
||||
|
@ -418,8 +418,8 @@ golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBc
|
|||
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.2.0 h1:ljd4t30dBnAvMZaQCevtY0xLLD0A+bRZXbgLMLU1F/A=
|
||||
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.4.0 h1:Zr2JFtRQNX3BCZ8YtxRE9hNJYC8J6I1MVbMg6owUp18=
|
||||
golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
|
|
Loading…
Reference in New Issue