NRestarts or NRefused aren't available on older systemd versions (#1039)
* If NRestarts or NRefused are not available, don't ignore the unit itself
* Don't report systemd metrics (NRestarts/NRefused) that are not available

Signed-off-by: James Hartig <james@getadmiral.com>
This commit is contained in:
parent
fe5a117831
commit
60c827231a
|
@ -9,6 +9,7 @@ The wifi collector is disabled by default due to suspected caching issues and go
|
||||||
* https://github.com/prometheus/node_exporter/issues/1008
|
* https://github.com/prometheus/node_exporter/issues/1008
|
||||||
|
|
||||||
* [CHANGE] Filter out non-installed units when collecting all systemd units #1011
|
* [CHANGE] Filter out non-installed units when collecting all systemd units #1011
|
||||||
|
* [CHANGE] `service_restart_total` and `socket_refused_connections_total` will not be reported if you're running an older version of systemd that lacks the underlying property (NRestarts was added in systemd v235, NRefused in v239)
|
||||||
* [FEATURE] Collect NRefused property for systemd socket units (available as of systemd v239)
|
* [FEATURE] Collect NRefused property for systemd socket units (available as of systemd v239)
|
||||||
* [FEATURE] Collect NRestarts property for systemd service units
|
* [FEATURE] Collect NRestarts property for systemd service units
|
||||||
* [FEATURE] Add socket unit stats to systemd collector #968
|
* [FEATURE] Add socket unit stats to systemd collector #968
|
||||||
|
@ -16,6 +17,7 @@ The wifi collector is disabled by default due to suspected caching issues and go
|
||||||
* [ENHANCEMENT]
|
* [ENHANCEMENT]
|
||||||
|
|
||||||
* [BUGFIX] Fix goroutine leak in supervisord collector
|
* [BUGFIX] Fix goroutine leak in supervisord collector
|
||||||
|
* [BUGFIX] Systemd units will no longer be ignored if you're running an older version of systemd where the NRestarts or NRefused properties are not available #1039
|
||||||
* [BUGFIX] Handle vanishing PIDs #1043
|
* [BUGFIX] Handle vanishing PIDs #1043
|
||||||
|
|
||||||
## 0.16.0 / 2018-05-15
|
## 0.16.0 / 2018-05-15
|
||||||
|
|
|
@ -140,10 +140,10 @@ func (c *systemdCollector) collectUnitStatusMetrics(ch chan<- prometheus.Metric,
|
||||||
c.unitDesc, prometheus.GaugeValue, isActive,
|
c.unitDesc, prometheus.GaugeValue, isActive,
|
||||||
unit.Name, stateName)
|
unit.Name, stateName)
|
||||||
}
|
}
|
||||||
if strings.HasSuffix(unit.Name, ".service") {
|
if strings.HasSuffix(unit.Name, ".service") && unit.nRestarts != nil {
|
||||||
ch <- prometheus.MustNewConstMetric(
|
ch <- prometheus.MustNewConstMetric(
|
||||||
c.nRestartsDesc, prometheus.CounterValue,
|
c.nRestartsDesc, prometheus.CounterValue,
|
||||||
float64(unit.nRestarts), unit.Name)
|
float64(*unit.nRestarts), unit.Name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -160,9 +160,11 @@ func (c *systemdCollector) collectSockets(ch chan<- prometheus.Metric, units []u
|
||||||
ch <- prometheus.MustNewConstMetric(
|
ch <- prometheus.MustNewConstMetric(
|
||||||
c.socketCurrentConnectionsDesc, prometheus.GaugeValue,
|
c.socketCurrentConnectionsDesc, prometheus.GaugeValue,
|
||||||
float64(unit.currentConnections), unit.Name)
|
float64(unit.currentConnections), unit.Name)
|
||||||
ch <- prometheus.MustNewConstMetric(
|
if unit.refusedConnections != nil {
|
||||||
c.socketRefusedConnectionsDesc, prometheus.GaugeValue,
|
ch <- prometheus.MustNewConstMetric(
|
||||||
float64(unit.refusedConnections), unit.Name)
|
c.socketRefusedConnectionsDesc, prometheus.GaugeValue,
|
||||||
|
float64(*unit.refusedConnections), unit.Name)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -212,10 +214,10 @@ type unit struct {
|
||||||
dbus.UnitStatus
|
dbus.UnitStatus
|
||||||
lastTriggerUsec uint64
|
lastTriggerUsec uint64
|
||||||
startTimeUsec uint64
|
startTimeUsec uint64
|
||||||
nRestarts uint32
|
nRestarts *uint32
|
||||||
acceptedConnections uint32
|
acceptedConnections uint32
|
||||||
currentConnections uint32
|
currentConnections uint32
|
||||||
refusedConnections uint32
|
refusedConnections *uint32
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *systemdCollector) getAllUnits() ([]unit, error) {
|
func (c *systemdCollector) getAllUnits() ([]unit, error) {
|
||||||
|
@ -241,40 +243,47 @@ func (c *systemdCollector) getAllUnits() ([]unit, error) {
|
||||||
if strings.HasSuffix(unit.Name, ".timer") {
|
if strings.HasSuffix(unit.Name, ".timer") {
|
||||||
lastTriggerValue, err := conn.GetUnitTypeProperty(unit.Name, "Timer", "LastTriggerUSec")
|
lastTriggerValue, err := conn.GetUnitTypeProperty(unit.Name, "Timer", "LastTriggerUSec")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("couldn't get unit '%s' LastTriggerUSec: %s", unit.Name, err)
|
log.Debugf("couldn't get unit '%s' LastTriggerUSec: %s\n", unit.Name, err)
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
unit.lastTriggerUsec = lastTriggerValue.Value.Value().(uint64)
|
unit.lastTriggerUsec = lastTriggerValue.Value.Value().(uint64)
|
||||||
}
|
}
|
||||||
if strings.HasSuffix(unit.Name, ".service") {
|
if strings.HasSuffix(unit.Name, ".service") {
|
||||||
nRestarts, err := conn.GetUnitTypeProperty(unit.Name, "Service", "NRestarts")
|
// NRestarts wasn't added until systemd 235.
|
||||||
|
restartsCount, err := conn.GetUnitTypeProperty(unit.Name, "Service", "NRestarts")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Debugf("couldn't get unit '%s' NRestarts: %s\n", unit.Name, err)
|
log.Debugf("couldn't get unit '%s' NRestarts: %s\n", unit.Name, err)
|
||||||
continue
|
} else {
|
||||||
|
nRestarts := restartsCount.Value.Value().(uint32)
|
||||||
|
unit.nRestarts = &nRestarts
|
||||||
}
|
}
|
||||||
unit.nRestarts = nRestarts.Value.Value().(uint32)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if strings.HasSuffix(unit.Name, ".socket") {
|
if strings.HasSuffix(unit.Name, ".socket") {
|
||||||
acceptedConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NAccepted")
|
acceptedConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NAccepted")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("couldn't get unit '%s' NAccepted: %s", unit.Name, err)
|
log.Debugf("couldn't get unit '%s' NAccepted: %s\n", unit.Name, err)
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
unit.acceptedConnections = acceptedConnectionCount.Value.Value().(uint32)
|
unit.acceptedConnections = acceptedConnectionCount.Value.Value().(uint32)
|
||||||
|
|
||||||
currentConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NConnections")
|
currentConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NConnections")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("couldn't get unit '%s' NConnections: %s", unit.Name, err)
|
log.Debugf("couldn't get unit '%s' NConnections: %s\n", unit.Name, err)
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
unit.currentConnections = currentConnectionCount.Value.Value().(uint32)
|
unit.currentConnections = currentConnectionCount.Value.Value().(uint32)
|
||||||
|
|
||||||
|
// NRefused wasn't added until systemd 239.
|
||||||
refusedConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NRefused")
|
refusedConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NRefused")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Debugf("couldn't get unit '%s' NRefused: %s\n", unit.Name, err)
|
log.Debugf("couldn't get unit '%s' NRefused: %s\n", unit.Name, err)
|
||||||
continue
|
} else {
|
||||||
|
nRefused := refusedConnectionCount.Value.Value().(uint32)
|
||||||
|
unit.refusedConnections = &nRefused
|
||||||
}
|
}
|
||||||
unit.refusedConnections = refusedConnectionCount.Value.Value().(uint32)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if unit.ActiveState != "active" {
|
if unit.ActiveState != "active" {
|
||||||
|
@ -282,7 +291,8 @@ func (c *systemdCollector) getAllUnits() ([]unit, error) {
|
||||||
} else {
|
} else {
|
||||||
timestampValue, err := conn.GetUnitProperty(unit.Name, "ActiveEnterTimestamp")
|
timestampValue, err := conn.GetUnitProperty(unit.Name, "ActiveEnterTimestamp")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("couldn't get unit '%s' StartTimeUsec: %s", unit.Name, err)
|
log.Debugf("couldn't get unit '%s' StartTimeUsec: %s\n", unit.Name, err)
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
unit.startTimeUsec = timestampValue.Value.Value().(uint64)
|
unit.startTimeUsec = timestampValue.Value.Value().(uint64)
|
||||||
|
|
Loading…
Reference in New Issue