[systemd] collect taskCurrent, tasksMax per systemd unit (#1098)

* [systemd] collect taskCurrent, tasksMax per systemd unit

Signed-off-by: Arno Uhlig <arno.uhlig@sap.com>
This commit is contained in:
Arno Uhlig 2018-11-14 10:50:39 +01:00 committed by Ben Kochie
parent 174b854080
commit 6edd9d217e
2 changed files with 61 additions and 0 deletions

View File

@ -43,6 +43,7 @@ Darwin meminfo metrics have been renamed to match Prometheus conventions. #1060
* [ENHANCEMENT] Handle stuck NFS mounts #997 * [ENHANCEMENT] Handle stuck NFS mounts #997
* [ENHANCEMENT] infiniband: Handle iWARP RDMA modules N/A #974 * [ENHANCEMENT] infiniband: Handle iWARP RDMA modules N/A #974
* [ENHANCEMENT] Update diskstats for linux kernel 4.19 #1109 * [ENHANCEMENT] Update diskstats for linux kernel 4.19 #1109
* [ENHANCEMENT] Collect TasksCurrent, TasksMax per systemd unit #1098
* [BUGFIX] Fix FreeBSD CPU temp #965 * [BUGFIX] Fix FreeBSD CPU temp #965
* [BUGFIX] Fix goroutine leak in supervisord collector #978 * [BUGFIX] Fix goroutine leak in supervisord collector #978

View File

@ -24,6 +24,7 @@ import (
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log" "github.com/prometheus/common/log"
"gopkg.in/alecthomas/kingpin.v2" "gopkg.in/alecthomas/kingpin.v2"
"math"
) )
var ( var (
@ -35,6 +36,8 @@ var (
type systemdCollector struct { type systemdCollector struct {
unitDesc *prometheus.Desc unitDesc *prometheus.Desc
unitStartTimeDesc *prometheus.Desc unitStartTimeDesc *prometheus.Desc
unitTasksCurrentDesc *prometheus.Desc
unitTasksMaxDesc *prometheus.Desc
systemRunningDesc *prometheus.Desc systemRunningDesc *prometheus.Desc
summaryDesc *prometheus.Desc summaryDesc *prometheus.Desc
nRestartsDesc *prometheus.Desc nRestartsDesc *prometheus.Desc
@ -64,6 +67,14 @@ func NewSystemdCollector() (Collector, error) {
prometheus.BuildFQName(namespace, subsystem, "unit_start_time_seconds"), prometheus.BuildFQName(namespace, subsystem, "unit_start_time_seconds"),
"Start time of the unit since unix epoch in seconds.", []string{"name"}, nil, "Start time of the unit since unix epoch in seconds.", []string{"name"}, nil,
) )
unitTasksCurrentDesc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "unit_tasks_current"),
"Current number of tasks per Systemd unit", []string{"name"}, nil,
)
unitTasksMaxDesc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "unit_tasks_max"),
"Maximum number of tasks per Systemd unit", []string{"name"}, nil,
)
systemRunningDesc := prometheus.NewDesc( systemRunningDesc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "system_running"), prometheus.BuildFQName(namespace, subsystem, "system_running"),
"Whether the system is operational (see 'systemctl is-system-running')", "Whether the system is operational (see 'systemctl is-system-running')",
@ -93,6 +104,8 @@ func NewSystemdCollector() (Collector, error) {
return &systemdCollector{ return &systemdCollector{
unitDesc: unitDesc, unitDesc: unitDesc,
unitStartTimeDesc: unitStartTimeDesc, unitStartTimeDesc: unitStartTimeDesc,
unitTasksCurrentDesc: unitTasksCurrentDesc,
unitTasksMaxDesc: unitTasksMaxDesc,
systemRunningDesc: systemRunningDesc, systemRunningDesc: systemRunningDesc,
summaryDesc: summaryDesc, summaryDesc: summaryDesc,
nRestartsDesc: nRestartsDesc, nRestartsDesc: nRestartsDesc,
@ -117,6 +130,8 @@ func (c *systemdCollector) Update(ch chan<- prometheus.Metric) error {
units := filterUnits(allUnits, c.unitWhitelistPattern, c.unitBlacklistPattern) units := filterUnits(allUnits, c.unitWhitelistPattern, c.unitBlacklistPattern)
c.collectUnitStatusMetrics(ch, units) c.collectUnitStatusMetrics(ch, units)
c.collectUnitStartTimeMetrics(ch, units) c.collectUnitStartTimeMetrics(ch, units)
c.collectUnitTasksCurrentMetrics(ch, units)
c.collectUnitTasksMaxMetrics(ch, units)
c.collectTimers(ch, units) c.collectTimers(ch, units)
c.collectSockets(ch, units) c.collectSockets(ch, units)
@ -176,6 +191,26 @@ func (c *systemdCollector) collectUnitStartTimeMetrics(ch chan<- prometheus.Metr
} }
} }
// collectUnitTasksCurrentMetrics sends one gauge per unit on ch, labelled with
// the unit name and carrying the unit's tasksCurrent value. Units whose
// tasksCurrent pointer is nil (no value recorded) are skipped.
func (c *systemdCollector) collectUnitTasksCurrentMetrics(ch chan<- prometheus.Metric, units []unit) {
	for i := range units {
		u := &units[i]
		if u.tasksCurrent == nil {
			// Nothing was recorded for this unit; emit no sample.
			continue
		}
		current := float64(*u.tasksCurrent)
		ch <- prometheus.MustNewConstMetric(c.unitTasksCurrentDesc, prometheus.GaugeValue, current, u.Name)
	}
}
// collectUnitTasksMaxMetrics sends one gauge per unit on ch, labelled with the
// unit name and carrying the unit's tasksMax value. Units whose tasksMax
// pointer is nil (no value recorded) are skipped.
func (c *systemdCollector) collectUnitTasksMaxMetrics(ch chan<- prometheus.Metric, units []unit) {
	for i := range units {
		u := &units[i]
		if u.tasksMax == nil {
			// Nothing was recorded for this unit; emit no sample.
			continue
		}
		limit := float64(*u.tasksMax)
		ch <- prometheus.MustNewConstMetric(c.unitTasksMaxDesc, prometheus.GaugeValue, limit, u.Name)
	}
}
func (c *systemdCollector) collectTimers(ch chan<- prometheus.Metric, units []unit) { func (c *systemdCollector) collectTimers(ch chan<- prometheus.Metric, units []unit) {
for _, unit := range units { for _, unit := range units {
if !strings.HasSuffix(unit.Name, ".timer") { if !strings.HasSuffix(unit.Name, ".timer") {
@ -214,6 +249,8 @@ type unit struct {
dbus.UnitStatus dbus.UnitStatus
lastTriggerUsec uint64 lastTriggerUsec uint64
startTimeUsec uint64 startTimeUsec uint64
tasksCurrent *uint64
tasksMax *uint64
nRestarts *uint32 nRestarts *uint32
acceptedConnections uint32 acceptedConnections uint32
currentConnections uint32 currentConnections uint32
@ -258,6 +295,29 @@ func (c *systemdCollector) getAllUnits() ([]unit, error) {
nRestarts := restartsCount.Value.Value().(uint32) nRestarts := restartsCount.Value.Value().(uint32)
unit.nRestarts = &nRestarts unit.nRestarts = &nRestarts
} }
tasksCurrentCount, err := conn.GetUnitTypeProperty(unit.Name, "Service", "TasksCurrent")
if err != nil {
log.Debugf("couldn't get unit '%s' TasksCurrent: %s", unit.Name, err)
} else {
val := tasksCurrentCount.Value.Value().(uint64)
// Don't set tasksCurrent if dbus reports MaxUint64.
if val != math.MaxUint64 {
unit.tasksCurrent = &val
}
}
tasksMaxCount, err := conn.GetUnitTypeProperty(unit.Name, "Service", "TasksMax")
if err != nil {
log.Debugf("couldn't get unit '%s' TasksMax: %s", unit.Name, err)
} else {
val := tasksMaxCount.Value.Value().(uint64)
// Don't set tasksMax if dbus reports MaxUint64.
if val != math.MaxUint64 {
unit.tasksMax = &val
}
}
} }
if strings.HasSuffix(unit.Name, ".socket") { if strings.HasSuffix(unit.Name, ".socket") {