[systemd] collect tasksCurrent, tasksMax per systemd unit (#1098)
* [systemd] collect tasksCurrent, tasksMax per systemd unit Signed-off-by: Arno Uhlig <arno.uhlig@sap.com>
This commit is contained in:
parent
174b854080
commit
6edd9d217e
|
@ -43,6 +43,7 @@ Darwin meminfo metrics have been renamed to match Prometheus conventions. #1060
|
||||||
* [ENHANCEMENT] Handle stuck NFS mounts #997
|
* [ENHANCEMENT] Handle stuck NFS mounts #997
|
||||||
* [ENHANCEMENT] infiniband: Handle iWARP RDMA modules N/A #974
|
* [ENHANCEMENT] infiniband: Handle iWARP RDMA modules N/A #974
|
||||||
* [ENHANCEMENT] Update diskstats for linux kernel 4.19 #1109
|
* [ENHANCEMENT] Update diskstats for linux kernel 4.19 #1109
|
||||||
|
* [ENHANCEMENT] Collect TasksCurrent, TasksMax per systemd unit #1098
|
||||||
|
|
||||||
* [BUGFIX] Fix FreeBSD CPU temp #965
|
* [BUGFIX] Fix FreeBSD CPU temp #965
|
||||||
* [BUGFIX] Fix goroutine leak in supervisord collector #978
|
* [BUGFIX] Fix goroutine leak in supervisord collector #978
|
||||||
|
|
|
@ -24,6 +24,7 @@ import (
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
"github.com/prometheus/common/log"
|
"github.com/prometheus/common/log"
|
||||||
"gopkg.in/alecthomas/kingpin.v2"
|
"gopkg.in/alecthomas/kingpin.v2"
|
||||||
|
"math"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -35,6 +36,8 @@ var (
|
||||||
type systemdCollector struct {
|
type systemdCollector struct {
|
||||||
unitDesc *prometheus.Desc
|
unitDesc *prometheus.Desc
|
||||||
unitStartTimeDesc *prometheus.Desc
|
unitStartTimeDesc *prometheus.Desc
|
||||||
|
unitTasksCurrentDesc *prometheus.Desc
|
||||||
|
unitTasksMaxDesc *prometheus.Desc
|
||||||
systemRunningDesc *prometheus.Desc
|
systemRunningDesc *prometheus.Desc
|
||||||
summaryDesc *prometheus.Desc
|
summaryDesc *prometheus.Desc
|
||||||
nRestartsDesc *prometheus.Desc
|
nRestartsDesc *prometheus.Desc
|
||||||
|
@ -64,6 +67,14 @@ func NewSystemdCollector() (Collector, error) {
|
||||||
prometheus.BuildFQName(namespace, subsystem, "unit_start_time_seconds"),
|
prometheus.BuildFQName(namespace, subsystem, "unit_start_time_seconds"),
|
||||||
"Start time of the unit since unix epoch in seconds.", []string{"name"}, nil,
|
"Start time of the unit since unix epoch in seconds.", []string{"name"}, nil,
|
||||||
)
|
)
|
||||||
|
unitTasksCurrentDesc := prometheus.NewDesc(
|
||||||
|
prometheus.BuildFQName(namespace, subsystem, "unit_tasks_current"),
|
||||||
|
"Current number of tasks per Systemd unit", []string{"name"}, nil,
|
||||||
|
)
|
||||||
|
unitTasksMaxDesc := prometheus.NewDesc(
|
||||||
|
prometheus.BuildFQName(namespace, subsystem, "unit_tasks_max"),
|
||||||
|
"Maximum number of tasks per Systemd unit", []string{"name"}, nil,
|
||||||
|
)
|
||||||
systemRunningDesc := prometheus.NewDesc(
|
systemRunningDesc := prometheus.NewDesc(
|
||||||
prometheus.BuildFQName(namespace, subsystem, "system_running"),
|
prometheus.BuildFQName(namespace, subsystem, "system_running"),
|
||||||
"Whether the system is operational (see 'systemctl is-system-running')",
|
"Whether the system is operational (see 'systemctl is-system-running')",
|
||||||
|
@ -93,6 +104,8 @@ func NewSystemdCollector() (Collector, error) {
|
||||||
return &systemdCollector{
|
return &systemdCollector{
|
||||||
unitDesc: unitDesc,
|
unitDesc: unitDesc,
|
||||||
unitStartTimeDesc: unitStartTimeDesc,
|
unitStartTimeDesc: unitStartTimeDesc,
|
||||||
|
unitTasksCurrentDesc: unitTasksCurrentDesc,
|
||||||
|
unitTasksMaxDesc: unitTasksMaxDesc,
|
||||||
systemRunningDesc: systemRunningDesc,
|
systemRunningDesc: systemRunningDesc,
|
||||||
summaryDesc: summaryDesc,
|
summaryDesc: summaryDesc,
|
||||||
nRestartsDesc: nRestartsDesc,
|
nRestartsDesc: nRestartsDesc,
|
||||||
|
@ -117,6 +130,8 @@ func (c *systemdCollector) Update(ch chan<- prometheus.Metric) error {
|
||||||
units := filterUnits(allUnits, c.unitWhitelistPattern, c.unitBlacklistPattern)
|
units := filterUnits(allUnits, c.unitWhitelistPattern, c.unitBlacklistPattern)
|
||||||
c.collectUnitStatusMetrics(ch, units)
|
c.collectUnitStatusMetrics(ch, units)
|
||||||
c.collectUnitStartTimeMetrics(ch, units)
|
c.collectUnitStartTimeMetrics(ch, units)
|
||||||
|
c.collectUnitTasksCurrentMetrics(ch, units)
|
||||||
|
c.collectUnitTasksMaxMetrics(ch, units)
|
||||||
c.collectTimers(ch, units)
|
c.collectTimers(ch, units)
|
||||||
c.collectSockets(ch, units)
|
c.collectSockets(ch, units)
|
||||||
|
|
||||||
|
@ -176,6 +191,26 @@ func (c *systemdCollector) collectUnitStartTimeMetrics(ch chan<- prometheus.Metr
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *systemdCollector) collectUnitTasksCurrentMetrics(ch chan<- prometheus.Metric, units []unit) {
|
||||||
|
for _, unit := range units {
|
||||||
|
if unit.tasksCurrent != nil {
|
||||||
|
ch <- prometheus.MustNewConstMetric(
|
||||||
|
c.unitTasksCurrentDesc, prometheus.GaugeValue,
|
||||||
|
float64(*unit.tasksCurrent), unit.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *systemdCollector) collectUnitTasksMaxMetrics(ch chan<- prometheus.Metric, units []unit) {
|
||||||
|
for _, unit := range units {
|
||||||
|
if unit.tasksMax != nil {
|
||||||
|
ch <- prometheus.MustNewConstMetric(
|
||||||
|
c.unitTasksMaxDesc, prometheus.GaugeValue,
|
||||||
|
float64(*unit.tasksMax), unit.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (c *systemdCollector) collectTimers(ch chan<- prometheus.Metric, units []unit) {
|
func (c *systemdCollector) collectTimers(ch chan<- prometheus.Metric, units []unit) {
|
||||||
for _, unit := range units {
|
for _, unit := range units {
|
||||||
if !strings.HasSuffix(unit.Name, ".timer") {
|
if !strings.HasSuffix(unit.Name, ".timer") {
|
||||||
|
@ -214,6 +249,8 @@ type unit struct {
|
||||||
dbus.UnitStatus
|
dbus.UnitStatus
|
||||||
lastTriggerUsec uint64
|
lastTriggerUsec uint64
|
||||||
startTimeUsec uint64
|
startTimeUsec uint64
|
||||||
|
tasksCurrent *uint64
|
||||||
|
tasksMax *uint64
|
||||||
nRestarts *uint32
|
nRestarts *uint32
|
||||||
acceptedConnections uint32
|
acceptedConnections uint32
|
||||||
currentConnections uint32
|
currentConnections uint32
|
||||||
|
@ -258,6 +295,29 @@ func (c *systemdCollector) getAllUnits() ([]unit, error) {
|
||||||
nRestarts := restartsCount.Value.Value().(uint32)
|
nRestarts := restartsCount.Value.Value().(uint32)
|
||||||
unit.nRestarts = &nRestarts
|
unit.nRestarts = &nRestarts
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tasksCurrentCount, err := conn.GetUnitTypeProperty(unit.Name, "Service", "TasksCurrent")
|
||||||
|
if err != nil {
|
||||||
|
log.Debugf("couldn't get unit '%s' TasksCurrent: %s", unit.Name, err)
|
||||||
|
} else {
|
||||||
|
val := tasksCurrentCount.Value.Value().(uint64)
|
||||||
|
// Don't set tasksCurrent if dbus reports MaxUint64.
|
||||||
|
if val != math.MaxUint64 {
|
||||||
|
unit.tasksCurrent = &val
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tasksMaxCount, err := conn.GetUnitTypeProperty(unit.Name, "Service", "TasksMax")
|
||||||
|
if err != nil {
|
||||||
|
log.Debugf("couldn't get unit '%s' TasksMax: %s", unit.Name, err)
|
||||||
|
} else {
|
||||||
|
val := tasksMaxCount.Value.Value().(uint64)
|
||||||
|
// Don't set tasksMax if dbus reports MaxUint64.
|
||||||
|
if val != math.MaxUint64 {
|
||||||
|
unit.tasksMax = &val
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if strings.HasSuffix(unit.Name, ".socket") {
|
if strings.HasSuffix(unit.Name, ".socket") {
|
||||||
|
|
Loading…
Reference in New Issue