Collect InfiniBand port state and physical state (#1357)

Collect the InfiniBand port state, the physical state, and the maximum
signal transfer rate.

Signed-off-by: Benjamin Drung <benjamin.drung@cloud.ionos.com>
This commit is contained in:
Benjamin Drung 2019-11-22 22:52:17 +01:00 committed by Ben Kochie
parent 8b7df09d01
commit 04fbcfffa1
3 changed files with 37 additions and 0 deletions

View File

@@ -840,6 +840,11 @@ node_infiniband_multicast_packets_received_total{device="mlx4_0",port="2"} 0
# TYPE node_infiniband_multicast_packets_transmitted_total counter
node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="1"} 16
node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="2"} 0
# HELP node_infiniband_physical_state_id Physical state of the InfiniBand port (0: no change, 1: sleep, 2: polling, 3: disable, 4: shift, 5: link up, 6: link error recover, 7: phytest)
# TYPE node_infiniband_physical_state_id gauge
node_infiniband_physical_state_id{device="i40iw0",port="1"} 5
node_infiniband_physical_state_id{device="mlx4_0",port="1"} 5
node_infiniband_physical_state_id{device="mlx4_0",port="2"} 5
# HELP node_infiniband_port_constraint_errors_received_total Number of packets received on the switch physical port that are discarded
# TYPE node_infiniband_port_constraint_errors_received_total counter
node_infiniband_port_constraint_errors_received_total{device="mlx4_0",port="1"} 0
@@ -872,6 +877,16 @@ node_infiniband_port_packets_transmitted_total{device="mlx4_0",port="1"} 6.23586
# HELP node_infiniband_port_transmit_wait_total Number of ticks during which the port had data to transmit but no data was sent during the entire tick
# TYPE node_infiniband_port_transmit_wait_total counter
node_infiniband_port_transmit_wait_total{device="mlx4_0",port="1"} 4.294967295e+09
# HELP node_infiniband_rate_bytes_per_second Maximum signal transfer rate
# TYPE node_infiniband_rate_bytes_per_second gauge
node_infiniband_rate_bytes_per_second{device="i40iw0",port="1"} 1.25e+09
node_infiniband_rate_bytes_per_second{device="mlx4_0",port="1"} 5e+09
node_infiniband_rate_bytes_per_second{device="mlx4_0",port="2"} 5e+09
# HELP node_infiniband_state_id State of the InfiniBand port (0: no change, 1: down, 2: init, 3: armed, 4: active, 5: act defer)
# TYPE node_infiniband_state_id gauge
node_infiniband_state_id{device="i40iw0",port="1"} 4
node_infiniband_state_id{device="mlx4_0",port="1"} 4
node_infiniband_state_id{device="mlx4_0",port="2"} 4
# HELP node_infiniband_unicast_packets_received_total Number of unicast packets received (including errors)
# TYPE node_infiniband_unicast_packets_received_total counter
node_infiniband_unicast_packets_received_total{device="mlx4_0",port="1"} 61148

View File

@@ -840,6 +840,11 @@ node_infiniband_multicast_packets_received_total{device="mlx4_0",port="2"} 0
# TYPE node_infiniband_multicast_packets_transmitted_total counter
node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="1"} 16
node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="2"} 0
# HELP node_infiniband_physical_state_id Physical state of the InfiniBand port (0: no change, 1: sleep, 2: polling, 3: disable, 4: shift, 5: link up, 6: link error recover, 7: phytest)
# TYPE node_infiniband_physical_state_id gauge
node_infiniband_physical_state_id{device="i40iw0",port="1"} 5
node_infiniband_physical_state_id{device="mlx4_0",port="1"} 5
node_infiniband_physical_state_id{device="mlx4_0",port="2"} 5
# HELP node_infiniband_port_constraint_errors_received_total Number of packets received on the switch physical port that are discarded
# TYPE node_infiniband_port_constraint_errors_received_total counter
node_infiniband_port_constraint_errors_received_total{device="mlx4_0",port="1"} 0
@@ -872,6 +877,16 @@ node_infiniband_port_packets_transmitted_total{device="mlx4_0",port="1"} 6.23586
# HELP node_infiniband_port_transmit_wait_total Number of ticks during which the port had data to transmit but no data was sent during the entire tick
# TYPE node_infiniband_port_transmit_wait_total counter
node_infiniband_port_transmit_wait_total{device="mlx4_0",port="1"} 4.294967295e+09
# HELP node_infiniband_rate_bytes_per_second Maximum signal transfer rate
# TYPE node_infiniband_rate_bytes_per_second gauge
node_infiniband_rate_bytes_per_second{device="i40iw0",port="1"} 1.25e+09
node_infiniband_rate_bytes_per_second{device="mlx4_0",port="1"} 5e+09
node_infiniband_rate_bytes_per_second{device="mlx4_0",port="2"} 5e+09
# HELP node_infiniband_state_id State of the InfiniBand port (0: no change, 1: down, 2: init, 3: armed, 4: active, 5: act defer)
# TYPE node_infiniband_state_id gauge
node_infiniband_state_id{device="i40iw0",port="1"} 4
node_infiniband_state_id{device="mlx4_0",port="1"} 4
node_infiniband_state_id{device="mlx4_0",port="2"} 4
# HELP node_infiniband_unicast_packets_received_total Number of unicast packets received (including errors)
# TYPE node_infiniband_unicast_packets_received_total counter
node_infiniband_unicast_packets_received_total{device="mlx4_0",port="1"} 61148

View File

@@ -57,6 +57,7 @@ func NewInfiniBandCollector() (Collector, error) {
"link_error_recovery_total": "Number of times the link successfully recovered from an error state",
"multicast_packets_received_total": "Number of multicast packets received (including errors)",
"multicast_packets_transmitted_total": "Number of multicast packets transmitted (including errors)",
"physical_state_id": "Physical state of the InfiniBand port (0: no change, 1: sleep, 2: polling, 3: disable, 4: shift, 5: link up, 6: link error recover, 7: phytest)",
"port_constraint_errors_received_total": "Number of packets received on the switch physical port that are discarded",
"port_constraint_errors_transmitted_total": "Number of packets not transmitted from the switch physical port",
"port_data_received_bytes_total": "Number of data octets received on all links",
@@ -67,6 +68,8 @@ func NewInfiniBandCollector() (Collector, error) {
"port_packets_received_total": "Number of packets received on all VLs by this port (including errors)",
"port_packets_transmitted_total": "Number of packets transmitted on all VLs from this port (including errors)",
"port_transmit_wait_total": "Number of ticks during which the port had data to transmit but no data was sent during the entire tick",
"rate_bytes_per_second": "Maximum signal transfer rate",
"state_id": "State of the InfiniBand port (0: no change, 1: down, 2: init, 3: armed, 4: active, 5: act defer)",
"unicast_packets_received_total": "Number of unicast packets received (including errors)",
"unicast_packets_transmitted_total": "Number of unicast packets transmitted (including errors)",
}
@@ -105,6 +108,10 @@ func (c *infinibandCollector) Update(ch chan<- prometheus.Metric) error {
for _, port := range device.Ports {
portStr := strconv.FormatUint(uint64(port.Port), 10)
c.pushMetric(ch, "state_id", uint64(port.StateID), port.Name, portStr, prometheus.GaugeValue)
c.pushMetric(ch, "physical_state_id", uint64(port.PhysStateID), port.Name, portStr, prometheus.GaugeValue)
c.pushMetric(ch, "rate_bytes_per_second", port.Rate, port.Name, portStr, prometheus.GaugeValue)
c.pushCounter(ch, "legacy_multicast_packets_received_total", port.Counters.LegacyPortMulticastRcvPackets, port.Name, portStr)
c.pushCounter(ch, "legacy_multicast_packets_transmitted_total", port.Counters.LegacyPortMulticastXmitPackets, port.Name, portStr)
c.pushCounter(ch, "legacy_data_received_bytes_total", port.Counters.LegacyPortRcvData64, port.Name, portStr)