Collect InfiniBand port state and physical state (#1357)
Collect the InfiniBand port state, the physical state, and the maximum signal transfer rate.

Signed-off-by: Benjamin Drung <benjamin.drung@cloud.ionos.com>
This commit is contained in:
parent
8b7df09d01
commit
04fbcfffa1
|
@@ -840,6 +840,11 @@ node_infiniband_multicast_packets_received_total{device="mlx4_0",port="2"} 0
|
|||
# TYPE node_infiniband_multicast_packets_transmitted_total counter
|
||||
node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="1"} 16
|
||||
node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="2"} 0
|
||||
# HELP node_infiniband_physical_state_id Physical state of the InfiniBand port (0: no change, 1: sleep, 2: polling, 3: disable, 4: shift, 5: link up, 6: link error recover, 7: phytest)
|
||||
# TYPE node_infiniband_physical_state_id gauge
|
||||
node_infiniband_physical_state_id{device="i40iw0",port="1"} 5
|
||||
node_infiniband_physical_state_id{device="mlx4_0",port="1"} 5
|
||||
node_infiniband_physical_state_id{device="mlx4_0",port="2"} 5
|
||||
# HELP node_infiniband_port_constraint_errors_received_total Number of packets received on the switch physical port that are discarded
|
||||
# TYPE node_infiniband_port_constraint_errors_received_total counter
|
||||
node_infiniband_port_constraint_errors_received_total{device="mlx4_0",port="1"} 0
|
||||
|
@@ -872,6 +877,16 @@ node_infiniband_port_packets_transmitted_total{device="mlx4_0",port="1"} 6.23586
|
|||
# HELP node_infiniband_port_transmit_wait_total Number of ticks during which the port had data to transmit but no data was sent during the entire tick
|
||||
# TYPE node_infiniband_port_transmit_wait_total counter
|
||||
node_infiniband_port_transmit_wait_total{device="mlx4_0",port="1"} 4.294967295e+09
|
||||
# HELP node_infiniband_rate_bytes_per_second Maximum signal transfer rate
|
||||
# TYPE node_infiniband_rate_bytes_per_second gauge
|
||||
node_infiniband_rate_bytes_per_second{device="i40iw0",port="1"} 1.25e+09
|
||||
node_infiniband_rate_bytes_per_second{device="mlx4_0",port="1"} 5e+09
|
||||
node_infiniband_rate_bytes_per_second{device="mlx4_0",port="2"} 5e+09
|
||||
# HELP node_infiniband_state_id State of the InfiniBand port (0: no change, 1: down, 2: init, 3: armed, 4: active, 5: act defer)
|
||||
# TYPE node_infiniband_state_id gauge
|
||||
node_infiniband_state_id{device="i40iw0",port="1"} 4
|
||||
node_infiniband_state_id{device="mlx4_0",port="1"} 4
|
||||
node_infiniband_state_id{device="mlx4_0",port="2"} 4
|
||||
# HELP node_infiniband_unicast_packets_received_total Number of unicast packets received (including errors)
|
||||
# TYPE node_infiniband_unicast_packets_received_total counter
|
||||
node_infiniband_unicast_packets_received_total{device="mlx4_0",port="1"} 61148
|
||||
|
|
|
@@ -840,6 +840,11 @@ node_infiniband_multicast_packets_received_total{device="mlx4_0",port="2"} 0
|
|||
# TYPE node_infiniband_multicast_packets_transmitted_total counter
|
||||
node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="1"} 16
|
||||
node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="2"} 0
|
||||
# HELP node_infiniband_physical_state_id Physical state of the InfiniBand port (0: no change, 1: sleep, 2: polling, 3: disable, 4: shift, 5: link up, 6: link error recover, 7: phytest)
|
||||
# TYPE node_infiniband_physical_state_id gauge
|
||||
node_infiniband_physical_state_id{device="i40iw0",port="1"} 5
|
||||
node_infiniband_physical_state_id{device="mlx4_0",port="1"} 5
|
||||
node_infiniband_physical_state_id{device="mlx4_0",port="2"} 5
|
||||
# HELP node_infiniband_port_constraint_errors_received_total Number of packets received on the switch physical port that are discarded
|
||||
# TYPE node_infiniband_port_constraint_errors_received_total counter
|
||||
node_infiniband_port_constraint_errors_received_total{device="mlx4_0",port="1"} 0
|
||||
|
@@ -872,6 +877,16 @@ node_infiniband_port_packets_transmitted_total{device="mlx4_0",port="1"} 6.23586
|
|||
# HELP node_infiniband_port_transmit_wait_total Number of ticks during which the port had data to transmit but no data was sent during the entire tick
|
||||
# TYPE node_infiniband_port_transmit_wait_total counter
|
||||
node_infiniband_port_transmit_wait_total{device="mlx4_0",port="1"} 4.294967295e+09
|
||||
# HELP node_infiniband_rate_bytes_per_second Maximum signal transfer rate
|
||||
# TYPE node_infiniband_rate_bytes_per_second gauge
|
||||
node_infiniband_rate_bytes_per_second{device="i40iw0",port="1"} 1.25e+09
|
||||
node_infiniband_rate_bytes_per_second{device="mlx4_0",port="1"} 5e+09
|
||||
node_infiniband_rate_bytes_per_second{device="mlx4_0",port="2"} 5e+09
|
||||
# HELP node_infiniband_state_id State of the InfiniBand port (0: no change, 1: down, 2: init, 3: armed, 4: active, 5: act defer)
|
||||
# TYPE node_infiniband_state_id gauge
|
||||
node_infiniband_state_id{device="i40iw0",port="1"} 4
|
||||
node_infiniband_state_id{device="mlx4_0",port="1"} 4
|
||||
node_infiniband_state_id{device="mlx4_0",port="2"} 4
|
||||
# HELP node_infiniband_unicast_packets_received_total Number of unicast packets received (including errors)
|
||||
# TYPE node_infiniband_unicast_packets_received_total counter
|
||||
node_infiniband_unicast_packets_received_total{device="mlx4_0",port="1"} 61148
|
||||
|
|
|
@@ -57,6 +57,7 @@ func NewInfiniBandCollector() (Collector, error) {
|
|||
"link_error_recovery_total": "Number of times the link successfully recovered from an error state",
|
||||
"multicast_packets_received_total": "Number of multicast packets received (including errors)",
|
||||
"multicast_packets_transmitted_total": "Number of multicast packets transmitted (including errors)",
|
||||
"physical_state_id": "Physical state of the InfiniBand port (0: no change, 1: sleep, 2: polling, 3: disable, 4: shift, 5: link up, 6: link error recover, 7: phytest)",
|
||||
"port_constraint_errors_received_total": "Number of packets received on the switch physical port that are discarded",
|
||||
"port_constraint_errors_transmitted_total": "Number of packets not transmitted from the switch physical port",
|
||||
"port_data_received_bytes_total": "Number of data octets received on all links",
|
||||
|
@@ -67,6 +68,8 @@ func NewInfiniBandCollector() (Collector, error) {
|
|||
"port_packets_received_total": "Number of packets received on all VLs by this port (including errors)",
|
||||
"port_packets_transmitted_total": "Number of packets transmitted on all VLs from this port (including errors)",
|
||||
"port_transmit_wait_total": "Number of ticks during which the port had data to transmit but no data was sent during the entire tick",
|
||||
"rate_bytes_per_second": "Maximum signal transfer rate",
|
||||
"state_id": "State of the InfiniBand port (0: no change, 1: down, 2: init, 3: armed, 4: active, 5: act defer)",
|
||||
"unicast_packets_received_total": "Number of unicast packets received (including errors)",
|
||||
"unicast_packets_transmitted_total": "Number of unicast packets transmitted (including errors)",
|
||||
}
|
||||
|
@@ -105,6 +108,10 @@ func (c *infinibandCollector) Update(ch chan<- prometheus.Metric) error {
|
|||
for _, port := range device.Ports {
|
||||
portStr := strconv.FormatUint(uint64(port.Port), 10)
|
||||
|
||||
c.pushMetric(ch, "state_id", uint64(port.StateID), port.Name, portStr, prometheus.GaugeValue)
|
||||
c.pushMetric(ch, "physical_state_id", uint64(port.PhysStateID), port.Name, portStr, prometheus.GaugeValue)
|
||||
c.pushMetric(ch, "rate_bytes_per_second", port.Rate, port.Name, portStr, prometheus.GaugeValue)
|
||||
|
||||
c.pushCounter(ch, "legacy_multicast_packets_received_total", port.Counters.LegacyPortMulticastRcvPackets, port.Name, portStr)
|
||||
c.pushCounter(ch, "legacy_multicast_packets_transmitted_total", port.Counters.LegacyPortMulticastXmitPackets, port.Name, portStr)
|
||||
c.pushCounter(ch, "legacy_data_received_bytes_total", port.Counters.LegacyPortRcvData64, port.Name, portStr)
|
||||
|
|
Loading…
Reference in New Issue