Collect additional common Infiniband counters (#1120)

* Collect additional common Infiniband counters

Signed-off-by: Patrick Freeman <will.pat.free@gmail.com>
This commit is contained in:
Patrick 2018-10-30 16:54:09 -04:00 committed by Ben Kochie
parent 988f049040
commit bdc0e7e678
5 changed files with 124 additions and 9 deletions

View File

@ -6,7 +6,7 @@
* [BUGFIX]
* [CHANGE]
* [ENHANCEMENT]
* [ENHANCEMENT] Add additional InfiniBand counters #1120
* [FEATURE]
## 0.17.0-rc.0 / 2018-10-19

View File

@ -826,6 +826,14 @@ node_infiniband_multicast_packets_received_total{device="mlx4_0",port="2"} 0
# TYPE node_infiniband_multicast_packets_transmitted_total counter
node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="1"} 16
node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="2"} 0
# HELP node_infiniband_port_constraint_errors_received_total Number of packets received on the switch physical port that are discarded
# TYPE node_infiniband_port_constraint_errors_received_total counter
node_infiniband_port_constraint_errors_received_total{device="i40iw0",port="1"} 0
node_infiniband_port_constraint_errors_received_total{device="mlx4_0",port="1"} 0
# HELP node_infiniband_port_constraint_errors_transmitted_total Number of packets not transmitted from the switch physical port
# TYPE node_infiniband_port_constraint_errors_transmitted_total counter
node_infiniband_port_constraint_errors_transmitted_total{device="i40iw0",port="1"} 0
node_infiniband_port_constraint_errors_transmitted_total{device="mlx4_0",port="1"} 0
# HELP node_infiniband_port_data_received_bytes_total Number of data octets received on all links
# TYPE node_infiniband_port_data_received_bytes_total counter
node_infiniband_port_data_received_bytes_total{device="i40iw0",port="1"} 0
@ -836,6 +844,29 @@ node_infiniband_port_data_received_bytes_total{device="mlx4_0",port="2"} 0
node_infiniband_port_data_transmitted_bytes_total{device="i40iw0",port="1"} 0
node_infiniband_port_data_transmitted_bytes_total{device="mlx4_0",port="1"} 1.493376e+07
node_infiniband_port_data_transmitted_bytes_total{device="mlx4_0",port="2"} 0
# HELP node_infiniband_port_discards_received_total Number of inbound packets discarded by the port because the port is down or congested
# TYPE node_infiniband_port_discards_received_total counter
node_infiniband_port_discards_received_total{device="mlx4_0",port="1"} 0
# HELP node_infiniband_port_discards_transmitted_total Number of outbound packets discarded by the port because the port is down or congested
# TYPE node_infiniband_port_discards_transmitted_total counter
node_infiniband_port_discards_transmitted_total{device="i40iw0",port="1"} 0
node_infiniband_port_discards_transmitted_total{device="mlx4_0",port="1"} 5
# HELP node_infiniband_port_errors_received_total Number of packets containing an error that were received on this port
# TYPE node_infiniband_port_errors_received_total counter
node_infiniband_port_errors_received_total{device="i40iw0",port="1"} 0
node_infiniband_port_errors_received_total{device="mlx4_0",port="1"} 0
# HELP node_infiniband_port_packets_received_total Number of packets received on all VLs by this port (including errors)
# TYPE node_infiniband_port_packets_received_total counter
node_infiniband_port_packets_received_total{device="i40iw0",port="1"} 0
node_infiniband_port_packets_received_total{device="mlx4_0",port="1"} 6.825908347e+09
# HELP node_infiniband_port_packets_transmitted_total Number of packets transmitted on all VLs from this port (including errors)
# TYPE node_infiniband_port_packets_transmitted_total counter
node_infiniband_port_packets_transmitted_total{device="i40iw0",port="1"} 0
node_infiniband_port_packets_transmitted_total{device="mlx4_0",port="1"} 6.235865e+06
# HELP node_infiniband_port_transmit_wait_total Number of ticks during which the port had data to transmit but no data was sent during the entire tick
# TYPE node_infiniband_port_transmit_wait_total counter
node_infiniband_port_transmit_wait_total{device="i40iw0",port="1"} 0
node_infiniband_port_transmit_wait_total{device="mlx4_0",port="1"} 4.294967295e+09
# HELP node_infiniband_unicast_packets_received_total Number of unicast packets received (including errors)
# TYPE node_infiniband_unicast_packets_received_total counter
node_infiniband_unicast_packets_received_total{device="mlx4_0",port="1"} 61148

View File

@ -826,6 +826,14 @@ node_infiniband_multicast_packets_received_total{device="mlx4_0",port="2"} 0
# TYPE node_infiniband_multicast_packets_transmitted_total counter
node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="1"} 16
node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="2"} 0
# HELP node_infiniband_port_constraint_errors_received_total Number of packets received on the switch physical port that are discarded
# TYPE node_infiniband_port_constraint_errors_received_total counter
node_infiniband_port_constraint_errors_received_total{device="i40iw0",port="1"} 0
node_infiniband_port_constraint_errors_received_total{device="mlx4_0",port="1"} 0
# HELP node_infiniband_port_constraint_errors_transmitted_total Number of packets not transmitted from the switch physical port
# TYPE node_infiniband_port_constraint_errors_transmitted_total counter
node_infiniband_port_constraint_errors_transmitted_total{device="i40iw0",port="1"} 0
node_infiniband_port_constraint_errors_transmitted_total{device="mlx4_0",port="1"} 0
# HELP node_infiniband_port_data_received_bytes_total Number of data octets received on all links
# TYPE node_infiniband_port_data_received_bytes_total counter
node_infiniband_port_data_received_bytes_total{device="i40iw0",port="1"} 0
@ -836,6 +844,29 @@ node_infiniband_port_data_received_bytes_total{device="mlx4_0",port="2"} 0
node_infiniband_port_data_transmitted_bytes_total{device="i40iw0",port="1"} 0
node_infiniband_port_data_transmitted_bytes_total{device="mlx4_0",port="1"} 1.493376e+07
node_infiniband_port_data_transmitted_bytes_total{device="mlx4_0",port="2"} 0
# HELP node_infiniband_port_discards_received_total Number of inbound packets discarded by the port because the port is down or congested
# TYPE node_infiniband_port_discards_received_total counter
node_infiniband_port_discards_received_total{device="mlx4_0",port="1"} 0
# HELP node_infiniband_port_discards_transmitted_total Number of outbound packets discarded by the port because the port is down or congested
# TYPE node_infiniband_port_discards_transmitted_total counter
node_infiniband_port_discards_transmitted_total{device="i40iw0",port="1"} 0
node_infiniband_port_discards_transmitted_total{device="mlx4_0",port="1"} 5
# HELP node_infiniband_port_errors_received_total Number of packets containing an error that were received on this port
# TYPE node_infiniband_port_errors_received_total counter
node_infiniband_port_errors_received_total{device="i40iw0",port="1"} 0
node_infiniband_port_errors_received_total{device="mlx4_0",port="1"} 0
# HELP node_infiniband_port_packets_received_total Number of packets received on all VLs by this port (including errors)
# TYPE node_infiniband_port_packets_received_total counter
node_infiniband_port_packets_received_total{device="i40iw0",port="1"} 0
node_infiniband_port_packets_received_total{device="mlx4_0",port="1"} 6.825908347e+09
# HELP node_infiniband_port_packets_transmitted_total Number of packets transmitted on all VLs from this port (including errors)
# TYPE node_infiniband_port_packets_transmitted_total counter
node_infiniband_port_packets_transmitted_total{device="i40iw0",port="1"} 0
node_infiniband_port_packets_transmitted_total{device="mlx4_0",port="1"} 6.235865e+06
# HELP node_infiniband_port_transmit_wait_total Number of ticks during which the port had data to transmit but no data was sent during the entire tick
# TYPE node_infiniband_port_transmit_wait_total counter
node_infiniband_port_transmit_wait_total{device="i40iw0",port="1"} 0
node_infiniband_port_transmit_wait_total{device="mlx4_0",port="1"} 4.294967295e+09
# HELP node_infiniband_unicast_packets_received_total Number of unicast packets received (including errors)
# TYPE node_infiniband_unicast_packets_received_total counter
node_infiniband_unicast_packets_received_total{device="mlx4_0",port="1"} 61148

View File

@ -238,16 +238,61 @@ Lines: 1
16
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/class/infiniband/mlx4_0/ports/1/counters/port_rcv_constraint_errors
Lines: 1
0
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/class/infiniband/mlx4_0/ports/1/counters/port_rcv_data
Lines: 1
4631917
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/class/infiniband/mlx4_0/ports/1/counters/port_rcv_discards
Lines: 1
0
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/class/infiniband/mlx4_0/ports/1/counters/port_xmit_discards
Lines: 1
0
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/class/infiniband/mlx4_0/ports/1/counters/port_rcv_errors
Lines: 1
0
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/class/infiniband/mlx4_0/ports/1/counters/port_rcv_packets
Lines: 1
6825908347
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/class/infiniband/mlx4_0/ports/1/counters/port_xmit_constraint_errors
Lines: 1
0
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/class/infiniband/mlx4_0/ports/1/counters/port_xmit_data
Lines: 1
3733440
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/class/infiniband/mlx4_0/ports/1/counters/port_xmit_discards
Lines: 1
5
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/class/infiniband/mlx4_0/ports/1/counters/port_xmit_packets
Lines: 1
6235865
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/class/infiniband/mlx4_0/ports/1/counters/port_xmit_wait
Lines: 1
4294967295
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/class/infiniband/mlx4_0/ports/1/counters/unicast_rcv_packets
Lines: 1
61148

View File

@ -54,14 +54,22 @@ func NewInfiniBandCollector() (Collector, error) {
// Filenames of all InfiniBand counter metrics including a detailed description.
i.counters = map[string]infinibandMetric{
"link_downed_total": {"link_downed", "Number of times the link failed to recover from an error state and went down"},
"link_error_recovery_total": {"link_error_recovery", "Number of times the link successfully recovered from an error state"},
"multicast_packets_received_total": {"multicast_rcv_packets", "Number of multicast packets received (including errors)"},
"multicast_packets_transmitted_total": {"multicast_xmit_packets", "Number of multicast packets transmitted (including errors)"},
"port_data_received_bytes_total": {"port_rcv_data", "Number of data octets received on all links"},
"port_data_transmitted_bytes_total": {"port_xmit_data", "Number of data octets transmitted on all links"},
"unicast_packets_received_total": {"unicast_rcv_packets", "Number of unicast packets received (including errors)"},
"unicast_packets_transmitted_total": {"unicast_xmit_packets", "Number of unicast packets transmitted (including errors)"},
"link_downed_total": {"link_downed", "Number of times the link failed to recover from an error state and went down"},
"link_error_recovery_total": {"link_error_recovery", "Number of times the link successfully recovered from an error state"},
"multicast_packets_received_total": {"multicast_rcv_packets", "Number of multicast packets received (including errors)"},
"multicast_packets_transmitted_total": {"multicast_xmit_packets", "Number of multicast packets transmitted (including errors)"},
"port_constraint_errors_received_total": {"port_rcv_constraint_errors", "Number of packets received on the switch physical port that are discarded"},
"port_constraint_errors_transmitted_total": {"port_xmit_constraint_errors", "Number of packets not transmitted from the switch physical port"},
"port_data_received_bytes_total": {"port_rcv_data", "Number of data octets received on all links"},
"port_data_transmitted_bytes_total": {"port_xmit_data", "Number of data octets transmitted on all links"},
"port_discards_received_total": {"port_rcv_discards", "Number of inbound packets discarded by the port because the port is down or congested"},
"port_discards_transmitted_total": {"port_xmit_discards", "Number of outbound packets discarded by the port because the port is down or congested"},
"port_errors_received_total": {"port_rcv_errors", "Number of packets containing an error that were received on this port"},
"port_packets_received_total": {"port_rcv_packets", "Number of packets received on all VLs by this port (including errors)"},
"port_packets_transmitted_total": {"port_xmit_packets", "Number of packets transmitted on all VLs from this port (including errors)"},
"port_transmit_wait_total": {"port_xmit_wait", "Number of ticks during which the port had data to transmit but no data was sent during the entire tick"},
"unicast_packets_received_total": {"unicast_rcv_packets", "Number of unicast packets received (including errors)"},
"unicast_packets_transmitted_total": {"unicast_xmit_packets", "Number of unicast packets transmitted (including errors)"},
}
// Deprecated counters for some older versions of InfiniBand drivers.