logical disk: Fix metrics for non drive letter disks (#1498)

This commit is contained in:
Jan-Otto Kröpke 2024-07-23 13:01:30 +02:00 committed by GitHub
parent 31bb6d03ee
commit e2e1141973
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 285 additions and 171 deletions

View File

@ -23,6 +23,7 @@ If given, a disk needs to *not* match the exclude regexp in order for the corres
Name | Description | Type | Labels
-----|-------------|------|-------
`windows_logical_disk_info` | A metric with a constant '1' value labeled with logical disk information | gauge | `disk`,`filesystem`,`serial_number`,`volume`,`volume_name`,`type`
`windows_logical_disk_requests_queued` | Number of requests outstanding on the disk at the time the performance data is collected | gauge | `volume`
`windows_logical_disk_avg_read_requests_queued` | Average number of read requests that were queued for the selected disk during the sample interval | gauge | `volume`
`windows_logical_disk_avg_write_requests_queued` | Average number of write requests that were queued for the selected disk during the sample interval | gauge | `volume`
@ -36,6 +37,7 @@ Name | Description | Type | Labels
`windows_logical_disk_size_bytes` | Total size of the disk in bytes (not real time, updates every 10-15 min) | gauge | `volume`
`windows_logical_disk_idle_seconds_total` | Seconds the disk was idle (not servicing read/write requests) | counter | `volume`
`windows_logical_disk_split_ios_total` | Number of I/Os to the disk split into multiple I/Os | counter | `volume`
`windows_logical_disk_readonly` | Whether the logical disk is read-only | gauge | `volume`
### Warning about size metrics
The `free_bytes` and `size_bytes` metrics are not updated in real time and might have a delay of 10-15min.
@ -47,6 +49,15 @@ Query the rate of write operations to a disk
rate(windows_logical_disk_writes_total{instance="localhost", volume=~"C:"}[2m])
```
Logical Volume information
```
windows_logical_disk_info{disk="0",filesystem="",serial_number="",type="",volume="HarddiskVolume2",volume_name=""} 1
windows_logical_disk_info{disk="0",filesystem="",serial_number="",type="",volume="HarddiskVolume3",volume_name=""} 1
windows_logical_disk_info{disk="0",filesystem="NTFS",serial_number="668EEC37",type="fixed",volume="C:",volume_name="Windows"} 1
windows_logical_disk_info{disk="1",filesystem="NTFS",serial_number="50AE953B",type="fixed",volume="D:",volume_name="Temporary Storage"} 1
windows_logical_disk_info{disk="1",filesystem="ReFS",serial_number="C69B59AD",type="fixed",volume="G:",volume_name="Volume"} 1
```
## Useful queries
Calculate rate of total IOPS for disk
```

View File

@ -3,16 +3,18 @@
package logical_disk
import (
"errors"
"encoding/binary"
"fmt"
"golang.org/x/sys/windows"
"regexp"
"strconv"
"strings"
"github.com/alecthomas/kingpin/v2"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus-community/windows_exporter/pkg/perflib"
"github.com/prometheus-community/windows_exporter/pkg/types"
"github.com/prometheus-community/windows_exporter/pkg/wmi"
"github.com/prometheus/client_golang/prometheus"
)
@ -21,15 +23,8 @@ const (
FlagLogicalDiskVolumeExclude = "collector.logical_disk.volume-exclude"
FlagLogicalDiskVolumeInclude = "collector.logical_disk.volume-include"
win32DiskQuery = "SELECT VolumeName,DeviceID FROM WIN32_LogicalDisk"
)
// Win32_LogicalDisk is the destination row type for the win32DiskQuery WMI
// query, which selects the VolumeName and DeviceID columns.
type Win32_LogicalDisk struct {
	// VolumeName and DeviceID mirror the WMI columns of the same names.
	VolumeName, DeviceID string
}
type Config struct {
VolumeInclude string `yaml:"volume_include"`
VolumeExclude string `yaml:"volume_exclude"`
@ -47,6 +42,8 @@ type collector struct {
volumeInclude *string
volumeExclude *string
Information *prometheus.Desc
ReadOnly *prometheus.Desc
RequestsQueued *prometheus.Desc
AvgReadQueue *prometheus.Desc
AvgWriteQueue *prometheus.Desc
@ -68,6 +65,14 @@ type collector struct {
volumeExcludePattern *regexp.Regexp
}
// volumeInfo carries the per-volume details produced by getVolumeInfo.
// The zero value is what callers receive for volumes that could not be
// queried.
type volumeInfo struct {
	// filesystem is the file system name reported for the volume.
	// serialNumber is the volume serial number rendered as uppercase hex.
	// label is the volume label.
	// volumeType is the drive type name produced by getDriveType.
	filesystem, serialNumber, label, volumeType string

	// readonly is derived from the FILE_READ_ONLY_VOLUME file system flag.
	readonly float64
}
func New(logger log.Logger, config *Config) types.Collector {
if config == nil {
config = &ConfigDefaults
@ -109,115 +114,127 @@ func (c *collector) GetPerfCounter() ([]string, error) {
}
func (c *collector) Build() error {
c.Information = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "info"),
"A metric with a constant '1' value labeled with logical disk information",
[]string{"disk", "type", "volume", "volume_name", "filesystem", "serial_number"},
nil,
)
c.ReadOnly = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "readonly"),
"Whether the logical disk is read-only",
[]string{"volume"},
nil,
)
c.RequestsQueued = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "requests_queued"),
"The number of requests queued to the disk (LogicalDisk.CurrentDiskQueueLength)",
[]string{"volume", "volume_name"},
[]string{"volume"},
nil,
)
c.AvgReadQueue = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "avg_read_requests_queued"),
"Average number of read requests that were queued for the selected disk during the sample interval (LogicalDisk.AvgDiskReadQueueLength)",
[]string{"volume", "volume_name"},
[]string{"volume"},
nil,
)
c.AvgWriteQueue = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "avg_write_requests_queued"),
"Average number of write requests that were queued for the selected disk during the sample interval (LogicalDisk.AvgDiskWriteQueueLength)",
[]string{"volume", "volume_name"},
[]string{"volume"},
nil,
)
c.ReadBytesTotal = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "read_bytes_total"),
"The number of bytes transferred from the disk during read operations (LogicalDisk.DiskReadBytesPerSec)",
[]string{"volume", "volume_name"},
[]string{"volume"},
nil,
)
c.ReadsTotal = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "reads_total"),
"The number of read operations on the disk (LogicalDisk.DiskReadsPerSec)",
[]string{"volume", "volume_name"},
[]string{"volume"},
nil,
)
c.WriteBytesTotal = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "write_bytes_total"),
"The number of bytes transferred to the disk during write operations (LogicalDisk.DiskWriteBytesPerSec)",
[]string{"volume", "volume_name"},
[]string{"volume"},
nil,
)
c.WritesTotal = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "writes_total"),
"The number of write operations on the disk (LogicalDisk.DiskWritesPerSec)",
[]string{"volume", "volume_name"},
[]string{"volume"},
nil,
)
c.ReadTime = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "read_seconds_total"),
"Seconds that the disk was busy servicing read requests (LogicalDisk.PercentDiskReadTime)",
[]string{"volume", "volume_name"},
[]string{"volume"},
nil,
)
c.WriteTime = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "write_seconds_total"),
"Seconds that the disk was busy servicing write requests (LogicalDisk.PercentDiskWriteTime)",
[]string{"volume", "volume_name"},
[]string{"volume"},
nil,
)
c.FreeSpace = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "free_bytes"),
"Free space in bytes, updates every 10-15 min (LogicalDisk.PercentFreeSpace)",
[]string{"volume", "volume_name"},
[]string{"volume"},
nil,
)
c.TotalSpace = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "size_bytes"),
"Total space in bytes, updates every 10-15 min (LogicalDisk.PercentFreeSpace_Base)",
[]string{"volume", "volume_name"},
[]string{"volume"},
nil,
)
c.IdleTime = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "idle_seconds_total"),
"Seconds that the disk was idle (LogicalDisk.PercentIdleTime)",
[]string{"volume", "volume_name"},
[]string{"volume"},
nil,
)
c.SplitIOs = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "split_ios_total"),
"The number of I/Os to the disk were split into multiple I/Os (LogicalDisk.SplitIOPerSec)",
[]string{"volume", "volume_name"},
[]string{"volume"},
nil,
)
c.ReadLatency = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "read_latency_seconds_total"),
"Shows the average time, in seconds, of a read operation from the disk (LogicalDisk.AvgDiskSecPerRead)",
[]string{"volume", "volume_name"},
[]string{"volume"},
nil,
)
c.WriteLatency = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "write_latency_seconds_total"),
"Shows the average time, in seconds, of a write operation to the disk (LogicalDisk.AvgDiskSecPerWrite)",
[]string{"volume", "volume_name"},
[]string{"volume"},
nil,
)
c.ReadWriteLatency = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "read_write_latency_seconds_total"),
"Shows the time, in seconds, of the average disk transfer (LogicalDisk.AvgDiskSecPerTransfer)",
[]string{"volume", "volume_name"},
[]string{"volume"},
nil,
)
@ -250,7 +267,6 @@ func (c *collector) Collect(ctx *types.ScrapeContext, ch chan<- prometheus.Metri
// - https://msdn.microsoft.com/en-us/library/ms803973.aspx - LogicalDisk object reference
type logicalDisk struct {
Name string
VolumeName string
CurrentDiskQueueLength float64 `perflib:"Current Disk Queue Length"`
AvgDiskReadQueueLength float64 `perflib:"Avg. Disk Read Queue Length"`
AvgDiskWriteQueueLength float64 `perflib:"Avg. Disk Write Queue Length"`
@ -270,17 +286,14 @@ type logicalDisk struct {
}
func (c *collector) collect(ctx *types.ScrapeContext, ch chan<- prometheus.Metric) error {
var dst_Win32_LogicalDisk []Win32_LogicalDisk
var (
err error
diskID string
info volumeInfo
dst []logicalDisk
)
if err := wmi.Query(win32DiskQuery, &dst_Win32_LogicalDisk); err != nil {
return err
}
if len(dst_Win32_LogicalDisk) == 0 {
return errors.New("WMI query returned empty result set")
}
var dst []logicalDisk
if err := perflib.UnmarshalObject(ctx.PerfObjects["LogicalDisk"], &dst, c.logger); err != nil {
if err = perflib.UnmarshalObject(ctx.PerfObjects["LogicalDisk"], &dst, c.logger); err != nil {
return err
}
@ -290,145 +303,233 @@ func (c *collector) collect(ctx *types.ScrapeContext, ch chan<- prometheus.Metri
!c.volumeIncludePattern.MatchString(volume.Name) {
continue
}
for _, logicalDisk := range dst_Win32_LogicalDisk {
if logicalDisk.VolumeName == "" {
logicalDisk.VolumeName = "Local Disk"
}
if logicalDisk.DeviceID == volume.Name {
ch <- prometheus.MustNewConstMetric(
c.RequestsQueued,
prometheus.GaugeValue,
volume.CurrentDiskQueueLength,
volume.Name,
logicalDisk.VolumeName,
)
ch <- prometheus.MustNewConstMetric(
c.AvgReadQueue,
prometheus.GaugeValue,
volume.AvgDiskReadQueueLength*perflib.TicksToSecondScaleFactor,
volume.Name,
logicalDisk.VolumeName,
)
ch <- prometheus.MustNewConstMetric(
c.AvgWriteQueue,
prometheus.GaugeValue,
volume.AvgDiskWriteQueueLength*perflib.TicksToSecondScaleFactor,
volume.Name,
logicalDisk.VolumeName,
)
ch <- prometheus.MustNewConstMetric(
c.ReadBytesTotal,
prometheus.CounterValue,
volume.DiskReadBytesPerSec,
volume.Name,
logicalDisk.VolumeName,
)
ch <- prometheus.MustNewConstMetric(
c.ReadsTotal,
prometheus.CounterValue,
volume.DiskReadsPerSec,
volume.Name,
logicalDisk.VolumeName,
)
ch <- prometheus.MustNewConstMetric(
c.WriteBytesTotal,
prometheus.CounterValue,
volume.DiskWriteBytesPerSec,
volume.Name,
logicalDisk.VolumeName,
)
ch <- prometheus.MustNewConstMetric(
c.WritesTotal,
prometheus.CounterValue,
volume.DiskWritesPerSec,
volume.Name,
logicalDisk.VolumeName,
)
ch <- prometheus.MustNewConstMetric(
c.ReadTime,
prometheus.CounterValue,
volume.PercentDiskReadTime,
volume.Name,
logicalDisk.VolumeName,
)
ch <- prometheus.MustNewConstMetric(
c.WriteTime,
prometheus.CounterValue,
volume.PercentDiskWriteTime,
volume.Name,
logicalDisk.VolumeName,
)
ch <- prometheus.MustNewConstMetric(
c.FreeSpace,
prometheus.GaugeValue,
volume.PercentFreeSpace_Base*1024*1024,
volume.Name,
logicalDisk.VolumeName,
)
ch <- prometheus.MustNewConstMetric(
c.TotalSpace,
prometheus.GaugeValue,
volume.PercentFreeSpace*1024*1024,
volume.Name,
logicalDisk.VolumeName,
)
ch <- prometheus.MustNewConstMetric(
c.IdleTime,
prometheus.CounterValue,
volume.PercentIdleTime,
volume.Name,
logicalDisk.VolumeName,
)
ch <- prometheus.MustNewConstMetric(
c.SplitIOs,
prometheus.CounterValue,
volume.SplitIOPerSec,
volume.Name,
logicalDisk.VolumeName,
)
ch <- prometheus.MustNewConstMetric(
c.ReadLatency,
prometheus.CounterValue,
volume.AvgDiskSecPerRead*perflib.TicksToSecondScaleFactor,
volume.Name,
logicalDisk.VolumeName,
)
ch <- prometheus.MustNewConstMetric(
c.WriteLatency,
prometheus.CounterValue,
volume.AvgDiskSecPerWrite*perflib.TicksToSecondScaleFactor,
volume.Name,
logicalDisk.VolumeName,
)
ch <- prometheus.MustNewConstMetric(
c.ReadWriteLatency,
prometheus.CounterValue,
volume.AvgDiskSecPerTransfer*perflib.TicksToSecondScaleFactor,
volume.Name,
logicalDisk.VolumeName,
)
break
}
diskID, err = getDiskIDByVolume(volume.Name)
if err != nil {
_ = level.Warn(c.logger).Log("msg", "failed to get disk ID for "+volume.Name, "err", err)
}
// Volume details are best-effort: on failure info stays the zero value, so
// the info metric below is still emitted with empty labels.
info, err = getVolumeInfo(volume.Name)
if err != nil {
	// The message is built by concatenation, so it must not contain a
	// format verb.
	_ = level.Warn(c.logger).Log("msg", "failed to get volume information for "+volume.Name, "err", err)
}
ch <- prometheus.MustNewConstMetric(
c.Information,
prometheus.GaugeValue,
1,
diskID,
info.volumeType,
volume.Name,
info.label,
info.filesystem,
info.serialNumber,
)
ch <- prometheus.MustNewConstMetric(
c.RequestsQueued,
prometheus.GaugeValue,
volume.CurrentDiskQueueLength,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.AvgReadQueue,
prometheus.GaugeValue,
volume.AvgDiskReadQueueLength*perflib.TicksToSecondScaleFactor,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.AvgWriteQueue,
prometheus.GaugeValue,
volume.AvgDiskWriteQueueLength*perflib.TicksToSecondScaleFactor,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.ReadBytesTotal,
prometheus.CounterValue,
volume.DiskReadBytesPerSec,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.ReadsTotal,
prometheus.CounterValue,
volume.DiskReadsPerSec,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.WriteBytesTotal,
prometheus.CounterValue,
volume.DiskWriteBytesPerSec,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.WritesTotal,
prometheus.CounterValue,
volume.DiskWritesPerSec,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.ReadTime,
prometheus.CounterValue,
volume.PercentDiskReadTime,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.WriteTime,
prometheus.CounterValue,
volume.PercentDiskWriteTime,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.FreeSpace,
prometheus.GaugeValue,
volume.PercentFreeSpace_Base*1024*1024,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.TotalSpace,
prometheus.GaugeValue,
volume.PercentFreeSpace*1024*1024,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.IdleTime,
prometheus.CounterValue,
volume.PercentIdleTime,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.SplitIOs,
prometheus.CounterValue,
volume.SplitIOPerSec,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.ReadLatency,
prometheus.CounterValue,
volume.AvgDiskSecPerRead*perflib.TicksToSecondScaleFactor,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.WriteLatency,
prometheus.CounterValue,
volume.AvgDiskSecPerWrite*perflib.TicksToSecondScaleFactor,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.ReadWriteLatency,
prometheus.CounterValue,
volume.AvgDiskSecPerTransfer*perflib.TicksToSecondScaleFactor,
volume.Name,
)
}
return nil
}
// getDriveType translates a drive type code, as returned by
// windows.GetDriveType, into the lowercase name used for the "type" label.
// Codes without a dedicated name are reported as "unknown".
func getDriveType(driveType uint32) string {
	// Lookup table indexed by the drive type code.
	names := [...]string{
		windows.DRIVE_UNKNOWN:     "unknown",
		windows.DRIVE_NO_ROOT_DIR: "norootdir",
		windows.DRIVE_REMOVABLE:   "removable",
		windows.DRIVE_FIXED:       "fixed",
		windows.DRIVE_REMOTE:      "remote",
		windows.DRIVE_CDROM:       "cdrom",
		windows.DRIVE_RAMDISK:     "ramdisk",
	}

	if driveType < uint32(len(names)) {
		if name := names[driveType]; name != "" {
			return name
		}
	}

	return "unknown"
}
// getDiskIDByVolume returns the number of the physical disk backing the given
// volume (for example "C:" or "HarddiskVolume2"), formatted as a decimal
// string.
//
// The volume is opened through its `\\.\<name>` device path and queried with
// IOCTL_VOLUME_GET_VOLUME_DISK_EXTENTS. An error is returned when the name
// cannot be converted, the volume cannot be opened or queried, the result is
// too short to parse, or the volume does not map to exactly one disk extent.
func getDiskIDByVolume(rootDrive string) (string, error) {
	// IOCTL_VOLUME_GET_VOLUME_DISK_EXTENTS control code (5636096 decimal).
	const ioctlVolumeGetVolumeDiskExtents = 0x560000

	// Offsets into the VOLUME_DISK_EXTENTS result: the extent count is the
	// first DWORD, and the first extent's disk number is the DWORD at byte 8.
	const (
		extentCountOffset = 0
		diskNumberOffset  = 8
		minResultSize     = diskNumberOffset + 4
	)

	// UTF16PtrFromString reports an embedded NUL as an error instead of
	// panicking like the deprecated StringToUTF16Ptr.
	devicePath, err := windows.UTF16PtrFromString(`\\.\` + rootDrive)
	if err != nil {
		return "", fmt.Errorf("invalid volume name %q: %w", rootDrive, err)
	}

	// The share mode has to include the FILE_SHARE flags to allow concurrent
	// access to the disk. An access mode of 0 avoids needing admin permission.
	shareMode := uint32(windows.FILE_SHARE_READ | windows.FILE_SHARE_WRITE | windows.FILE_SHARE_DELETE)

	handle, err := windows.CreateFile(
		devicePath,
		0, shareMode, nil, windows.OPEN_EXISTING, uint32(windows.FILE_ATTRIBUTE_READONLY), 0)
	if err != nil {
		return "", err
	}

	defer func() {
		// Nothing useful can be done if closing this query-only handle fails.
		_ = windows.CloseHandle(handle)
	}()

	extents := make([]byte, 16*1024)

	var bytesReturned uint32

	err = windows.DeviceIoControl(handle, ioctlVolumeGetVolumeDiskExtents, nil, 0,
		&extents[0], uint32(len(extents)), &bytesReturned, nil)
	if err != nil {
		return "", err
	}

	// Refuse to interpret a result that does not cover the fields read below.
	if bytesReturned < minResultSize {
		return "", fmt.Errorf("could not identify physical drive for %s: short result (%d bytes)", rootDrive, bytesReturned)
	}

	// Only volumes with exactly one extent have a single disk number to report.
	if binary.LittleEndian.Uint32(extents[extentCountOffset:]) != 1 {
		return "", fmt.Errorf("could not identify physical drive for %s", rootDrive)
	}

	diskID := strconv.FormatUint(uint64(binary.LittleEndian.Uint32(extents[diskNumberOffset:])), 10)

	return diskID, nil
}
// getVolumeInfo returns file system details for a drive-letter volume such as
// "C:".
//
// Volume names that do not end in ":" (for example "HarddiskVolume2") are not
// queried and yield an empty volumeInfo with a nil error. When the query fails
// for a CD-ROM or removable drive, an empty volumeInfo with a nil error is
// returned as well; for every other drive type the error is returned.
//
// The readonly field of the result is 1 when the file system reports
// FILE_READ_ONLY_VOLUME and 0 otherwise.
func getVolumeInfo(rootDrive string) (volumeInfo, error) {
	if !strings.HasSuffix(rootDrive, ":") {
		return volumeInfo{}, nil
	}

	// UTF16PtrFromString reports an embedded NUL as an error instead of
	// panicking like the deprecated StringToUTF16Ptr.
	volPath, err := windows.UTF16PtrFromString(rootDrive + `\`)
	if err != nil {
		return volumeInfo{}, fmt.Errorf("invalid volume name %q: %w", rootDrive, err)
	}

	var (
		serial  uint32
		fsFlags uint32
	)

	labelBuf := make([]uint16, windows.MAX_PATH+1)
	fsNameBuf := make([]uint16, windows.MAX_PATH+1)

	driveType := windows.GetDriveType(volPath)

	err = windows.GetVolumeInformation(volPath, &labelBuf[0], uint32(len(labelBuf)),
		&serial, nil, &fsFlags, &fsNameBuf[0], uint32(len(fsNameBuf)))
	if err != nil {
		// NOTE(review): failures on CD-ROM and removable drives are tolerated,
		// presumably because such drives may have no media inserted - confirm.
		if driveType == windows.DRIVE_CDROM || driveType == windows.DRIVE_REMOVABLE {
			return volumeInfo{}, nil
		}

		return volumeInfo{}, err
	}

	// Collapse the masked flag (0 or 0x80000) into a 0/1 gauge value.
	var readonly float64
	if fsFlags&windows.FILE_READ_ONLY_VOLUME != 0 {
		readonly = 1
	}

	return volumeInfo{
		volumeType:   getDriveType(driveType),
		label:        windows.UTF16ToString(labelBuf),
		filesystem:   windows.UTF16ToString(fsNameBuf),
		serialNumber: fmt.Sprintf("%X", serial),
		readonly:     readonly,
	}, nil
}

View File

@ -63,6 +63,8 @@ windows_exporter_collector_timeout{collector="textfile"} 0
# TYPE windows_logical_disk_free_bytes gauge
# HELP windows_logical_disk_idle_seconds_total Seconds that the disk was idle (LogicalDisk.PercentIdleTime)
# TYPE windows_logical_disk_idle_seconds_total counter
# HELP windows_logical_disk_info A metric with a constant '1' value labeled with logical disk information
# TYPE windows_logical_disk_info gauge
# HELP windows_logical_disk_read_bytes_total The number of bytes transferred from the disk during read operations (LogicalDisk.DiskReadBytesPerSec)
# TYPE windows_logical_disk_read_bytes_total counter
# HELP windows_logical_disk_read_latency_seconds_total Shows the average time, in seconds, of a read operation from the disk (LogicalDisk.AvgDiskSecPerRead)