ceph_exporter/ceph/pool_usage.go

// Copyright 2022 DigitalOcean
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package ceph

import (
	"encoding/json"
	"fmt"
	"math"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/sirupsen/logrus"
)

// PoolUsageCollector displays statistics about each pool in the Ceph cluster.
type PoolUsageCollector struct {
	conn    Conn
	logger  *logrus.Logger
	version *Version

	// UsedBytes tracks the amount of bytes currently allocated for the pool. This
	// does not factor in the overcommitment made for individual images.
	UsedBytes *prometheus.Desc

	// RawUsedBytes tracks the amount of raw bytes currently used for the pool. This
	// factors in the replication factor (size) of the pool.
	RawUsedBytes *prometheus.Desc

	// MaxAvail tracks the amount of bytes currently free for the pool,
	// which depends on the replication settings for the pool in question.
	MaxAvail *prometheus.Desc

	// PercentUsed is the percentage of the raw space available to the pool that is currently in use.
	PercentUsed *prometheus.Desc

	// Objects shows the no. of RADOS objects created within the pool.
	Objects *prometheus.Desc

	// DirtyObjects shows the no. of RADOS dirty objects in a cache-tier pool;
	// this doesn't make sense in a regular pool, see:
	// http://lists.ceph.com/pipermail/ceph-users-ceph.com/2015-April/000557.html
	DirtyObjects *prometheus.Desc

	// UnfoundObjects shows the no. of RADOS unfound objects within each pool.
	UnfoundObjects *prometheus.Desc

	// ReadIO tracks the read IO calls made for the images within each pool.
	ReadIO *prometheus.Desc

	// ReadBytes tracks the read throughput made for the images within each pool.
	ReadBytes *prometheus.Desc

	// WriteIO tracks the write IO calls made for the images within each pool.
	WriteIO *prometheus.Desc

	// WriteBytes tracks the write throughput made for the images within each pool.
	WriteBytes *prometheus.Desc
}

// NewPoolUsageCollector creates a new instance of PoolUsageCollector and returns
// its reference.
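//
// A minimal registration sketch (the `exporter` value is hypothetical and assumed
// to be an *Exporter wired up elsewhere in this package):
//
//	collector := NewPoolUsageCollector(exporter)
//	prometheus.MustRegister(collector)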
func NewPoolUsageCollector(exporter *Exporter) *PoolUsageCollector {
	var (
		subSystem = "pool"
		poolLabel = []string{"pool"}
	)

	labels := make(prometheus.Labels)
	labels["cluster"] = exporter.Cluster

	return &PoolUsageCollector{
		conn:    exporter.Conn,
		logger:  exporter.Logger,
		version: exporter.Version,

		UsedBytes: prometheus.NewDesc(fmt.Sprintf("%s_%s_used_bytes", cephNamespace, subSystem), "Capacity of the pool that is currently under use",
			poolLabel, labels,
		),
		RawUsedBytes: prometheus.NewDesc(fmt.Sprintf("%s_%s_raw_used_bytes", cephNamespace, subSystem), "Raw capacity of the pool that is currently under use, this factors in the size",
			poolLabel, labels,
		),
		MaxAvail: prometheus.NewDesc(fmt.Sprintf("%s_%s_available_bytes", cephNamespace, subSystem), "Free space for the pool",
			poolLabel, labels,
		),
		PercentUsed: prometheus.NewDesc(fmt.Sprintf("%s_%s_percent_used", cephNamespace, subSystem), "Percentage of the capacity available to this pool that is used by this pool",
			poolLabel, labels,
		),
		Objects: prometheus.NewDesc(fmt.Sprintf("%s_%s_objects_total", cephNamespace, subSystem), "Total no. of objects allocated within the pool",
			poolLabel, labels,
		),
		DirtyObjects: prometheus.NewDesc(fmt.Sprintf("%s_%s_dirty_objects_total", cephNamespace, subSystem), "Total no. of dirty objects in a cache-tier pool",
			poolLabel, labels,
		),
		UnfoundObjects: prometheus.NewDesc(fmt.Sprintf("%s_%s_unfound_objects_total", cephNamespace, subSystem), "Total no. of unfound objects for the pool",
			poolLabel, labels,
		),
		ReadIO: prometheus.NewDesc(fmt.Sprintf("%s_%s_read_total", cephNamespace, subSystem), "Total read I/O calls for the pool",
			poolLabel, labels,
		),
		ReadBytes: prometheus.NewDesc(fmt.Sprintf("%s_%s_read_bytes_total", cephNamespace, subSystem), "Total read throughput for the pool",
			poolLabel, labels,
		),
		WriteIO: prometheus.NewDesc(fmt.Sprintf("%s_%s_write_total", cephNamespace, subSystem), "Total write I/O calls for the pool",
			poolLabel, labels,
		),
		WriteBytes: prometheus.NewDesc(fmt.Sprintf("%s_%s_write_bytes_total", cephNamespace, subSystem), "Total write throughput for the pool",
			poolLabel, labels,
		),
	}
}
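
// cephPoolStats models the subset of the `ceph df detail` JSON output that this
// collector consumes: one entry per pool with its usage and I/O statistics.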
type cephPoolStats struct {
	Pools []struct {
		Name  string `json:"name"`
		ID    int    `json:"id"`
		Stats struct {
			BytesUsed    float64 `json:"bytes_used"`
			StoredRaw    float64 `json:"stored_raw"`
			Stored       float64 `json:"stored"`
			MaxAvail     float64 `json:"max_avail"`
			PercentUsed  float64 `json:"percent_used"`
			Objects      float64 `json:"objects"`
			DirtyObjects float64 `json:"dirty"`
			ReadIO       float64 `json:"rd"`
			ReadBytes    float64 `json:"rd_bytes"`
			WriteIO      float64 `json:"wr"`
			WriteBytes   float64 `json:"wr_bytes"`
		} `json:"stats"`
	} `json:"pools"`
}
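
// collect runs `ceph df detail` against the monitors, parses the per-pool
// statistics, and emits one gauge per pool and metric on the given channel.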
func (p *PoolUsageCollector) collect(ch chan<- prometheus.Metric) error {
	cmd := p.cephUsageCommand()
	buf, _, err := p.conn.MonCommand(cmd)
	if err != nil {
		p.logger.WithError(err).WithField(
			"args", string(cmd),
		).Error("error executing mon command")
		return err
	}

	stats := &cephPoolStats{}
	if err := json.Unmarshal(buf, stats); err != nil {
		return err
	}

	for _, pool := range stats.Pools {
		ch <- prometheus.MustNewConstMetric(p.UsedBytes, prometheus.GaugeValue, pool.Stats.Stored, pool.Name)
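		// stored_raw is not reported by every Ceph release; taking the larger of
		// stored_raw and bytes_used keeps this metric populated regardless of
		// which of the two fields the cluster actually fills in.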
		ch <- prometheus.MustNewConstMetric(p.RawUsedBytes, prometheus.GaugeValue, math.Max(pool.Stats.StoredRaw, pool.Stats.BytesUsed), pool.Name)
		ch <- prometheus.MustNewConstMetric(p.MaxAvail, prometheus.GaugeValue, pool.Stats.MaxAvail, pool.Name)
		ch <- prometheus.MustNewConstMetric(p.PercentUsed, prometheus.GaugeValue, pool.Stats.PercentUsed, pool.Name)
		ch <- prometheus.MustNewConstMetric(p.Objects, prometheus.GaugeValue, pool.Stats.Objects, pool.Name)
		ch <- prometheus.MustNewConstMetric(p.DirtyObjects, prometheus.GaugeValue, pool.Stats.DirtyObjects, pool.Name)
		ch <- prometheus.MustNewConstMetric(p.ReadIO, prometheus.GaugeValue, pool.Stats.ReadIO, pool.Name)
		ch <- prometheus.MustNewConstMetric(p.ReadBytes, prometheus.GaugeValue, pool.Stats.ReadBytes, pool.Name)
		ch <- prometheus.MustNewConstMetric(p.WriteIO, prometheus.GaugeValue, pool.Stats.WriteIO, pool.Name)
		ch <- prometheus.MustNewConstMetric(p.WriteBytes, prometheus.GaugeValue, pool.Stats.WriteBytes, pool.Name)

		st, err := p.conn.GetPoolStats(pool.Name)
		if err != nil {
			p.logger.WithError(err).WithField(
				"pool", pool.Name,
			).Error("error getting pool stats")
			continue
		}

		ch <- prometheus.MustNewConstMetric(p.UnfoundObjects, prometheus.GaugeValue, float64(st.ObjectsUnfound), pool.Name)
	}

	return nil
}
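
// cephUsageCommand returns the marshalled mon command that requests
// `ceph df detail` output in JSON format.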
func (p *PoolUsageCollector) cephUsageCommand() []byte {
	cmd, err := json.Marshal(map[string]interface{}{
		"prefix": "df",
		"detail": "detail",
		"format": "json",
	})
	if err != nil {
		p.logger.WithError(err).Panic("error marshalling ceph df detail")
	}
	return cmd
}

// Describe fulfills the prometheus.Collector interface and sends the descriptors
// of the pool metrics to the given channel.
func (p *PoolUsageCollector) Describe(ch chan<- *prometheus.Desc) {
	ch <- p.UsedBytes
	ch <- p.RawUsedBytes
	ch <- p.MaxAvail
	ch <- p.PercentUsed
	ch <- p.Objects
	ch <- p.DirtyObjects
	ch <- p.UnfoundObjects
	ch <- p.ReadIO
	ch <- p.ReadBytes
	ch <- p.WriteIO
	ch <- p.WriteBytes
}

// Collect extracts the current values of all the metrics and sends them to the
// prometheus channel.
func (p *PoolUsageCollector) Collect(ch chan<- prometheus.Metric) {
	p.logger.Debug("collecting pool usage metrics")

	if err := p.collect(ch); err != nil {
		p.logger.WithError(err).Error("error collecting pool usage metrics")
		return
	}
}