ceph_exporter/ceph/cluster_usage.go
2022-02-23 15:43:46 -08:00

157 lines
4.5 KiB
Go

// Copyright 2016 DigitalOcean
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ceph
import (
"encoding/json"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
)
const (
cephNamespace = "ceph"
)
// A ClusterUsageCollector is used to gather all the global stats about a
// given ceph cluster. It is sometimes essential to know how fast the cluster
// is growing or shrinking as a whole in order to zero in on the cause. The
// pool specific stats are provided separately.
type ClusterUsageCollector struct {
conn Conn
logger *logrus.Logger
version *Version
// GlobalCapacity displays the total storage capacity of the cluster. This
// information is based on the actual no. of objects that are
// allocated. It does not take overcommitment into consideration.
GlobalCapacity prometheus.Gauge
// UsedCapacity shows the storage under use.
UsedCapacity prometheus.Gauge
// AvailableCapacity shows the remaining capacity of the cluster that is
// left unallocated.
AvailableCapacity prometheus.Gauge
}
// NewClusterUsageCollector creates and returns the reference to
// ClusterUsageCollector and internally defines each metric that display
// cluster stats.
func NewClusterUsageCollector(exporter *Exporter) *ClusterUsageCollector {
labels := make(prometheus.Labels)
labels["cluster"] = exporter.Cluster
return &ClusterUsageCollector{
conn: exporter.Conn,
logger: exporter.Logger,
version: exporter.Version,
GlobalCapacity: prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: cephNamespace,
Name: "cluster_capacity_bytes",
Help: "Total capacity of the cluster",
ConstLabels: labels,
}),
UsedCapacity: prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: cephNamespace,
Name: "cluster_used_bytes",
Help: "Capacity of the cluster currently in use",
ConstLabels: labels,
}),
AvailableCapacity: prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: cephNamespace,
Name: "cluster_available_bytes",
Help: "Available space within the cluster",
ConstLabels: labels,
}),
}
}
func (c *ClusterUsageCollector) metricsList() []prometheus.Metric {
return []prometheus.Metric{
c.GlobalCapacity,
c.UsedCapacity,
c.AvailableCapacity,
}
}
type cephClusterStats struct {
Stats struct {
TotalBytes float64 `json:"total_bytes"`
TotalUsedBytes float64 `json:"total_used_bytes"`
TotalAvailBytes float64 `json:"total_avail_bytes"`
} `json:"stats"`
}
func (c *ClusterUsageCollector) collect() error {
cmd := c.cephUsageCommand()
buf, _, err := c.conn.MonCommand(cmd)
if err != nil {
c.logger.WithError(err).WithField(
"args", string(cmd),
).Error("error executing mon command")
return err
}
stats := &cephClusterStats{}
if err := json.Unmarshal(buf, stats); err != nil {
return err
}
c.GlobalCapacity.Set(stats.Stats.TotalBytes)
c.UsedCapacity.Set(stats.Stats.TotalUsedBytes)
c.AvailableCapacity.Set(stats.Stats.TotalAvailBytes)
return nil
}
func (c *ClusterUsageCollector) cephUsageCommand() []byte {
cmd, err := json.Marshal(map[string]interface{}{
"prefix": "df",
"detail": "detail",
"format": "json",
})
if err != nil {
// panic! because ideally in no world this hard-coded input
// should fail.
c.logger.WithError(err).Panic("error marshalling ceph df detail")
}
return cmd
}
// Describe sends the descriptors of each metric over to the provided channel.
// The corresponding metric values are sent separately.
func (c *ClusterUsageCollector) Describe(ch chan<- *prometheus.Desc) {
for _, metric := range c.metricsList() {
ch <- metric.Desc()
}
}
// Collect sends the metric values for each metric pertaining to the global
// cluster usage over to the provided prometheus Metric channel.
func (c *ClusterUsageCollector) Collect(ch chan<- prometheus.Metric) {
c.logger.Debug("collecting cluster usage metrics")
if err := c.collect(); err != nil {
c.logger.WithError(err).Error("error collecting cluster usage metrics")
return
}
for _, metric := range c.metricsList() {
ch <- metric
}
}