// ceph_exporter/collectors/cluster_usage.go
//
// 180 lines, 5.1 KiB, Go

// Copyright 2016 DigitalOcean
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collectors
import (
"encoding/json"
"log"
"github.com/prometheus/client_golang/prometheus"
)
// cephNamespace is the Prometheus namespace under which the collectors in this
// file register their metrics.
const cephNamespace = "ceph"
// A ClusterUsageCollector is used to gather all the global stats about a given
// ceph cluster. It is sometimes essential to know how fast the cluster is growing
// or shrinking as a whole in order to zero in on the cause. The pool specific
// stats are provided separately.
//
// Instances built by NewClusterUsageCollector carry the cluster name as a
// constant "cluster" label on every gauge below.
type ClusterUsageCollector struct {
// conn is the connection on which the monitor command is issued whenever
// the gauges are refreshed.
conn Conn
// GlobalCapacity displays the total storage capacity of the cluster. This
// information is based on the actual number of objects that are allocated. It
// does not take overcommitment into consideration. It is populated from the
// "total_bytes" field of the monitor reply.
GlobalCapacity prometheus.Gauge
// UsedCapacity shows the storage under use. It is populated from the
// "total_used_bytes" field of the monitor reply.
UsedCapacity prometheus.Gauge
// AvailableCapacity shows the remaining capacity of the cluster that is left
// unallocated. It is populated from the "total_avail_bytes" field of the
// monitor reply.
AvailableCapacity prometheus.Gauge
// Objects shows the total number of RADOS objects that are currently
// allocated. It is populated from the "total_objects" field of the monitor
// reply.
Objects prometheus.Gauge
}
// NewClusterUsageCollector builds a ClusterUsageCollector that issues its
// monitor commands on conn. All four gauges are registered under the ceph
// namespace and share a single constant label, "cluster", set to the supplied
// cluster name.
func NewClusterUsageCollector(conn Conn, cluster string) *ClusterUsageCollector {
	constLabels := prometheus.Labels{"cluster": cluster}

	// newGauge stamps out a gauge that differs from its siblings only in
	// metric name and help text.
	newGauge := func(name, help string) prometheus.Gauge {
		return prometheus.NewGauge(prometheus.GaugeOpts{
			Namespace:   cephNamespace,
			Name:        name,
			Help:        help,
			ConstLabels: constLabels,
		})
	}

	collector := &ClusterUsageCollector{conn: conn}
	collector.GlobalCapacity = newGauge("cluster_capacity_bytes", "Total capacity of the cluster")
	collector.UsedCapacity = newGauge("cluster_used_bytes", "Capacity of the cluster currently in use")
	collector.AvailableCapacity = newGauge("cluster_available_bytes", "Available space within the cluster")
	collector.Objects = newGauge("cluster_objects", "No. of rados objects within the cluster")
	return collector
}
// metricsList returns the collector's gauges in a fixed order: total capacity,
// used capacity, available capacity, object count. Describe and Collect both
// iterate over this list.
func (c *ClusterUsageCollector) metricsList() []prometheus.Metric {
	gauges := make([]prometheus.Metric, 0, 4)
	gauges = append(gauges,
		c.GlobalCapacity,
		c.UsedCapacity,
		c.AvailableCapacity,
		c.Objects,
	)
	return gauges
}
// cephClusterStats holds the part of the JSON reply to the "df" monitor
// command that this collector reads: the cluster-wide totals nested under the
// top-level "stats" key. Every value is decoded as a json.Number and converted
// to float64 by collect.
type cephClusterStats struct {
Stats struct {
// TotalBytes feeds the GlobalCapacity gauge.
TotalBytes json.Number `json:"total_bytes"`
// TotalUsedBytes feeds the UsedCapacity gauge.
TotalUsedBytes json.Number `json:"total_used_bytes"`
// TotalAvailBytes feeds the AvailableCapacity gauge.
TotalAvailBytes json.Number `json:"total_avail_bytes"`
// TotalObjects feeds the Objects gauge.
TotalObjects json.Number `json:"total_objects"`
} `json:"stats"`
}
// collect issues the usage command on the connection, decodes the JSON reply
// and refreshes all four gauges.
//
// An error is returned only when the monitor command itself fails or the reply
// cannot be unmarshalled. A single field that cannot be converted to float64
// is logged and its gauge is still set, using whatever value the conversion
// produced.
func (c *ClusterUsageCollector) collect() error {
	raw, _, err := c.conn.MonCommand(c.cephUsageCommand())
	if err != nil {
		return err
	}

	var usage cephClusterStats
	if err = json.Unmarshal(raw, &usage); err != nil {
		return err
	}

	// toFloat converts one field, logging (but not propagating) a failure so
	// that the remaining fields are still processed.
	toFloat := func(num json.Number, msg string) float64 {
		val, convErr := num.Float64()
		if convErr != nil {
			log.Println(msg, convErr)
		}
		return val
	}

	// Convert everything first, then publish, so the gauges are updated
	// back to back.
	capacity := toFloat(usage.Stats.TotalBytes, "[ERROR] cannot extract total bytes:")
	used := toFloat(usage.Stats.TotalUsedBytes, "[ERROR] cannot extract used bytes:")
	available := toFloat(usage.Stats.TotalAvailBytes, "[ERROR] cannot extract available bytes:")
	objects := toFloat(usage.Stats.TotalObjects, "[ERROR] cannot extract total objects:")

	c.GlobalCapacity.Set(capacity)
	c.UsedCapacity.Set(used)
	c.AvailableCapacity.Set(available)
	c.Objects.Set(objects)

	return nil
}
// cephUsageCommand returns the JSON-encoded monitor command that requests the
// detailed "df" report in JSON format.
func (c *ClusterUsageCollector) cephUsageCommand() []byte {
	args := map[string]string{
		"prefix": "df",
		"detail": "detail",
		"format": "json",
	}

	payload, err := json.Marshal(args)
	if err != nil {
		// The input is a fixed set of string literals, so a marshalling
		// failure here can only be a programming error.
		panic(err)
	}
	return payload
}
// Describe pushes the descriptor of every gauge owned by the collector onto
// ch. Metric values are delivered separately through Collect.
func (c *ClusterUsageCollector) Describe(ch chan<- *prometheus.Desc) {
	gauges := c.metricsList()
	for i := range gauges {
		ch <- gauges[i].Desc()
	}
}
// Collect refreshes the global cluster usage gauges and then pushes each of
// them onto ch. If the refresh fails, the error is logged and nothing is sent.
func (c *ClusterUsageCollector) Collect(ch chan<- prometheus.Metric) {
	err := c.collect()
	if err != nil {
		log.Println("[ERROR] failed collecting cluster usage metrics:", err)
		return
	}

	gauges := c.metricsList()
	for i := 0; i < len(gauges); i++ {
		ch <- gauges[i]
	}
}