Merge pull request #32 from magicrobotmonkey/add_cache_rate

health: expose metrics for cache tier pool operations
This commit is contained in:
Vaibhav Bhembre 2016-11-10 12:26:30 -05:00 committed by GitHub
commit 0cf254f41d
2 changed files with 98 additions and 0 deletions

View File

@ -35,6 +35,9 @@ var (
clientIOWriteRegex = regexp.MustCompile(`(\d+) ([kKmMgG][bB])/s wr`)
clientIOReadOpsRegex = regexp.MustCompile(`(\d+) op/s rd`)
clientIOWriteOpsRegex = regexp.MustCompile(`(\d+) op/s wr`)
cacheFlushRateRegex = regexp.MustCompile(`(\d+) ([kKmMgG][bB])/s flush`)
cacheEvictRateRegex = regexp.MustCompile(`(\d+) ([kKmMgG][bB])/s evict`)
cachePromoteOpsRegex = regexp.MustCompile(`(\d+) op/s promote`)
// Older versions of Ceph, hammer (v0.94) and below, support this format.
clientIOOpsRegex = regexp.MustCompile(`(\d+) op/s[^ \w]*$`)
@ -132,6 +135,15 @@ type ClusterHealthCollector struct {
// ClientIOWriteOps shows the rate of total write operations conducted by all clients on the cluster.
ClientIOWriteOps prometheus.Gauge
// CacheFlushIORate shows the i/o rate at which data is being flushed from the cache pool.
CacheFlushIORate prometheus.Gauge
// CacheEvictIORate shows the i/o rate at which data is being evicted from the cache pool.
CacheEvictIORate prometheus.Gauge
// CachePromoteIOOps shows the rate of operations promoting objects to the cache pool.
CachePromoteIOOps prometheus.Gauge
}
const (
@ -320,6 +332,27 @@ func NewClusterHealthCollector(conn Conn) *ClusterHealthCollector {
Help: "Total client write I/O ops on the cluster measured per second",
},
),
CacheFlushIORate: prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: cephNamespace,
Name: "cache_flush_io_bytes",
Help: "Rate of bytes being flushed from the cache pool per second",
},
),
CacheEvictIORate: prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: cephNamespace,
Name: "cache_evict_io_bytes",
Help: "Rate of bytes being evicted from the cache pool per second",
},
),
CachePromoteIOOps: prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: cephNamespace,
Name: "cache_promote_io_ops",
Help: "Total cache promote operations measured per second",
},
),
}
}
@ -349,6 +382,9 @@ func (c *ClusterHealthCollector) metricsList() []prometheus.Metric {
c.ClientIOOps,
c.ClientIOReadOps,
c.ClientIOWriteOps,
c.CacheFlushIORate,
c.CacheEvictIORate,
c.CachePromoteIOOps,
}
}
@ -589,6 +625,10 @@ func (c *ClusterHealthCollector) collectRecoveryClientIO() error {
if err := c.collectClientIO(line); err != nil {
return err
}
case strings.HasPrefix(line, "cache io"):
if err := c.collectCacheIO(line); err != nil {
return err
}
}
}
return nil
@ -728,6 +768,60 @@ func (c *ClusterHealthCollector) collectRecoveryIO(recoveryStr string) error {
return nil
}
// collectCacheIO parses a "cache io" status line and updates the cache tier
// gauges for whichever of the flush, evict and promote figures it contains.
//
// Every figure is optional: a gauge is set only when its pattern matches the
// line and is left untouched otherwise. An error is returned as soon as a
// matched number cannot be parsed or a matched unit is not one of kB, MB or
// GB (compared case-insensitively); figures after the failing one are not
// processed.
func (c *ClusterHealthCollector) collectCacheIO(clientStr string) error {
	// Flush and evict share the same "<count> <unit>/s <verb>" shape, so they
	// are handled by one loop, in the same order as they appear on the line.
	byteRates := []struct {
		re    *regexp.Regexp
		gauge prometheus.Gauge
	}{
		{cacheFlushRateRegex, c.CacheFlushIORate},
		{cacheEvictRateRegex, c.CacheEvictIORate},
	}
	for _, r := range byteRates {
		matched := r.re.FindStringSubmatch(clientStr)
		if len(matched) != 3 {
			continue
		}
		v, err := cacheIOBytesPerSec(matched[1], matched[2])
		if err != nil {
			return err
		}
		r.gauge.Set(v)
	}

	// Promote is reported as a plain operation count with no unit.
	matched := cachePromoteOpsRegex.FindStringSubmatch(clientStr)
	if len(matched) == 2 {
		v, err := strconv.ParseFloat(matched[1], 64)
		if err != nil {
			return err
		}
		c.CachePromoteIOOps.Set(v)
	}
	return nil
}

// cacheIOBytesPerSec converts a decimal count and its unit ("kB", "MB" or
// "GB", in any letter case) into a byte figure using power-of-ten
// multipliers. The arithmetic is done in float64 rather than int so that
// multi-gigabyte rates cannot overflow on platforms where int is 32 bits.
func cacheIOBytesPerSec(count, unit string) (float64, error) {
	v, err := strconv.ParseFloat(count, 64)
	if err != nil {
		return 0, err
	}
	switch strings.ToLower(unit) {
	case "gb":
		return v * 1e9, nil
	case "mb":
		return v * 1e6, nil
	case "kb":
		return v * 1e3, nil
	default:
		return 0, fmt.Errorf("can't parse units %q", unit)
	}
}
// Describe sends all the descriptions of individual metrics of ClusterHealthCollector
// to the provided prometheus channel.
func (c *ClusterHealthCollector) Describe(ch chan<- *prometheus.Desc) {

View File

@ -310,6 +310,7 @@ $ sudo ceph -s
monmap e3: 3 mons at {mon01,mon02,mon03}
recovery io 5779 MB/s, 4 keys/s, 1522 objects/s
client io 2863 op/s rd, 5847 op/s wr
cache io 251 MB/s flush, 6646 kB/s evict, 55 op/s promote
`,
regexes: []*regexp.Regexp{
regexp.MustCompile(`recovery_io_bytes 5.779e`),
@ -318,6 +319,9 @@ $ sudo ceph -s
regexp.MustCompile(`client_io_ops 8710`),
regexp.MustCompile(`client_io_read_ops 2863`),
regexp.MustCompile(`client_io_write_ops 5847`),
regexp.MustCompile(`cache_flush_io_bytes 2.51e`),
regexp.MustCompile(`cache_evict_io_bytes 6.646e`),
regexp.MustCompile(`cache_promote_io_ops 55`),
},
},
} {