Add Ceph Exporter
This commit is contained in:
commit
9cda67d44a
|
@ -0,0 +1,24 @@
|
|||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
||||
*.prof
|
|
@ -0,0 +1,45 @@
|
|||
Contributing
|
||||
============
|
||||
|
||||
Please use this guide before making any contributions to this repository.
|
||||
|
||||
Preliminary
|
||||
-----------
|
||||
|
||||
* All code **must** be [`gofmt`](https://golang.org/cmd/gofmt/)'d, [`golint`](https://github.com/golang/lint)'d and [`go vet`](https://golang.org/cmd/vet/)'d before being committed.
|
||||
* Code **should** have test coverage to ensure its correctness.
|
||||
|
||||
PRs
|
||||
---
|
||||
|
||||
**Commits**
|
||||
|
||||
Keep individual commits descriptive. Prefix them with the collector name and a
|
||||
colon. Anyone viewing the git history should be able to determine from those
|
||||
first 80 characters what the body of the commit contains. Feel free to expand further on
|
||||
the commit but keep the first 80 characters on point.
|
||||
|
||||
Good Commit:
|
||||
|
||||
```
|
||||
monitor: expose metrics for clock skew
|
||||
- scrape monitor's skew value from ceph's status
|
||||
```
|
||||
|
||||
Bad Commit:
|
||||
|
||||
```
|
||||
new monitor metrics
|
||||
```
|
||||
|
||||
Use your own discretion when deciding whether or not to squash multiple commits
|
||||
in a PR to a single commit. However, each commit should contain a single,
|
||||
logical unit of change, and a descriptive message.
|
||||
|
||||
Resources
|
||||
---------
|
||||
|
||||
* [Effective Go](https://golang.org/doc/effective_go.html)
|
||||
* [Go Code Review Comments](https://github.com/golang/go/wiki/CodeReviewComments)
|
||||
* [How to Write Go Code](https://golang.org/doc/code.html)
|
||||
* [Twelve Go Best Practices](https://talks.golang.org/2013/bestpractices.slide)
|
|
@ -0,0 +1,201 @@
|
|||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -0,0 +1,17 @@
|
|||
# Copyright 2015 The Prometheus Authors
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
VERSION := 0.1.0
|
||||
TARGET := ceph_exporter
|
||||
|
||||
include Makefile.COMMON
|
|
@ -0,0 +1,119 @@
|
|||
# Copyright 2015 The Prometheus Authors
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# THE AUTHORITATIVE VERSION OF THIS MAKEFILE LIVES IN:
|
||||
#
|
||||
# https://github.com/prometheus/utils
|
||||
#
|
||||
# PLEASE MAKE ANY CHANGES THERE AND PROPAGATE THEM TO ALL PROMETHEUS
|
||||
# REPOSITORIES THAT ARE USING THIS MAKEFILE.
|
||||
#
|
||||
# This file provides common Makefile infrastructure for several Prometheus
|
||||
# components. This includes make tasks for downloading Go, setting up a
|
||||
# self-contained build environment, fetching Go dependencies, building
|
||||
# binaries, running tests, and doing release management. This file is intended
|
||||
# to be included from a project's Makefile, which needs to define the following
|
||||
# variables, at a minimum:
|
||||
#
|
||||
# * VERSION - The current version of the project in question.
|
||||
# * TARGET - The desired name of the built binary.
|
||||
#
|
||||
# Many of the variables defined below are defined conditionally (using '?'),
|
||||
# which allows the project's main Makefile to override any of these settings, if
|
||||
# needed. See also:
|
||||
#
|
||||
# https://www.gnu.org/software/make/manual/html_node/Flavors.html#Flavors.
|
||||
#
|
||||
# The including Makefile may define any number of extra targets that are
|
||||
# specific to that project.
|
||||
|
||||
VERSION ?= $(error VERSION not set in including Makefile)
|
||||
TARGET ?= $(error TARGET not set in including Makefile)
|
||||
|
||||
SRC ?= $(shell find . -type f -name "*.go" ! -path "./.build/*")
|
||||
GOOS := $(shell uname | tr A-Z a-z)
|
||||
GOARCH := $(subst x86_64,amd64,$(patsubst i%86,386,$(shell uname -m)))
|
||||
|
||||
ifeq ($(GOOS),darwin)
|
||||
RELEASE_SUFFIX ?= -osx$(shell sw_vers -productVersion)
|
||||
endif
|
||||
|
||||
GO_VERSION ?= 1.4.2
|
||||
|
||||
ifeq ($(shell type go >/dev/null && go version | sed 's/.*go\([0-9.]*\).*/\1/'), $(GO_VERSION))
|
||||
GOROOT := $(shell go env GOROOT)
|
||||
else
|
||||
GOROOT := $(CURDIR)/.build/go$(GO_VERSION)
|
||||
endif
|
||||
|
||||
GOURL ?= https://golang.org/dl
|
||||
GOPKG ?= go$(GO_VERSION).$(GOOS)-$(GOARCH)$(RELEASE_SUFFIX).tar.gz
|
||||
GOPATH := $(CURDIR)/.build/gopath
|
||||
GOCC ?= $(GOROOT)/bin/go
|
||||
GO ?= GOROOT=$(GOROOT) GOPATH=$(GOPATH) $(GOCC)
|
||||
GOFMT ?= $(GOROOT)/bin/gofmt
|
||||
|
||||
# Never honor GOBIN, should it be set at all.
|
||||
unexport GOBIN
|
||||
|
||||
SUFFIX ?= $(GOOS)-$(GOARCH)
|
||||
BINARY ?= $(TARGET)
|
||||
ARCHIVE ?= $(TARGET)-$(VERSION).$(SUFFIX).tar.gz
|
||||
ROOTPKG ?= github.com/prometheus/$(TARGET)
|
||||
SELFLINK ?= $(GOPATH)/src/$(ROOTPKG)
|
||||
|
||||
default: $(BINARY)
|
||||
|
||||
$(GOCC):
|
||||
@echo Go version $(GO_VERSION) required but not found in PATH.
|
||||
@echo About to download and install go$(GO_VERSION) to $(GOROOT)
|
||||
@echo Abort now if you want to manually install it system-wide instead.
|
||||
@echo
|
||||
@sleep 5
|
||||
mkdir -p $(GOROOT)
|
||||
curl -L $(GOURL)/$(GOPKG) | tar -C $(GOROOT) --strip 1 -xz
|
||||
|
||||
$(SELFLINK):
|
||||
mkdir -p $(dir $@)
|
||||
ln -s $(CURDIR) $@
|
||||
|
||||
dependencies-stamp: $(GOCC) $(SRC) | $(SELFLINK)
|
||||
$(GO) get -d
|
||||
touch $@
|
||||
|
||||
$(BINARY): $(GOCC) $(SRC) dependencies-stamp Makefile Makefile.COMMON
|
||||
$(GO) build $(GOFLAGS) -o $@
|
||||
|
||||
.PHONY: archive
|
||||
archive: $(ARCHIVE)
|
||||
|
||||
$(ARCHIVE): $(BINARY)
|
||||
tar -czf $@ $<
|
||||
|
||||
.PHONY: tag
|
||||
tag:
|
||||
git tag $(VERSION)
|
||||
git push --tags
|
||||
|
||||
.PHONY: test
|
||||
test: $(GOCC) dependencies-stamp
|
||||
$(GO) test ./...
|
||||
|
||||
.PHONY: format
|
||||
format: $(GOCC)
|
||||
find . -iname '*.go' | egrep -v "^\./\.build|./generated|\./Godeps|\.(l|y)\.go" | xargs -n1 $(GOFMT) -w -s=true
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
rm -rf $(BINARY) $(ARCHIVE) .build *-stamp
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
# Ceph Exporter
|
||||
Prometheus exporter that scrapes meta information about a running ceph cluster. All the information is gathered from the cluster by interacting with the monitors using an appropriate wrapper over `rados_mon_command()`. Hence, no additional setup is necessary other than having a working ceph cluster.
|
||||
|
||||
## Dependencies
|
||||
|
||||
You should ideally run this exporter from the client that can talk to
|
||||
Ceph. Like any other ceph client it needs the following files to run
|
||||
correctly.
|
||||
|
||||
* `ceph.conf` containing your ceph configuration.
|
||||
* `ceph.<user>.keyring` in order to authenticate to your cluster.
|
||||
|
||||
Ceph exporter will automatically pick those up if they are present in
|
||||
any of the [default
|
||||
locations](http://docs.ceph.com/docs/master/rados/configuration/ceph-conf/#the-configuration-file). Otherwise you will need to provide the configuration manually using the `--ceph.config` flag.
|
||||
|
||||
We use Ceph's [official Golang client](https://github.com/ceph/go-ceph) to run commands on the cluster.
|
||||
|
||||
## Flags
|
||||
|
||||
Name | Description | Default
|
||||
---- | ---- | ----
|
||||
telemetry.addr | Host:Port pair to run exporter on | `*:9190`
|
||||
telemetry.path | URL Path for surfacing metrics to prometheus | `/metrics`
|
||||
ceph.config | Path to ceph configuration file | ""
|
||||
|
||||
## Installation
|
||||
|
||||
The typical way of installing a Go program should work.
|
||||
|
||||
```
|
||||
go install
|
||||
```
|
||||
|
||||
A Makefile is provided in case you find a need for it.
|
||||
|
||||
## Contributing
|
||||
|
||||
Please refer to the [CONTRIBUTING](CONTRIBUTING.md) guide for more
|
||||
information on how to submit your changes to this repository.
|
||||
|
||||
## Sample view
|
||||
|
||||
If you have [promdash](https://github.com/prometheus/promdash) set up you
|
||||
can generate views like:
|
||||
|
||||
![](sample.png)
|
||||
|
||||
---
|
||||
|
||||
Copyright © 2016 DigitalOcean™ Inc.
|
|
@ -0,0 +1,169 @@
|
|||
// Copyright 2016 DigitalOcean
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"log"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// cephNamespace is the Namespace given to the gauges defined in this package.
const cephNamespace = "ceph"
|
||||
|
||||
// A ClusterUsageCollector is used to gather all the global stats about a given
|
||||
// ceph cluster. It is sometimes essential to know how fast the cluster is growing
|
||||
// or shrinking as a whole in order to zero in on the cause. The pool specific
|
||||
// stats are provided separately.
|
||||
type ClusterUsageCollector struct {
|
||||
conn Conn
|
||||
|
||||
// GlobalCapacity displays the total storage capacity of the cluster. This
|
||||
// information is based on the actual no. of objects that are allocated. It
|
||||
// does not take overcommittment into consideration.
|
||||
GlobalCapacity prometheus.Gauge
|
||||
|
||||
// UsedCapacity shows the storage under use.
|
||||
UsedCapacity prometheus.Gauge
|
||||
|
||||
// AvailableCapacity shows the remaining capacity of the cluster that is left unallocated.
|
||||
AvailableCapacity prometheus.Gauge
|
||||
|
||||
// Objects show the total no. of RADOS objects that are currently allocated.
|
||||
Objects prometheus.Gauge
|
||||
}
|
||||
|
||||
// NewClusterUsageCollector creates and returns the reference to ClusterUsageCollector
|
||||
// and internally defines each metric that display cluster stats.
|
||||
func NewClusterUsageCollector(conn Conn) *ClusterUsageCollector {
|
||||
return &ClusterUsageCollector{
|
||||
conn: conn,
|
||||
|
||||
GlobalCapacity: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "cluster_capacity_bytes",
|
||||
Help: "Total capacity of the cluster",
|
||||
}),
|
||||
UsedCapacity: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "cluster_used_bytes",
|
||||
Help: "Capacity of the cluster currently in use",
|
||||
}),
|
||||
AvailableCapacity: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "cluster_available_bytes",
|
||||
Help: "Available space within the cluster",
|
||||
}),
|
||||
Objects: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "cluster_objects",
|
||||
Help: "No. of rados objects within the cluster",
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
func (c *ClusterUsageCollector) metricsList() []prometheus.Metric {
|
||||
return []prometheus.Metric{
|
||||
c.GlobalCapacity,
|
||||
c.UsedCapacity,
|
||||
c.AvailableCapacity,
|
||||
c.Objects,
|
||||
}
|
||||
}
|
||||
|
||||
// cephClusterStats mirrors the part of the JSON reply to the ceph "df"
// command that the usage collector reads: the cluster-wide totals found under
// the "stats" key. Values are kept as json.Number and converted to float64 by
// the code that consumes them.
type cephClusterStats struct {
	Stats struct {
		TotalBytes      json.Number `json:"total_bytes"`
		TotalUsedBytes  json.Number `json:"total_used_bytes"`
		TotalAvailBytes json.Number `json:"total_avail_bytes"`
		TotalObjects    json.Number `json:"total_objects"`
	} `json:"stats"`
}
|
||||
|
||||
func (c *ClusterUsageCollector) collect() error {
|
||||
cmd := c.cephUsageCommand()
|
||||
buf, _, err := c.conn.MonCommand(cmd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stats := &cephClusterStats{}
|
||||
if err := json.Unmarshal(buf, stats); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
tot, err := stats.Stats.TotalBytes.Float64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.GlobalCapacity.Set(tot)
|
||||
|
||||
used, err := stats.Stats.TotalUsedBytes.Float64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.UsedCapacity.Set(used)
|
||||
|
||||
avail, err := stats.Stats.TotalAvailBytes.Float64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.AvailableCapacity.Set(avail)
|
||||
|
||||
objects, err := stats.Stats.TotalObjects.Float64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.Objects.Set(objects)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *ClusterUsageCollector) cephUsageCommand() []byte {
|
||||
cmd, err := json.Marshal(map[string]interface{}{
|
||||
"prefix": "df",
|
||||
"detail": "detail",
|
||||
"format": "json",
|
||||
})
|
||||
if err != nil {
|
||||
// panic! because ideally in no world this hard-coded input
|
||||
// should fail.
|
||||
panic(err)
|
||||
}
|
||||
return cmd
|
||||
}
|
||||
|
||||
// Describe sends the descriptors of each metric over to the provided channel.
|
||||
// The corresponding metric values are sent separately.
|
||||
func (c *ClusterUsageCollector) Describe(ch chan<- *prometheus.Desc) {
|
||||
for _, metric := range c.metricsList() {
|
||||
ch <- metric.Desc()
|
||||
}
|
||||
}
|
||||
|
||||
// Collect sends the metric values for each metric pertaining to the global
|
||||
// cluster usage over to the provided prometheus Metric channel.
|
||||
func (c *ClusterUsageCollector) Collect(ch chan<- prometheus.Metric) {
|
||||
if err := c.collect(); err != nil {
|
||||
log.Println("failed collecting metrics:", err)
|
||||
return
|
||||
}
|
||||
|
||||
for _, metric := range c.metricsList() {
|
||||
ch <- metric
|
||||
}
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
// Copyright 2016 DigitalOcean
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
func TestClusterUsage(t *testing.T) {
|
||||
var (
|
||||
expected = `
|
||||
{
|
||||
"stats": {
|
||||
"total_bytes": 10,
|
||||
"total_used_bytes": 6,
|
||||
"total_avail_bytes": 4,
|
||||
"total_objects": 1
|
||||
}
|
||||
}`
|
||||
)
|
||||
|
||||
collector := NewClusterUsageCollector(NewNoopConn(expected))
|
||||
if err := prometheus.Register(collector); err != nil {
|
||||
t.Fatalf("collector failed to register: %s", err)
|
||||
}
|
||||
|
||||
server := httptest.NewServer(prometheus.Handler())
|
||||
defer server.Close()
|
||||
|
||||
resp, err := http.Get(server.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected failed response from prometheus: %s", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
buf, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("failed reading server response: %s", err)
|
||||
}
|
||||
|
||||
for _, re := range []*regexp.Regexp{
|
||||
regexp.MustCompile(`ceph_cluster_capacity_bytes 10`),
|
||||
regexp.MustCompile(`ceph_cluster_used_bytes 6`),
|
||||
regexp.MustCompile(`ceph_cluster_available_bytes 4`),
|
||||
regexp.MustCompile(`ceph_cluster_objects 1`),
|
||||
} {
|
||||
if !re.Match(buf) {
|
||||
t.Errorf("failed matching: %q", re)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
// Copyright 2016 DigitalOcean
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collectors
|
||||
|
||||
import "github.com/ceph/go-ceph/rados"
|
||||
|
||||
// Conn declares the subset of *rados.Conn's methods that this repository
// uses. Depending on this interface keeps *rados.Conn, which is really an
// implementation detail, from showing up throughout the code, and it makes
// the connection easy to mock when unit-testing the collectors.
type Conn interface {
	ReadDefaultConfigFile() error
	Connect() error
	Shutdown()

	// MonCommand takes an encoded command (the collectors in this package
	// pass JSON) and returns the reply bytes, a string and an error.
	MonCommand([]byte) ([]byte, string, error)
}
|
||||
|
||||
// Verify that *rados.Conn implements Conn correctly.
|
||||
var _ Conn = &rados.Conn{}
|
||||
|
||||
// NoopConn is the stub we use for mocking rados Conn. Unit testing
|
||||
// each individual collectors becomes a lot easier after that.
|
||||
type NoopConn struct {
|
||||
output string
|
||||
}
|
||||
|
||||
// The stub we use for testing should also satisfy the interface properties.
|
||||
var _ Conn = &NoopConn{}
|
||||
|
||||
// NewNoopConn returns an instance of *NoopConn. The string that we want
|
||||
// outputted at the end of the command we issue to ceph, should be
|
||||
// specified in the only input parameter.
|
||||
func NewNoopConn(output string) *NoopConn {
|
||||
return &NoopConn{output}
|
||||
}
|
||||
|
||||
// ReadDefaultConfigFile does not need to return an error. It satisfies
|
||||
// rados.Conn's function with the same prototype.
|
||||
func (n *NoopConn) ReadDefaultConfigFile() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Connect does not need to return an error. It satisfies
|
||||
// rados.Conn's function with the same prototype.
|
||||
func (n *NoopConn) Connect() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Shutdown satisfies rados.Conn's function prototype.
|
||||
func (n *NoopConn) Shutdown() {}
|
||||
|
||||
// MonCommand returns the provided output string to NoopConn as is, making
|
||||
// it seem like it actually ran something and produced that string as a result.
|
||||
func (n *NoopConn) MonCommand(_ []byte) ([]byte, string, error) {
|
||||
return []byte(n.output), "", nil
|
||||
}
|
|
@ -0,0 +1,235 @@
|
|||
// Copyright 2016 DigitalOcean
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"log"
|
||||
"regexp"
|
||||
"strconv"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// ClusterHealthCollector collects information about the health of an overall cluster.
|
||||
// It surfaces changes in the ceph parameters unlike data usage that ClusterUsageCollector
|
||||
// does.
|
||||
type ClusterHealthCollector struct {
|
||||
conn Conn
|
||||
|
||||
DegradedPGs prometheus.Gauge
|
||||
UncleanPGs prometheus.Gauge
|
||||
UndersizedPGs prometheus.Gauge
|
||||
StalePGs prometheus.Gauge
|
||||
|
||||
DegradedObjectsCount prometheus.Gauge
|
||||
|
||||
OSDsDown prometheus.Gauge
|
||||
}
|
||||
|
||||
// NewClusterHealthCollector creates a new instance of ClusterHealthCollector to collect health
|
||||
// metrics on.
|
||||
func NewClusterHealthCollector(conn Conn) *ClusterHealthCollector {
|
||||
return &ClusterHealthCollector{
|
||||
conn: conn,
|
||||
|
||||
DegradedPGs: prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "degraded_pgs",
|
||||
Help: "No. of PGs in a degraded state",
|
||||
},
|
||||
),
|
||||
UncleanPGs: prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "unclean_pgs",
|
||||
Help: "No. of PGs in an unclean state",
|
||||
},
|
||||
),
|
||||
UndersizedPGs: prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "undersized_pgs",
|
||||
Help: "No. of undersized PGs in the cluster",
|
||||
},
|
||||
),
|
||||
StalePGs: prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "stale_pgs",
|
||||
Help: "No. of stale PGs in the cluster",
|
||||
},
|
||||
),
|
||||
DegradedObjectsCount: prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "degraded_objects",
|
||||
Help: "No. of degraded objects across all PGs",
|
||||
},
|
||||
),
|
||||
OSDsDown: prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "osds_down",
|
||||
Help: "Count of OSDs that are in DOWN state",
|
||||
},
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
func (c *ClusterHealthCollector) metricsList() []prometheus.Metric {
|
||||
return []prometheus.Metric{
|
||||
c.DegradedPGs,
|
||||
c.UncleanPGs,
|
||||
c.UndersizedPGs,
|
||||
c.StalePGs,
|
||||
c.DegradedObjectsCount,
|
||||
c.OSDsDown,
|
||||
}
|
||||
}
|
||||
|
||||
// cephHealthStats is the subset of the JSON reply to the health command that
// the collector decodes: the list of summary entries.
type cephHealthStats struct {
	Summary []struct {
		// Severity is decoded from the entry's "severity" key.
		Severity string `json:"severity"`
		// Summary is the entry's free-form text, which the collector
		// scans with regular expressions.
		Summary string `json:"summary"`
	} `json:"summary"`
}
|
||||
|
||||
func (c *ClusterHealthCollector) collect() error {
|
||||
cmd := c.cephUsageCommand()
|
||||
buf, _, err := c.conn.MonCommand(cmd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stats := &cephHealthStats{}
|
||||
if err := json.Unmarshal(buf, stats); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, metric := range c.metricsList() {
|
||||
if gauge, ok := metric.(prometheus.Gauge); ok {
|
||||
gauge.Set(0)
|
||||
}
|
||||
}
|
||||
|
||||
if len(stats.Summary) < 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var (
|
||||
degradedRegex = regexp.MustCompile(`([\d]+) pgs degraded`)
|
||||
uncleanRegex = regexp.MustCompile(`([\d]+) pgs stuck unclean`)
|
||||
undersizedRegex = regexp.MustCompile(`([\d]+) pgs undersized`)
|
||||
staleRegex = regexp.MustCompile(`([\d]+) pgs stale`)
|
||||
degradedObjectsRegex = regexp.MustCompile(`recovery ([\d]+)/([\d]+) objects degraded`)
|
||||
osdsDownRegex = regexp.MustCompile(`([\d]+)/([\d]+) in osds are down`)
|
||||
)
|
||||
|
||||
for _, s := range stats.Summary {
|
||||
matched := degradedRegex.FindStringSubmatch(s.Summary)
|
||||
if len(matched) == 2 {
|
||||
v, err := strconv.Atoi(matched[1])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.DegradedPGs.Set(float64(v))
|
||||
}
|
||||
|
||||
matched = uncleanRegex.FindStringSubmatch(s.Summary)
|
||||
if len(matched) == 2 {
|
||||
v, err := strconv.Atoi(matched[1])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.UncleanPGs.Set(float64(v))
|
||||
}
|
||||
|
||||
matched = undersizedRegex.FindStringSubmatch(s.Summary)
|
||||
if len(matched) == 2 {
|
||||
v, err := strconv.Atoi(matched[1])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.UndersizedPGs.Set(float64(v))
|
||||
}
|
||||
|
||||
matched = staleRegex.FindStringSubmatch(s.Summary)
|
||||
if len(matched) == 2 {
|
||||
v, err := strconv.Atoi(matched[1])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.StalePGs.Set(float64(v))
|
||||
}
|
||||
|
||||
matched = degradedObjectsRegex.FindStringSubmatch(s.Summary)
|
||||
if len(matched) == 3 {
|
||||
v, err := strconv.Atoi(matched[1])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.DegradedObjectsCount.Set(float64(v))
|
||||
}
|
||||
|
||||
matched = osdsDownRegex.FindStringSubmatch(s.Summary)
|
||||
if len(matched) == 3 {
|
||||
v, err := strconv.Atoi(matched[1])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.OSDsDown.Set(float64(v))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *ClusterHealthCollector) cephUsageCommand() []byte {
|
||||
cmd, err := json.Marshal(map[string]interface{}{
|
||||
"prefix": "health",
|
||||
"detail": "detail",
|
||||
"format": "json",
|
||||
})
|
||||
if err != nil {
|
||||
// panic! because ideally in no world this hard-coded input
|
||||
// should fail.
|
||||
panic(err)
|
||||
}
|
||||
return cmd
|
||||
}
|
||||
|
||||
// Describe sends all the descriptions of individual metrics of ClusterHealthCollector
|
||||
// to the provided prometheus channel.
|
||||
func (c *ClusterHealthCollector) Describe(ch chan<- *prometheus.Desc) {
|
||||
for _, metric := range c.metricsList() {
|
||||
ch <- metric.Desc()
|
||||
}
|
||||
}
|
||||
|
||||
// Collect sends all the collected metrics to the provided prometheus channel.
|
||||
// It requires the caller to handle synchronization.
|
||||
func (c *ClusterHealthCollector) Collect(ch chan<- prometheus.Metric) {
|
||||
if err := c.collect(); err != nil {
|
||||
log.Println("failed collecting metrics:", err)
|
||||
return
|
||||
}
|
||||
|
||||
for _, metric := range c.metricsList() {
|
||||
ch <- metric
|
||||
}
|
||||
}
|
|
@ -0,0 +1,97 @@
|
|||
// Copyright 2016 DigitalOcean
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
func TestClusterHealthCollector(t *testing.T) {
|
||||
for _, tt := range []struct {
|
||||
input string
|
||||
regexes []*regexp.Regexp
|
||||
}{
|
||||
{
|
||||
`{"summary": [{"severity": "HEALTH_WARN", "summary": "5 pgs degraded"}]}`,
|
||||
[]*regexp.Regexp{
|
||||
regexp.MustCompile(`degraded_pgs 5`),
|
||||
},
|
||||
},
|
||||
{
|
||||
`{"summary": [{"severity": "HEALTH_WARN", "summary": "6 pgs stuck unclean"}]}`,
|
||||
[]*regexp.Regexp{
|
||||
regexp.MustCompile(`unclean_pgs 6`),
|
||||
},
|
||||
},
|
||||
{
|
||||
`{"summary": [{"severity": "HEALTH_WARN", "summary": "7 pgs undersized"}]}`,
|
||||
[]*regexp.Regexp{
|
||||
regexp.MustCompile(`undersized_pgs 7`),
|
||||
},
|
||||
},
|
||||
{
|
||||
`{"summary": [{"severity": "HEALTH_WARN", "summary": "8 pgs stale"}]}`,
|
||||
[]*regexp.Regexp{
|
||||
regexp.MustCompile(`stale_pgs 8`),
|
||||
},
|
||||
},
|
||||
{
|
||||
`{"summary": [{"severity": "HEALTH_WARN", "summary": "recovery 10/20 objects degraded"}]}`,
|
||||
[]*regexp.Regexp{
|
||||
regexp.MustCompile(`degraded_objects 10`),
|
||||
},
|
||||
},
|
||||
{
|
||||
`{"summary": [{"severity": "HEALTH_WARN", "summary": "3/20 in osds are down"}]}`,
|
||||
[]*regexp.Regexp{
|
||||
regexp.MustCompile(`osds_down 3`),
|
||||
},
|
||||
},
|
||||
} {
|
||||
func() {
|
||||
collector := NewClusterHealthCollector(NewNoopConn(tt.input))
|
||||
if err := prometheus.Register(collector); err != nil {
|
||||
t.Fatalf("collector failed to register: %s", err)
|
||||
}
|
||||
defer prometheus.Unregister(collector)
|
||||
|
||||
server := httptest.NewServer(prometheus.Handler())
|
||||
defer server.Close()
|
||||
|
||||
resp, err := http.Get(server.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected failed response from prometheus: %s", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
buf, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("failed reading server response: %s", err)
|
||||
}
|
||||
|
||||
for _, re := range tt.regexes {
|
||||
if !re.Match(buf) {
|
||||
t.Errorf("failed matching: %q", re)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
|
@ -0,0 +1,352 @@
|
|||
// Copyright 2016 DigitalOcean
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"log"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// MonitorCollector is used to extract stats related to monitors
|
||||
// running within Ceph cluster. As we extract information pertaining
|
||||
// to each monitor instance, there are various vector metrics we
|
||||
// need to use.
|
||||
type MonitorCollector struct {
|
||||
conn Conn
|
||||
|
||||
// TotalKBs display the total storage a given monitor node has.
|
||||
TotalKBs *prometheus.GaugeVec
|
||||
|
||||
// UsedKBs depict how much of the total storage our monitor process
|
||||
// has utilized.
|
||||
UsedKBs *prometheus.GaugeVec
|
||||
|
||||
// AvailKBs shows the space left unused.
|
||||
AvailKBs *prometheus.GaugeVec
|
||||
|
||||
// PercentAvail shows the amount of unused space as a percentage of total
|
||||
// space.
|
||||
PercentAvail *prometheus.GaugeVec
|
||||
|
||||
// Store exposes information about internal backing store.
|
||||
Store Store
|
||||
|
||||
// ClockSkew shows how far the monitor clocks have skewed from each other. This
|
||||
// is an important metric because the functioning of Ceph's paxos depends on
|
||||
// the clocks being aligned as close to each other as possible.
|
||||
ClockSkew *prometheus.GaugeVec
|
||||
|
||||
// Latency displays the time the monitors take to communicate between themselves.
|
||||
Latency *prometheus.GaugeVec
|
||||
|
||||
// NodesinQuorum show the size of the working monitor quorum. Any change in this
|
||||
// metric can imply a significant issue in the cluster if it is not manually changed.
|
||||
NodesinQuorum prometheus.Gauge
|
||||
}
|
||||
|
||||
// Store displays information about Monitor's FileStore. It is responsible for
|
||||
// storing all the meta information about the cluster, including monmaps, osdmaps,
|
||||
// pgmaps, etc. along with logs and other data.
|
||||
type Store struct {
|
||||
// TotalBytes displays the current size of the FileStore.
|
||||
TotalBytes *prometheus.GaugeVec
|
||||
|
||||
// SSTBytes shows the amount used by LevelDB's sorted-string tables.
|
||||
SSTBytes *prometheus.GaugeVec
|
||||
|
||||
// LogBytes shows the amount used by logs.
|
||||
LogBytes *prometheus.GaugeVec
|
||||
|
||||
// MiscBytes shows the amount used by miscellaneous information.
|
||||
MiscBytes *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
// NewMonitorCollector creates an instance of the MonitorCollector and instantiates
|
||||
// the individual metrics that show information about the monitor processes.
|
||||
func NewMonitorCollector(conn Conn) *MonitorCollector {
|
||||
return &MonitorCollector{
|
||||
conn: conn,
|
||||
|
||||
TotalKBs: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "monitor_capacity_bytes",
|
||||
Help: "Total storage capacity of the monitor node",
|
||||
},
|
||||
[]string{"monitor"},
|
||||
),
|
||||
UsedKBs: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "monitor_used_bytes",
|
||||
Help: "Storage of the monitor node that is currently allocated for use",
|
||||
},
|
||||
[]string{"monitor"},
|
||||
),
|
||||
AvailKBs: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "monitor_avail_bytes",
|
||||
Help: "Total unused storage capacity that the monitor node has left",
|
||||
},
|
||||
[]string{"monitor"},
|
||||
),
|
||||
PercentAvail: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "monitor_avail_percent",
|
||||
Help: "Percentage of total unused storage capacity that the monitor node has left",
|
||||
},
|
||||
[]string{"monitor"},
|
||||
),
|
||||
Store: Store{
|
||||
TotalBytes: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "monitor_store_capacity_bytes",
|
||||
Help: "Total capacity of the FileStore backing the monitor daemon",
|
||||
},
|
||||
[]string{"monitor"},
|
||||
),
|
||||
SSTBytes: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "monitor_store_sst_bytes",
|
||||
Help: "Capacity of the FileStore used only for raw SSTs",
|
||||
},
|
||||
[]string{"monitor"},
|
||||
),
|
||||
LogBytes: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "monitor_store_log_bytes",
|
||||
Help: "Capacity of the FileStore used only for logging",
|
||||
},
|
||||
[]string{"monitor"},
|
||||
),
|
||||
MiscBytes: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "monitor_store_misc_bytes",
|
||||
Help: "Capacity of the FileStore used only for storing miscellaneous information",
|
||||
},
|
||||
[]string{"monitor"},
|
||||
),
|
||||
},
|
||||
ClockSkew: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "monitor_clock_skew_seconds",
|
||||
Help: "Clock skew the monitor node is incurring",
|
||||
},
|
||||
[]string{"monitor"},
|
||||
),
|
||||
Latency: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "monitor_latency_seconds",
|
||||
Help: "Latency the monitor node is incurring",
|
||||
},
|
||||
[]string{"monitor"},
|
||||
),
|
||||
NodesinQuorum: prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "monitor_quorum_count",
|
||||
Help: "The total size of the monitor quorum",
|
||||
},
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
func (m *MonitorCollector) collectorList() []prometheus.Collector {
|
||||
return []prometheus.Collector{
|
||||
m.TotalKBs,
|
||||
m.UsedKBs,
|
||||
m.AvailKBs,
|
||||
m.PercentAvail,
|
||||
|
||||
m.Store.TotalBytes,
|
||||
m.Store.SSTBytes,
|
||||
m.Store.LogBytes,
|
||||
m.Store.MiscBytes,
|
||||
|
||||
m.ClockSkew,
|
||||
m.Latency,
|
||||
}
|
||||
}
|
||||
|
||||
func (m *MonitorCollector) metricsList() []prometheus.Metric {
|
||||
return []prometheus.Metric{
|
||||
m.NodesinQuorum,
|
||||
}
|
||||
}
|
||||
|
||||
// cephMonitorStats is the subset of the JSON reply to the status command that
// the collector decodes. Numeric fields are kept as json.Number and are only
// converted to float64 while collecting.
type cephMonitorStats struct {
	Health struct {
		Health struct {
			HealthServices []struct {
				// Mons carries one entry per monitor with its storage
				// figures and backing store statistics.
				Mons []struct {
					Name         string      `json:"name"`
					KBTotal      json.Number `json:"kb_total"`
					KBUsed       json.Number `json:"kb_used"`
					KBAvail      json.Number `json:"kb_avail"`
					AvailPercent json.Number `json:"avail_percent"`
					StoreStats   struct {
						BytesTotal json.Number `json:"bytes_total"`
						BytesSST   json.Number `json:"bytes_sst"`
						BytesLog   json.Number `json:"bytes_log"`
						BytesMisc  json.Number `json:"bytes_misc"`
					} `json:"store_stats"`
				} `json:"mons"`
			} `json:"health_services"`
		} `json:"health"`
		TimeChecks struct {
			// Mons carries one entry per monitor with its clock skew
			// and latency.
			Mons []struct {
				Name    string      `json:"name"`
				Skew    json.Number `json:"skew"`
				Latency json.Number `json:"latency"`
			} `json:"mons"`
		} `json:"timechecks"`
	} `json:"health"`
	// Quorum is the decoded "quorum" list; only its length is used when
	// collecting.
	Quorum []int `json:"quorum"`
}
|
||||
|
||||
func (m *MonitorCollector) collect() error {
|
||||
cmd := m.cephUsageCommand()
|
||||
buf, _, err := m.conn.MonCommand(cmd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stats := &cephMonitorStats{}
|
||||
if err := json.Unmarshal(buf, stats); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, healthService := range stats.Health.Health.HealthServices {
|
||||
for _, monstat := range healthService.Mons {
|
||||
kbTotal, err := monstat.KBTotal.Float64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.TotalKBs.WithLabelValues(monstat.Name).Set(kbTotal * 1e3)
|
||||
|
||||
kbUsed, err := monstat.KBUsed.Float64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.UsedKBs.WithLabelValues(monstat.Name).Set(kbUsed * 1e3)
|
||||
|
||||
kbAvail, err := monstat.KBAvail.Float64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.AvailKBs.WithLabelValues(monstat.Name).Set(kbAvail * 1e3)
|
||||
|
||||
percentAvail, err := monstat.AvailPercent.Float64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.PercentAvail.WithLabelValues(monstat.Name).Set(percentAvail)
|
||||
|
||||
storeBytes, err := monstat.StoreStats.BytesTotal.Float64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.Store.TotalBytes.WithLabelValues(monstat.Name).Set(storeBytes)
|
||||
|
||||
sstBytes, err := monstat.StoreStats.BytesSST.Float64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.Store.SSTBytes.WithLabelValues(monstat.Name).Set(sstBytes)
|
||||
|
||||
logBytes, err := monstat.StoreStats.BytesLog.Float64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.Store.LogBytes.WithLabelValues(monstat.Name).Set(logBytes)
|
||||
|
||||
miscBytes, err := monstat.StoreStats.BytesMisc.Float64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.Store.MiscBytes.WithLabelValues(monstat.Name).Set(miscBytes)
|
||||
}
|
||||
}
|
||||
|
||||
for _, monstat := range stats.Health.TimeChecks.Mons {
|
||||
skew, err := monstat.Skew.Float64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.ClockSkew.WithLabelValues(monstat.Name).Set(skew)
|
||||
|
||||
latency, err := monstat.Latency.Float64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.Latency.WithLabelValues(monstat.Name).Set(latency)
|
||||
}
|
||||
|
||||
m.NodesinQuorum.Set(float64(len(stats.Quorum)))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MonitorCollector) cephUsageCommand() []byte {
|
||||
cmd, err := json.Marshal(map[string]interface{}{
|
||||
"prefix": "status",
|
||||
"format": "json",
|
||||
})
|
||||
if err != nil {
|
||||
// panic! because ideally in no world this hard-coded input
|
||||
// should fail.
|
||||
panic(err)
|
||||
}
|
||||
return cmd
|
||||
}
|
||||
|
||||
// Describe sends the descriptors of each Monitor related metric we have defined
|
||||
// to the channel provided.
|
||||
func (m *MonitorCollector) Describe(ch chan<- *prometheus.Desc) {
|
||||
for _, metric := range m.collectorList() {
|
||||
metric.Describe(ch)
|
||||
}
|
||||
|
||||
for _, metric := range m.metricsList() {
|
||||
ch <- metric.Desc()
|
||||
}
|
||||
}
|
||||
|
||||
// Collect extracts the given metrics from the Monitors and sends it to the prometheus
|
||||
// channel.
|
||||
func (m *MonitorCollector) Collect(ch chan<- prometheus.Metric) {
|
||||
if err := m.collect(); err != nil {
|
||||
log.Println("failed collecting metrics:", err)
|
||||
return
|
||||
}
|
||||
|
||||
for _, metric := range m.collectorList() {
|
||||
metric.Collect(ch)
|
||||
}
|
||||
|
||||
for _, metric := range m.metricsList() {
|
||||
ch <- metric
|
||||
}
|
||||
}
|
|
@ -0,0 +1,292 @@
|
|||
// Copyright 2016 DigitalOcean
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
func TestMonitorCollector(t *testing.T) {
|
||||
for _, tt := range []struct {
|
||||
input string
|
||||
regexes []*regexp.Regexp
|
||||
}{
|
||||
{
|
||||
`
|
||||
{
|
||||
"health": {
|
||||
"health": {
|
||||
"health_services": [
|
||||
{
|
||||
"mons": [
|
||||
{
|
||||
"name": "test-mon01",
|
||||
"kb_total": 412718256,
|
||||
"kb_used": 1812852,
|
||||
"kb_avail": 389917500,
|
||||
"avail_percent": 94,
|
||||
"last_updated": "2015-12-28 15:54:03.763348",
|
||||
"store_stats": {
|
||||
"bytes_total": 1781282079,
|
||||
"bytes_sst": 1,
|
||||
"bytes_log": 609694,
|
||||
"bytes_misc": 1780672385,
|
||||
"last_updated": "0.000000"
|
||||
},
|
||||
"health": "HEALTH_OK"
|
||||
},
|
||||
{
|
||||
"name": "test-mon02",
|
||||
"kb_total": 412718256,
|
||||
"kb_used": 1875304,
|
||||
"kb_avail": 389855048,
|
||||
"avail_percent": 94,
|
||||
"last_updated": "2015-12-28 15:53:53.808657",
|
||||
"store_stats": {
|
||||
"bytes_total": 1844348214,
|
||||
"bytes_sst": 2,
|
||||
"bytes_log": 871605,
|
||||
"bytes_misc": 1843476609,
|
||||
"last_updated": "0.000000"
|
||||
},
|
||||
"health": "HEALTH_OK"
|
||||
},
|
||||
{
|
||||
"name": "test-mon03",
|
||||
"kb_total": 412718256,
|
||||
"kb_used": 2095356,
|
||||
"kb_avail": 389634996,
|
||||
"avail_percent": 94,
|
||||
"last_updated": "2015-12-28 15:53:06.292749",
|
||||
"store_stats": {
|
||||
"bytes_total": 2069468587,
|
||||
"bytes_sst": 3,
|
||||
"bytes_log": 871605,
|
||||
"bytes_misc": 2068596982,
|
||||
"last_updated": "0.000000"
|
||||
},
|
||||
"health": "HEALTH_OK"
|
||||
},
|
||||
{
|
||||
"name": "test-mon04",
|
||||
"kb_total": 412718256,
|
||||
"kb_used": 1726276,
|
||||
"kb_avail": 390004076,
|
||||
"avail_percent": 94,
|
||||
"last_updated": "2015-12-28 15:53:10.770775",
|
||||
"store_stats": {
|
||||
"bytes_total": 1691972147,
|
||||
"bytes_sst": 4,
|
||||
"bytes_log": 871605,
|
||||
"bytes_misc": 1691100542,
|
||||
"last_updated": "0.000000"
|
||||
},
|
||||
"health": "HEALTH_OK"
|
||||
},
|
||||
{
|
||||
"name": "test-mon05",
|
||||
"kb_total": 412718256,
|
||||
"kb_used": 1883228,
|
||||
"kb_avail": 389847124,
|
||||
"avail_percent": 94,
|
||||
"last_updated": "2015-12-28 15:53:11.407033",
|
||||
"store_stats": {
|
||||
"bytes_total": 1852485942,
|
||||
"bytes_sst": 5,
|
||||
"bytes_log": 871605,
|
||||
"bytes_misc": 1851614337,
|
||||
"last_updated": "0.000000"
|
||||
},
|
||||
"health": "HEALTH_OK"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"timechecks": {
|
||||
"epoch": 70,
|
||||
"round": 3362,
|
||||
"round_status": "finished",
|
||||
"mons": [
|
||||
{
|
||||
"name": "test-mon01",
|
||||
"skew": 0.000000,
|
||||
"latency": 0.000000,
|
||||
"health": "HEALTH_OK"
|
||||
},
|
||||
{
|
||||
"name": "test-mon02",
|
||||
"skew": -0.000002,
|
||||
"latency": 0.000815,
|
||||
"health": "HEALTH_OK"
|
||||
},
|
||||
{
|
||||
"name": "test-mon03",
|
||||
"skew": -0.000002,
|
||||
"latency": 0.000829,
|
||||
"health": "HEALTH_OK"
|
||||
},
|
||||
{
|
||||
"name": "test-mon04",
|
||||
"skew": -0.000019,
|
||||
"latency": 0.000609,
|
||||
"health": "HEALTH_OK"
|
||||
},
|
||||
{
|
||||
"name": "test-mon05",
|
||||
"skew": -0.000628,
|
||||
"latency": 0.000659,
|
||||
"health": "HEALTH_OK"
|
||||
}
|
||||
]
|
||||
},
|
||||
"summary": [],
|
||||
"overall_status": "HEALTH_OK",
|
||||
"detail": []
|
||||
},
|
||||
"fsid": "6C9BF03E-044E-4EEB-9C5F-145A54ECF7DB",
|
||||
"election_epoch": 70,
|
||||
"quorum": [
|
||||
0,
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4
|
||||
],
|
||||
"monmap": {
|
||||
"epoch": 12,
|
||||
"fsid": "6C9BF03E-044E-4EEB-9C5F-145A54ECF7DB",
|
||||
"modified": "2015-11-25 07:58:56.388352",
|
||||
"created": "0.000000",
|
||||
"mons": [
|
||||
{
|
||||
"rank": 0,
|
||||
"name": "test-mon01",
|
||||
"addr": "10.123.1.25:6789\/0"
|
||||
},
|
||||
{
|
||||
"rank": 1,
|
||||
"name": "test-mon02",
|
||||
"addr": "10.123.1.26:6789\/0"
|
||||
},
|
||||
{
|
||||
"rank": 2,
|
||||
"name": "test-mon03",
|
||||
"addr": "10.123.2.25:6789\/0"
|
||||
},
|
||||
{
|
||||
"rank": 3,
|
||||
"name": "test-mon04",
|
||||
"addr": "10.123.2.26:6789\/0"
|
||||
},
|
||||
{
|
||||
"rank": 4,
|
||||
"name": "test-mon05",
|
||||
"addr": "10.123.2.27:6789\/0"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
`,
|
||||
[]*regexp.Regexp{
|
||||
regexp.MustCompile(`ceph_monitor_avail_bytes{monitor="test-mon01"} 3.899175e`),
|
||||
regexp.MustCompile(`ceph_monitor_avail_bytes{monitor="test-mon02"} 3.89855048e`),
|
||||
regexp.MustCompile(`ceph_monitor_avail_bytes{monitor="test-mon03"} 3.89634996e`),
|
||||
regexp.MustCompile(`ceph_monitor_avail_bytes{monitor="test-mon04"} 3.90004076e`),
|
||||
regexp.MustCompile(`ceph_monitor_avail_bytes{monitor="test-mon05"} 3.89847124e`),
|
||||
regexp.MustCompile(`ceph_monitor_avail_percent{monitor="test-mon01"} 94`),
|
||||
regexp.MustCompile(`ceph_monitor_avail_percent{monitor="test-mon02"} 94`),
|
||||
regexp.MustCompile(`ceph_monitor_avail_percent{monitor="test-mon03"} 94`),
|
||||
regexp.MustCompile(`ceph_monitor_avail_percent{monitor="test-mon04"} 94`),
|
||||
regexp.MustCompile(`ceph_monitor_avail_percent{monitor="test-mon05"} 94`),
|
||||
regexp.MustCompile(`ceph_monitor_clock_skew_seconds{monitor="test-mon01"} 0`),
|
||||
regexp.MustCompile(`ceph_monitor_clock_skew_seconds{monitor="test-mon02"} -2e-06`),
|
||||
regexp.MustCompile(`ceph_monitor_clock_skew_seconds{monitor="test-mon03"} -2e-06`),
|
||||
regexp.MustCompile(`ceph_monitor_clock_skew_seconds{monitor="test-mon04"} -1.9e-05`),
|
||||
regexp.MustCompile(`ceph_monitor_clock_skew_seconds{monitor="test-mon05"} -0.000628`),
|
||||
regexp.MustCompile(`ceph_monitor_latency_seconds{monitor="test-mon01"} 0`),
|
||||
regexp.MustCompile(`ceph_monitor_latency_seconds{monitor="test-mon02"} 0.000815`),
|
||||
regexp.MustCompile(`ceph_monitor_latency_seconds{monitor="test-mon03"} 0.000829`),
|
||||
regexp.MustCompile(`ceph_monitor_latency_seconds{monitor="test-mon04"} 0.000609`),
|
||||
regexp.MustCompile(`ceph_monitor_latency_seconds{monitor="test-mon05"} 0.000659`),
|
||||
regexp.MustCompile(`ceph_monitor_quorum_count 5`),
|
||||
regexp.MustCompile(`ceph_monitor_store_log_bytes{monitor="test-mon01"} 609694`),
|
||||
regexp.MustCompile(`ceph_monitor_store_log_bytes{monitor="test-mon02"} 871605`),
|
||||
regexp.MustCompile(`ceph_monitor_store_log_bytes{monitor="test-mon03"} 871605`),
|
||||
regexp.MustCompile(`ceph_monitor_store_log_bytes{monitor="test-mon04"} 871605`),
|
||||
regexp.MustCompile(`ceph_monitor_store_log_bytes{monitor="test-mon05"} 871605`),
|
||||
regexp.MustCompile(`ceph_monitor_store_misc_bytes{monitor="test-mon01"} 1.780672385e`),
|
||||
regexp.MustCompile(`ceph_monitor_store_misc_bytes{monitor="test-mon02"} 1.843476609e`),
|
||||
regexp.MustCompile(`ceph_monitor_store_misc_bytes{monitor="test-mon03"} 2.068596982e`),
|
||||
regexp.MustCompile(`ceph_monitor_store_misc_bytes{monitor="test-mon04"} 1.691100542e`),
|
||||
regexp.MustCompile(`ceph_monitor_store_misc_bytes{monitor="test-mon05"} 1.851614337e`),
|
||||
regexp.MustCompile(`ceph_monitor_store_sst_bytes{monitor="test-mon01"} 1`),
|
||||
regexp.MustCompile(`ceph_monitor_store_sst_bytes{monitor="test-mon02"} 2`),
|
||||
regexp.MustCompile(`ceph_monitor_store_sst_bytes{monitor="test-mon03"} 3`),
|
||||
regexp.MustCompile(`ceph_monitor_store_sst_bytes{monitor="test-mon04"} 4`),
|
||||
regexp.MustCompile(`ceph_monitor_store_sst_bytes{monitor="test-mon05"} 5`),
|
||||
regexp.MustCompile(`ceph_monitor_store_capacity_bytes{monitor="test-mon01"} 1.781282079e`),
|
||||
regexp.MustCompile(`ceph_monitor_store_capacity_bytes{monitor="test-mon02"} 1.844348214e`),
|
||||
regexp.MustCompile(`ceph_monitor_store_capacity_bytes{monitor="test-mon03"} 2.069468587e`),
|
||||
regexp.MustCompile(`ceph_monitor_store_capacity_bytes{monitor="test-mon04"} 1.691972147e`),
|
||||
regexp.MustCompile(`ceph_monitor_store_capacity_bytes{monitor="test-mon05"} 1.852485942e`),
|
||||
regexp.MustCompile(`ceph_monitor_capacity_bytes{monitor="test-mon01"} 4.12718256e`),
|
||||
regexp.MustCompile(`ceph_monitor_capacity_bytes{monitor="test-mon02"} 4.12718256e`),
|
||||
regexp.MustCompile(`ceph_monitor_capacity_bytes{monitor="test-mon03"} 4.12718256e`),
|
||||
regexp.MustCompile(`ceph_monitor_capacity_bytes{monitor="test-mon04"} 4.12718256e`),
|
||||
regexp.MustCompile(`ceph_monitor_capacity_bytes{monitor="test-mon05"} 4.12718256e`),
|
||||
regexp.MustCompile(`ceph_monitor_used_bytes{monitor="test-mon01"} 1.812852e`),
|
||||
regexp.MustCompile(`ceph_monitor_used_bytes{monitor="test-mon02"} 1.875304e`),
|
||||
regexp.MustCompile(`ceph_monitor_used_bytes{monitor="test-mon03"} 2.095356e`),
|
||||
regexp.MustCompile(`ceph_monitor_used_bytes{monitor="test-mon04"} 1.726276e`),
|
||||
regexp.MustCompile(`ceph_monitor_used_bytes{monitor="test-mon05"} 1.883228e`),
|
||||
},
|
||||
},
|
||||
} {
|
||||
func() {
|
||||
collector := NewMonitorCollector(NewNoopConn(tt.input))
|
||||
if err := prometheus.Register(collector); err != nil {
|
||||
t.Fatalf("collector failed to register: %s", err)
|
||||
}
|
||||
defer prometheus.Unregister(collector)
|
||||
|
||||
server := httptest.NewServer(prometheus.Handler())
|
||||
defer server.Close()
|
||||
|
||||
resp, err := http.Get(server.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected failed response from prometheus: %s", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
buf, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("failed reading server response: %s", err)
|
||||
}
|
||||
|
||||
for _, re := range tt.regexes {
|
||||
if !re.Match(buf) {
|
||||
t.Errorf("failed matching: %q", re)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
|
@ -0,0 +1,185 @@
|
|||
// Copyright 2016 DigitalOcean
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"log"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// PoolUsageCollector displays statistics about each pool we have created
|
||||
// in the ceph cluster.
|
||||
type PoolUsageCollector struct {
|
||||
conn Conn
|
||||
|
||||
// UsedBytes tracks the amount of bytes currently allocated for the pool. This
|
||||
// does not factor in the overcommitment made for individual images.
|
||||
UsedBytes *prometheus.GaugeVec
|
||||
|
||||
// Objects shows the no. of RADOS objects created within the pool.
|
||||
Objects *prometheus.GaugeVec
|
||||
|
||||
// ReadIO tracks the read IO calls made for the images within each pool.
|
||||
ReadIO *prometheus.CounterVec
|
||||
|
||||
// WriteIO tracks the write IO calls made for the images within each pool.
|
||||
WriteIO *prometheus.CounterVec
|
||||
}
|
||||
|
||||
// NewPoolUsageCollector creates a new instance of PoolUsageCollector and returns
|
||||
// its reference.
|
||||
func NewPoolUsageCollector(conn Conn) *PoolUsageCollector {
|
||||
return &PoolUsageCollector{
|
||||
conn: conn,
|
||||
|
||||
UsedBytes: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "pool_used_bytes",
|
||||
Help: "Capacity of the pool that is currently under use",
|
||||
},
|
||||
[]string{"pool"},
|
||||
),
|
||||
Objects: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "pool_objects_total",
|
||||
Help: "Total no. of objects allocated within the pool",
|
||||
},
|
||||
[]string{"pool"},
|
||||
),
|
||||
ReadIO: prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "pool_read_total",
|
||||
Help: "Total read i/o calls the pool has been subject to",
|
||||
},
|
||||
[]string{"pool"},
|
||||
),
|
||||
WriteIO: prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: cephNamespace,
|
||||
Name: "pool_write_total",
|
||||
Help: "Total write i/o calls the pool has been subject to",
|
||||
},
|
||||
[]string{"pool"},
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
func (p *PoolUsageCollector) collectorList() []prometheus.Collector {
|
||||
return []prometheus.Collector{
|
||||
p.UsedBytes,
|
||||
p.Objects,
|
||||
p.ReadIO,
|
||||
p.WriteIO,
|
||||
}
|
||||
}
|
||||
|
||||
// cephPoolStats is the decoding target for the JSON response to the monitor
// command built by cephUsageCommand. Only the fields read by the collector
// are declared.
type cephPoolStats struct {
	// Pools holds one entry per pool listed in the response.
	Pools []struct {
		Name string `json:"name"`
		ID   int    `json:"id"`

		// Stats keeps each counter as a json.Number; the caller converts
		// them to float64 explicitly and handles conversion errors itself.
		Stats struct {
			BytesUsed json.Number `json:"bytes_used"`
			Objects   json.Number `json:"objects"`
			Read      json.Number `json:"rd"`
			Write     json.Number `json:"wr"`
		} `json:"stats"`
	} `json:"pools"`
}
|
||||
|
||||
func (p *PoolUsageCollector) collect() error {
|
||||
cmd := p.cephUsageCommand()
|
||||
buf, _, err := p.conn.MonCommand(cmd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stats := &cephPoolStats{}
|
||||
if err := json.Unmarshal(buf, stats); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(stats.Pools) < 1 {
|
||||
return errors.New("no pools found in the cluster to report stats on")
|
||||
}
|
||||
|
||||
for _, pool := range stats.Pools {
|
||||
bytesUsed, err := pool.Stats.BytesUsed.Float64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
p.UsedBytes.WithLabelValues(pool.Name).Set(bytesUsed)
|
||||
|
||||
objects, err := pool.Stats.Objects.Float64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
p.Objects.WithLabelValues(pool.Name).Set(objects)
|
||||
|
||||
read, err := pool.Stats.Read.Float64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
p.ReadIO.WithLabelValues(pool.Name).Set(read)
|
||||
|
||||
write, err := pool.Stats.Write.Float64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
p.WriteIO.WithLabelValues(pool.Name).Set(write)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *PoolUsageCollector) cephUsageCommand() []byte {
|
||||
cmd, err := json.Marshal(map[string]interface{}{
|
||||
"prefix": "df",
|
||||
"detail": "detail",
|
||||
"format": "json",
|
||||
})
|
||||
if err != nil {
|
||||
// panic! because ideally in no world this hard-coded input
|
||||
// should fail.
|
||||
panic(err)
|
||||
}
|
||||
return cmd
|
||||
}
|
||||
|
||||
// Describe fulfills the prometheus.Collector's interface and sends the descriptors
|
||||
// of pool's metrics to the given channel.
|
||||
func (p *PoolUsageCollector) Describe(ch chan<- *prometheus.Desc) {
|
||||
for _, metric := range p.collectorList() {
|
||||
metric.Describe(ch)
|
||||
}
|
||||
}
|
||||
|
||||
// Collect extracts the current values of all the metrics and sends them to the
|
||||
// prometheus channel.
|
||||
func (p *PoolUsageCollector) Collect(ch chan<- prometheus.Metric) {
|
||||
if err := p.collect(); err != nil {
|
||||
log.Println("failed collecting metrics:", err)
|
||||
return
|
||||
}
|
||||
|
||||
for _, metric := range p.collectorList() {
|
||||
metric.Collect(ch)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,73 @@
|
|||
// Copyright 2016 DigitalOcean
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
func TestPoolUsageCollector(t *testing.T) {
|
||||
for _, tt := range []struct {
|
||||
input string
|
||||
regexes []*regexp.Regexp
|
||||
}{
|
||||
{
|
||||
`
|
||||
{"pools": [
|
||||
{"name": "rbd", "id": 11, "stats": {"bytes_used": 20, "objects": 5, "rd": 4, "wr": 6}}
|
||||
]}`,
|
||||
[]*regexp.Regexp{
|
||||
regexp.MustCompile(`pool_used_bytes{pool="rbd"} 20`),
|
||||
regexp.MustCompile(`pool_objects_total{pool="rbd"} 5`),
|
||||
regexp.MustCompile(`pool_read_total{pool="rbd"} 4`),
|
||||
regexp.MustCompile(`pool_write_total{pool="rbd"} 6`),
|
||||
},
|
||||
},
|
||||
} {
|
||||
func() {
|
||||
collector := NewPoolUsageCollector(NewNoopConn(tt.input))
|
||||
if err := prometheus.Register(collector); err != nil {
|
||||
t.Fatalf("collector failed to register: %s", err)
|
||||
}
|
||||
defer prometheus.Unregister(collector)
|
||||
|
||||
server := httptest.NewServer(prometheus.Handler())
|
||||
defer server.Close()
|
||||
|
||||
resp, err := http.Get(server.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected failed response from prometheus: %s", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
buf, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("failed reading server response: %s", err)
|
||||
}
|
||||
|
||||
for _, re := range tt.regexes {
|
||||
if !re.Match(buf) {
|
||||
t.Errorf("failed matching: %q", re)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
|
@ -0,0 +1,115 @@
|
|||
// Copyright 2016 DigitalOcean
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"log"
|
||||
"net/http"
|
||||
"sync"
|
||||
|
||||
ceph_collectors "github.com/digitalocean/ceph_exporter/collectors"
|
||||
|
||||
"github.com/ceph/go-ceph/rados"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// CephExporter wraps all the ceph collectors and provides a single global
|
||||
// exporter to extracts metrics out of. It also ensures that the collection
|
||||
// is done in a thread-safe manner, the necessary requirement stated by
|
||||
// prometheus. It also implements a prometheus.Collector interface in order
|
||||
// to register it correctly.
|
||||
type CephExporter struct {
|
||||
mu sync.Mutex
|
||||
collectors []prometheus.Collector
|
||||
}
|
||||
|
||||
// Verify that the exporter implements the interface correctly.
|
||||
var _ prometheus.Collector = &CephExporter{}
|
||||
|
||||
// NewCephExporter creates an instance to CephExporter and returns a reference
|
||||
// to it. We can choose to enable a collector to extract stats out of by adding
|
||||
// it to the list of collectors.
|
||||
func NewCephExporter(conn *rados.Conn) *CephExporter {
|
||||
return &CephExporter{
|
||||
collectors: []prometheus.Collector{
|
||||
ceph_collectors.NewClusterUsageCollector(conn),
|
||||
ceph_collectors.NewPoolUsageCollector(conn),
|
||||
ceph_collectors.NewClusterHealthCollector(conn),
|
||||
ceph_collectors.NewMonitorCollector(conn),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// Describe sends all the descriptors of the collectors included to
|
||||
// the provided channel.
|
||||
func (c *CephExporter) Describe(ch chan<- *prometheus.Desc) {
|
||||
for _, cc := range c.collectors {
|
||||
cc.Describe(ch)
|
||||
}
|
||||
}
|
||||
|
||||
// Collect sends the collected metrics from each of the collectors to
|
||||
// prometheus. Collect could be called several times concurrently
|
||||
// and thus its run is protected by a single mutex.
|
||||
func (c *CephExporter) Collect(ch chan<- prometheus.Metric) {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
|
||||
for _, cc := range c.collectors {
|
||||
cc.Collect(ch)
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
var (
|
||||
addr = flag.String("telemetry.addr", ":9190", "host:port for ceph exporter")
|
||||
metricsPath = flag.String("telemetry.path", "/metrics", "URL path for surfacing collected metrics")
|
||||
|
||||
cephConfig = flag.String("ceph.config", "", "path to ceph config file")
|
||||
)
|
||||
flag.Parse()
|
||||
|
||||
conn, err := rados.NewConn()
|
||||
if err != nil {
|
||||
log.Fatalf("cannot create new ceph connection: %s", err)
|
||||
}
|
||||
|
||||
if *cephConfig != "" {
|
||||
err = conn.ReadConfigFile(*cephConfig)
|
||||
} else {
|
||||
err = conn.ReadDefaultConfigFile()
|
||||
}
|
||||
if err != nil {
|
||||
log.Fatalf("cannot read ceph config file: %s", err)
|
||||
}
|
||||
|
||||
if err := conn.Connect(); err != nil {
|
||||
log.Fatalf("cannot connect to ceph cluster: %s", err)
|
||||
}
|
||||
defer conn.Shutdown()
|
||||
|
||||
prometheus.MustRegister(NewCephExporter(conn))
|
||||
|
||||
http.Handle(*metricsPath, prometheus.Handler())
|
||||
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
|
||||
http.Redirect(w, r, *metricsPath, http.StatusMovedPermanently)
|
||||
})
|
||||
|
||||
log.Printf("Starting ceph exporter on %q", *addr)
|
||||
if err := http.ListenAndServe(*addr, nil); err != nil {
|
||||
log.Fatalf("cannot start ceph exporter: %s", err)
|
||||
}
|
||||
}
|
Binary file not shown.
After Width: | Height: | Size: 72 KiB |
Loading…
Reference in New Issue