commit
00c0dacc02
17
Makefile
17
Makefile
|
@ -1,17 +0,0 @@
|
|||
# Copyright 2015 The Prometheus Authors
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
VERSION := 0.1.0
|
||||
TARGET := ceph_exporter
|
||||
|
||||
include Makefile.COMMON
|
132
Makefile.COMMON
132
Makefile.COMMON
|
@ -1,132 +0,0 @@
|
|||
# Copyright 2015 The Prometheus Authors
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# THE AUTHORITATIVE VERSION OF THIS MAKEFILE LIVES IN:
|
||||
#
|
||||
# https://github.com/prometheus/utils
|
||||
#
|
||||
# PLEASE MAKE ANY CHANGES THERE AND PROPAGATE THEM TO ALL PROMETHEUS
|
||||
# REPOSITORIES THAT ARE USING THIS MAKEFILE.
|
||||
#
|
||||
# This file provides common Makefile infrastructure for several Prometheus
|
||||
# components. This includes make tasks for downloading Go, setting up a
|
||||
# self-contained build environment, fetching Go dependencies, building
|
||||
# binaries, running tests, and doing release management. This file is intended
|
||||
# to be included from a project's Makefile, which needs to define the following
|
||||
# variables, at a minimum:
|
||||
#
|
||||
# * VERSION - The current version of the project in question.
|
||||
# * TARGET - The desired name of the built binary.
|
||||
#
|
||||
# Many of the variables defined below are defined conditionally (using '?'),
|
||||
# which allows the project's main Makefile to override any of these settings, if
|
||||
# needed. See also:
|
||||
#
|
||||
# https://www.gnu.org/software/make/manual/html_node/Flavors.html#Flavors.
|
||||
#
|
||||
# The including Makefile may define any number of extra targets that are
|
||||
# specific to that project.
|
||||
|
||||
VERSION ?= $(error VERSION not set in including Makefile)
|
||||
TARGET ?= $(error TARGET not set in including Makefile)
|
||||
|
||||
SRC ?= $(shell find . -type f -name "*.go" ! -path "./.build/*")
|
||||
GOOS ?= $(shell uname | tr A-Z a-z)
|
||||
GOARCH ?= $(subst x86_64,amd64,$(patsubst i%86,386,$(shell uname -m)))
|
||||
|
||||
GO_VERSION ?= 1.13
|
||||
|
||||
# Check for the correct version of go in the path. If we find it, use it.
|
||||
# Otherwise, prepare to build go locally.
|
||||
ifeq ($(shell command -v "go" >/dev/null && go version | sed -e 's/^[^0-9.]*\([0-9.]*\).*/\1/'), $(GO_VERSION))
|
||||
GOCC ?= $(shell command -v "go")
|
||||
GOFMT ?= $(shell command -v "gofmt")
|
||||
GO ?= $(GOCC)
|
||||
else
|
||||
GOURL ?= https://golang.org/dl
|
||||
GOPKG ?= go$(GO_VERSION).$(GOOS)-$(GOARCH).tar.gz
|
||||
GOROOT ?= $(CURDIR)/.build/go$(GO_VERSION)
|
||||
GOCC ?= $(GOROOT)/bin/go
|
||||
GOFMT ?= $(GOROOT)/bin/gofmt
|
||||
GO ?= GOROOT=$(GOROOT) $(GOCC)
|
||||
endif
|
||||
|
||||
# Use vendored dependencies if available. Otherwise try to download them.
|
||||
ifneq (,$(wildcard vendor))
|
||||
DEPENDENCIES := $(shell find vendor/ -type f -iname '*.go')
|
||||
GO := GO15VENDOREXPERIMENT=1 $(GO)
|
||||
else
|
||||
GOPATH := $(CURDIR)/.build/gopath
|
||||
ROOTPKG ?= github.com/prometheus/$(TARGET)
|
||||
SELFLINK ?= $(GOPATH)/src/$(ROOTPKG)
|
||||
DEPENDENCIES := dependencies-stamp
|
||||
GO := GOPATH=$(GOPATH) $(GO)
|
||||
endif
|
||||
|
||||
# Never honor GOBIN, should it be set at all.
|
||||
unexport GOBIN
|
||||
|
||||
SUFFIX ?= $(GOOS)-$(GOARCH)
|
||||
BINARY ?= $(TARGET)
|
||||
ARCHIVE ?= $(TARGET)-$(VERSION).$(SUFFIX).tar.gz
|
||||
|
||||
default: $(BINARY)
|
||||
|
||||
$(BINARY): $(GOCC) $(SRC) $(DEPENDENCIES) Makefile Makefile.COMMON
|
||||
$(GO) build $(GOFLAGS) -o $@
|
||||
|
||||
.PHONY: archive
|
||||
archive: $(ARCHIVE)
|
||||
|
||||
$(ARCHIVE): $(BINARY)
|
||||
tar -czf $@ $<
|
||||
|
||||
.PHONY: tag
|
||||
tag:
|
||||
git tag $(VERSION)
|
||||
git push --tags
|
||||
|
||||
.PHONY: test
|
||||
test: $(GOCC) $(DEPENDENCIES)
|
||||
$(GO) test $$($(GO) list ./... | grep -v /vendor/)
|
||||
|
||||
.PHONY: format
|
||||
format: $(GOCC)
|
||||
find . -iname '*.go' | egrep -v "^\./\.build|./generated|\./vendor|\.(l|y)\.go" | xargs -n1 $(GOFMT) -w -s=true
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
rm -rf $(BINARY) $(ARCHIVE) .build *-stamp
|
||||
|
||||
|
||||
|
||||
$(GOCC):
|
||||
@echo Go version $(GO_VERSION) required but not found in PATH.
|
||||
@echo About to download and install go$(GO_VERSION) to $(GOROOT)
|
||||
@echo Abort now if you want to manually install it system-wide instead.
|
||||
@echo
|
||||
@sleep 5
|
||||
mkdir -p .build
|
||||
# The archive contains a single directory called 'go/'.
|
||||
curl -L $(GOURL)/$(GOPKG) | tar -C .build -xzf -
|
||||
rm -rf $(GOROOT)
|
||||
mv .build/go $(GOROOT)
|
||||
|
||||
$(SELFLINK):
|
||||
mkdir -p $(dir $@)
|
||||
ln -s $(CURDIR) $@
|
||||
|
||||
# Download dependencies if project doesn't vendor them.
|
||||
dependencies-stamp: $(GOCC) $(SRC) | $(SELFLINK)
|
||||
$(GO) get -d
|
||||
touch $@
|
38
README.md
38
README.md
|
@ -1,4 +1,4 @@
|
|||
# Ceph Exporter [![GoDoc](https://godoc.org/github.com/digitalocean/ceph_exporter?status.svg)](https://godoc.org/github.com/digitalocean/ceph_exporter) [![Build Status](https://travis-ci.org/digitalocean/ceph_exporter.svg)](https://travis-ci.org/digitalocean/ceph_exporter) [![Coverage Status](https://coveralls.io/repos/github/digitalocean/ceph_exporter/badge.svg?branch=master&service=github)](https://coveralls.io/github/digitalocean/ceph_exporter?branch=master) [![Go Report Card](https://goreportcard.com/badge/digitalocean/ceph_exporter)](https://goreportcard.com/report/digitalocean/ceph_exporter)
|
||||
# Ceph Exporter [![GoDoc](https://godoc.org/github.com/digitalocean/ceph_exporter?status.svg)](https://godoc.org/github.com/digitalocean/ceph_exporter) ![build](https://github.com/digitalocean/ceph_exporter/actions/workflows/run_build.yml/badge.svg) ![tests](https://github.com/digitalocean/ceph_exporter/actions/workflows/run_tests.yml/badge.svg) [![Go Report Card](https://goreportcard.com/badge/digitalocean/ceph_exporter)](https://goreportcard.com/report/digitalocean/ceph_exporter)
|
||||
|
||||
A Prometheus exporter that scrapes meta information about a running Ceph
|
||||
cluster. All the information gathered from the cluster is done by interacting
|
||||
|
@ -30,32 +30,38 @@ variables:
|
|||
We use Ceph's [official Golang client](https://github.com/ceph/go-ceph) to run
|
||||
commands on the cluster.
|
||||
|
||||
This `ceph_exporter` branch is tested only on Ceph Nautilus releases. It might
|
||||
This `ceph_exporter` branch currently supports the Nautilus, Octopus (untested), and Pacific releases. It might
|
||||
not work as expected with older or non-LTS versions of Ceph.
|
||||
|
||||
## Environment Variables
|
||||
|
||||
Name | Description | Default
|
||||
---- | ---- | ----
|
||||
`TELEMETRY_ADDR` | Host:Port for ceph_exporter's metrics endpoint | `*:9128`
|
||||
`TELEMETRY_PATH` | URL Path for surfacing metrics to Prometheus | `/metrics`
|
||||
`EXPORTER_CONFIG` | Path to ceph_exporter configuration file | `/etc/ceph/exporter.yml`
|
||||
`RGW_MODE` | Enable collection of stats from RGW (0:disabled 1:enabled 2:background) | `0`
|
||||
`CEPH_CLUSTER` | Ceph cluster name | `ceph`
|
||||
`CEPH_CONFIG` | Path to Ceph configuration file | `/etc/ceph/ceph.conf`
|
||||
`CEPH_USER` | Ceph user to connect to cluster | `admin`
|
||||
`CEPH_RADOS_OP_TIMEOUT` | Ceph rados_osd_op_timeout and rados_mon_op_timeout used to contact cluster (0s means no limit) | `30s`
|
||||
`LOG_LEVEL` | logging level. One of: [trace, debug, info, warn, error, fatal, panic] | `info`
|
||||
| Name | Description | Default |
|
||||
|-------------------------|------------------------------------------------------------------------------------------------|--------------------------|
|
||||
| `TELEMETRY_ADDR` | Host:Port for ceph_exporter's metrics endpoint | `*:9128` |
|
||||
| `TELEMETRY_PATH` | URL Path for surfacing metrics to Prometheus | `/metrics` |
|
||||
| `EXPORTER_CONFIG` | Path to ceph_exporter configuration file | `/etc/ceph/exporter.yml` |
|
||||
| `RGW_MODE` | Enable collection of stats from RGW (0:disabled 1:enabled 2:background) | `0` |
|
||||
| `CEPH_CLUSTER` | Ceph cluster name | `ceph` |
|
||||
| `CEPH_CONFIG` | Path to Ceph configuration file | `/etc/ceph/ceph.conf` |
|
||||
| `CEPH_USER` | Ceph user to connect to cluster | `admin` |
|
||||
| `CEPH_RADOS_OP_TIMEOUT` | Ceph rados_osd_op_timeout and rados_mon_op_timeout used to contact cluster (0s means no limit) | `30s` |
|
||||
| `LOG_LEVEL` | Logging level. One of: [trace, debug, info, warn, error, fatal, panic] | `info` |
|
||||
| `TLS_CERT_FILE_PATH` | Path to the x509 certificate file for enabling TLS (the key file path must also be specified) | |
|
||||
| `TLS_KEY_FILE_PATH` | Path to the x509 key file for enabling TLS (the cert file path must also be specified) | |
|
||||
|
||||
## Installation
|
||||
|
||||
Typical way of installing in Go should work.
|
||||
The typical Go way of installing or building should work provided you have the [cgo dependencies](https://github.com/ceph/go-ceph#installation).
|
||||
|
||||
```
|
||||
$ go install
|
||||
$ go install -tags nautilus
|
||||
```
|
||||
|
||||
A Makefile is provided in case you find a need for it.
|
||||
```
|
||||
$ go build -o ceph_exporter -tags nautilus
|
||||
```
|
||||
|
||||
We build the client with support for Nautilus specifically, but the binary will work for Octopus and Pacific as well.
|
||||
|
||||
## Docker Image
|
||||
|
||||
|
|
|
@ -278,7 +278,7 @@ func NewClusterHealthCollector(exporter *Exporter) *ClusterHealthCollector {
|
|||
labels := make(prometheus.Labels)
|
||||
labels["cluster"] = exporter.Cluster
|
||||
|
||||
return &ClusterHealthCollector{
|
||||
collector := &ClusterHealthCollector{
|
||||
conn: exporter.Conn,
|
||||
logger: exporter.Logger,
|
||||
version: exporter.Version,
|
||||
|
@ -898,6 +898,15 @@ func NewClusterHealthCollector(exporter *Exporter) *ClusterHealthCollector {
|
|||
labels,
|
||||
),
|
||||
}
|
||||
|
||||
if exporter.Version.IsAtLeast(Pacific) {
|
||||
// Pacific adds the DAEMON_OLD_VERSION health check
|
||||
// that indicates that multiple versions of Ceph have been running for longer than mon_warn_older_version_delay
|
||||
// we'll interpret this as a critical warning (2)
|
||||
collector.healthChecksMap["DAEMON_OLD_VERSION"] = 2
|
||||
}
|
||||
|
||||
return collector
|
||||
}
|
||||
|
||||
func (c *ClusterHealthCollector) metricsList() []prometheus.Metric {
|
||||
|
@ -989,9 +998,8 @@ type cephHealthStats struct {
|
|||
Severity string `json:"severity"`
|
||||
Summary string `json:"summary"`
|
||||
} `json:"summary"`
|
||||
OverallStatus string `json:"overall_status"`
|
||||
Status string `json:"status"`
|
||||
Checks map[string]struct {
|
||||
Status string `json:"status"`
|
||||
Checks map[string]struct {
|
||||
Severity string `json:"severity"`
|
||||
Summary struct {
|
||||
Message string `json:"message"`
|
||||
|
@ -1034,18 +1042,6 @@ type cephHealthStats struct {
|
|||
} `json:"servicemap"`
|
||||
}
|
||||
|
||||
type cephHealthDetailStats struct {
|
||||
Checks map[string]struct {
|
||||
Details []struct {
|
||||
Message string `json:"message"`
|
||||
} `json:"detail"`
|
||||
Summary struct {
|
||||
Message string `json:"message"`
|
||||
} `json:"summary"`
|
||||
Severity string `json:"severity"`
|
||||
} `json:"checks"`
|
||||
}
|
||||
|
||||
func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
|
||||
cmd := c.cephUsageCommand(jsonFormat)
|
||||
buf, _, err := c.conn.MonCommand(cmd)
|
||||
|
@ -1068,23 +1064,6 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
|
|||
}
|
||||
}
|
||||
|
||||
switch stats.Health.OverallStatus {
|
||||
case CephHealthOK:
|
||||
c.HealthStatus.Set(0)
|
||||
c.HealthStatusInterpreter.Set(0)
|
||||
case CephHealthWarn:
|
||||
c.HealthStatus.Set(1)
|
||||
c.HealthStatusInterpreter.Set(2)
|
||||
case CephHealthErr:
|
||||
c.HealthStatus.Set(2)
|
||||
c.HealthStatusInterpreter.Set(3)
|
||||
default:
|
||||
c.HealthStatus.Set(2)
|
||||
c.HealthStatusInterpreter.Set(3)
|
||||
}
|
||||
|
||||
// This will be set only if Luminous is running. Will be
|
||||
// ignored otherwise.
|
||||
switch stats.Health.Status {
|
||||
case CephHealthOK:
|
||||
c.HealthStatus.Set(0)
|
||||
|
@ -1435,18 +1414,6 @@ func (c *ClusterHealthCollector) cephUsageCommand(f format) []byte {
|
|||
return cmd
|
||||
}
|
||||
|
||||
func (c *ClusterHealthCollector) cephHealthDetailCommand() []byte {
|
||||
cmd, err := json.Marshal(map[string]interface{}{
|
||||
"prefix": "health",
|
||||
"detail": "detail",
|
||||
"format": jsonFormat,
|
||||
})
|
||||
if err != nil {
|
||||
c.logger.WithError(err).Panic("error marshalling ceph health detail")
|
||||
}
|
||||
return cmd
|
||||
}
|
||||
|
||||
func (c *ClusterHealthCollector) collectRecoveryClientIO() error {
|
||||
cmd := c.cephUsageCommand(plainFormat)
|
||||
buf, _, err := c.conn.MonCommand(cmd)
|
||||
|
|
|
@ -192,7 +192,7 @@ func TestClusterHealthCollector(t *testing.T) {
|
|||
"num_remapped_pgs": 10
|
||||
}
|
||||
},
|
||||
"health": { "overall_status": "HEALTH_OK" } }`,
|
||||
"health": { "status": "HEALTH_OK" } }`,
|
||||
reMatch: []*regexp.Regexp{
|
||||
regexp.MustCompile(`health_status{cluster="ceph"} 0`),
|
||||
},
|
||||
|
@ -209,7 +209,7 @@ func TestClusterHealthCollector(t *testing.T) {
|
|||
"num_remapped_pgs": 10
|
||||
}
|
||||
},
|
||||
"health": { "overall_status": "HEALTH_WARN", "status": "HEALTH_OK } }`,
|
||||
"health": { "status": "HEALTH_OK } }`,
|
||||
reMatch: []*regexp.Regexp{
|
||||
regexp.MustCompile(`health_status{cluster="ceph"} 0`),
|
||||
regexp.MustCompile(`health_status_interp{cluster="ceph"} 0`),
|
||||
|
@ -245,7 +245,7 @@ func TestClusterHealthCollector(t *testing.T) {
|
|||
"num_remapped_pgs": 10
|
||||
}
|
||||
},
|
||||
"health": { "overall_status": "HEALTH_WARN" } }`,
|
||||
"health": { "status": "HEALTH_WARN" } }`,
|
||||
reMatch: []*regexp.Regexp{
|
||||
regexp.MustCompile(`health_status{cluster="ceph"} 1`),
|
||||
regexp.MustCompile(`health_status_interp{cluster="ceph"} 2`),
|
||||
|
@ -263,7 +263,7 @@ func TestClusterHealthCollector(t *testing.T) {
|
|||
"num_remapped_pgs": 10
|
||||
}
|
||||
},
|
||||
"health": { "overall_status": "HEALTH_ERR" } }`,
|
||||
"health": { "status": "HEALTH_ERR" } }`,
|
||||
reMatch: []*regexp.Regexp{
|
||||
regexp.MustCompile(`health_status{cluster="ceph"} 2`),
|
||||
regexp.MustCompile(`health_status_interp{cluster="ceph"} 3`),
|
||||
|
@ -815,8 +815,7 @@ $ sudo ceph -s
|
|||
[]byte(tt.input), "", nil,
|
||||
)
|
||||
|
||||
collector := NewClusterHealthCollector(&Exporter{Conn: conn, Cluster: "ceph", Logger: logrus.New()})
|
||||
collector.version = &Version{Major: 14, Minor: 2, Patch: 0}
|
||||
collector := NewClusterHealthCollector(&Exporter{Conn: conn, Cluster: "ceph", Logger: logrus.New(), Version: &Version{Major: 14, Minor: 2, Patch: 0}})
|
||||
err := prometheus.Register(collector)
|
||||
require.NoError(t, err)
|
||||
defer prometheus.Unregister(collector)
|
||||
|
|
88
ceph/osd.go
88
ceph/osd.go
|
@ -5,8 +5,6 @@ import (
|
|||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
|
@ -540,19 +538,16 @@ type cephOSDTree struct {
|
|||
} `json:"stray"`
|
||||
}
|
||||
|
||||
type osdNode struct {
|
||||
ID int64 `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
Status string `json:"status"`
|
||||
}
|
||||
|
||||
type cephOSDTreeDown struct {
|
||||
Nodes []struct {
|
||||
ID int64 `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
Status string `json:"status"`
|
||||
} `json:"nodes"`
|
||||
Stray []struct {
|
||||
ID int64 `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
Status string `json:"status"`
|
||||
} `json:"stray"`
|
||||
Nodes []osdNode `json:"nodes"`
|
||||
Stray []osdNode `json:"stray"`
|
||||
}
|
||||
|
||||
type cephPGDumpBrief struct {
|
||||
|
@ -564,24 +559,6 @@ type cephPGDumpBrief struct {
|
|||
} `json:"pg_stats"`
|
||||
}
|
||||
|
||||
type cephPGQuery struct {
|
||||
State string `json:"state"`
|
||||
Info struct {
|
||||
Stats struct {
|
||||
StatSum struct {
|
||||
NumObjectsRecovered int64 `json:"num_objects_recovered"`
|
||||
} `json:"stat_sum"`
|
||||
} `json:"stats"`
|
||||
} `json:"info"`
|
||||
RecoveryState []struct {
|
||||
Name string `json:"name"`
|
||||
EnterTime string `json:"enter_time"`
|
||||
RecoverProgress *struct {
|
||||
BackfillTargets []string `json:"backfill_targets"`
|
||||
} `json:"recovery_progress"`
|
||||
} `json:"recovery_state"`
|
||||
}
|
||||
|
||||
type cephOSDLabel struct {
|
||||
ID int64 `json:"id"`
|
||||
Name string `json:"name"`
|
||||
|
@ -595,40 +572,6 @@ type cephOSDLabel struct {
|
|||
parent int64 // parent id when building tables
|
||||
}
|
||||
|
||||
// backfillTargets returns a map from PG query result containing OSDs and
|
||||
// corresponding shards that are being backfilled.
|
||||
func (c cephPGQuery) backfillTargets() map[int64]int64 {
|
||||
osdRegExp := regexp.MustCompile(`^(\d+)\((\d+)\)$`)
|
||||
targets := make(map[int64]int64)
|
||||
|
||||
for _, state := range c.RecoveryState {
|
||||
if state.RecoverProgress == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
for _, osd := range state.RecoverProgress.BackfillTargets {
|
||||
m := osdRegExp.FindStringSubmatch(osd)
|
||||
if m == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
osdID, err := strconv.ParseInt(m[1], 10, 64)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
shard, err := strconv.ParseInt(m[2], 10, 64)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
targets[osdID] = shard
|
||||
}
|
||||
}
|
||||
|
||||
return targets
|
||||
}
|
||||
|
||||
func (o *OSDCollector) collectOSDDF() error {
|
||||
args := o.cephOSDDFCommand()
|
||||
buf, _, err := o.conn.MgrCommand(args)
|
||||
|
@ -904,7 +847,6 @@ func (o *OSDCollector) collectOSDTreeDown(ch chan<- prometheus.Metric) error {
|
|||
}
|
||||
|
||||
downItems := append(osdDown.Nodes, osdDown.Stray...)
|
||||
|
||||
for _, downItem := range downItems {
|
||||
if downItem.Type != "osd" {
|
||||
continue
|
||||
|
@ -1118,18 +1060,6 @@ func (o *OSDCollector) cephPGDumpCommand() [][]byte {
|
|||
return [][]byte{cmd}
|
||||
}
|
||||
|
||||
func (o *OSDCollector) cephPGQueryCommand(pgid string) []byte {
|
||||
cmd, err := json.Marshal(map[string]interface{}{
|
||||
"prefix": "query",
|
||||
"pgid": pgid,
|
||||
"format": jsonFormat,
|
||||
})
|
||||
if err != nil {
|
||||
o.logger.WithError(err).Panic("error marshalling ceph pg query")
|
||||
}
|
||||
return cmd
|
||||
}
|
||||
|
||||
func (o *OSDCollector) collectPGStates(ch chan<- prometheus.Metric) error {
|
||||
// - See if there are PGs that we're tracking that are now active
|
||||
// - See if there are new ones to add
|
||||
|
|
27
ceph/pool.go
27
ceph/pool.go
|
@ -16,6 +16,7 @@ package ceph
|
|||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"math"
|
||||
"strconv"
|
||||
|
||||
|
@ -273,23 +274,29 @@ func (p *PoolInfoCollector) Collect(ch chan<- prometheus.Metric) {
|
|||
}
|
||||
|
||||
func (p *PoolInfoCollector) getExpansionFactor(pool poolInfo) float64 {
|
||||
if ef, ok := p.getECExpansionFactor(pool); ok {
|
||||
ef, err := p.getECExpansionFactor(pool)
|
||||
if err == nil {
|
||||
return ef
|
||||
} else {
|
||||
// Non-EC pool (or unable to get profile info); assume that it's replicated.
|
||||
logrus.WithError(err).Warn("failed to get ec expansion factor")
|
||||
return pool.ActualSize
|
||||
}
|
||||
// Non-EC pool (or unable to get profile info); assume that it's replicated.
|
||||
return pool.ActualSize
|
||||
}
|
||||
|
||||
func (p *PoolInfoCollector) getECExpansionFactor(pool poolInfo) (float64, bool) {
|
||||
func (p *PoolInfoCollector) getECExpansionFactor(pool poolInfo) (float64, error) {
|
||||
cmd, err := json.Marshal(map[string]interface{}{
|
||||
"prefix": "osd erasure-code-profile get",
|
||||
"name": pool.Profile,
|
||||
"format": "json",
|
||||
})
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
|
||||
buf, _, err := p.conn.MonCommand(cmd)
|
||||
if err != nil {
|
||||
return -1, false
|
||||
return -1, err
|
||||
}
|
||||
|
||||
type ecInfo struct {
|
||||
|
@ -299,8 +306,12 @@ func (p *PoolInfoCollector) getECExpansionFactor(pool poolInfo) (float64, bool)
|
|||
|
||||
ecStats := ecInfo{}
|
||||
err = json.Unmarshal(buf, &ecStats)
|
||||
if err != nil || ecStats.K == "" || ecStats.M == "" {
|
||||
return -1, false
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
|
||||
if ecStats.K == "" || ecStats.M == "" {
|
||||
return -1, errors.New("missing stats")
|
||||
}
|
||||
|
||||
k, _ := strconv.ParseFloat(ecStats.K, 64)
|
||||
|
@ -308,7 +319,7 @@ func (p *PoolInfoCollector) getECExpansionFactor(pool poolInfo) (float64, bool)
|
|||
|
||||
expansionFactor := (k + m) / k
|
||||
roundedExpansion := math.Round(expansionFactor*100) / 100
|
||||
return roundedExpansion, true
|
||||
return roundedExpansion, nil
|
||||
}
|
||||
|
||||
func (p *PoolInfoCollector) getCrushRuleToRootMappings() map[int64]string {
|
||||
|
|
|
@ -162,7 +162,7 @@ func (r *RGWCollector) collect() error {
|
|||
return err
|
||||
}
|
||||
|
||||
tasks := make([]rgwTaskGC, 0, 0)
|
||||
tasks := make([]rgwTaskGC, 0)
|
||||
err = json.Unmarshal(data, &tasks)
|
||||
if err != nil {
|
||||
return err
|
||||
|
|
35
main.go
35
main.go
|
@ -16,7 +16,7 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"crypto/tls"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
|
@ -39,10 +39,6 @@ const (
|
|||
defaultRadosOpTimeout = 30 * time.Second
|
||||
)
|
||||
|
||||
var (
|
||||
errCephVersionUnsupported = errors.New("ceph version unsupported")
|
||||
)
|
||||
|
||||
// This horrible thing is a copy of tcpKeepAliveListener, tweaked to
|
||||
// specifically check if it hits EMFILE when doing an accept, and if so,
|
||||
// terminate the process.
|
||||
|
@ -85,6 +81,9 @@ func main() {
|
|||
cephConfig = envflag.String("CEPH_CONFIG", defaultCephConfigPath, "Path to Ceph config file")
|
||||
cephUser = envflag.String("CEPH_USER", defaultCephUser, "Ceph user to connect to cluster")
|
||||
cephRadosOpTimeout = envflag.Duration("CEPH_RADOS_OP_TIMEOUT", defaultRadosOpTimeout, "Ceph rados_osd_op_timeout and rados_mon_op_timeout used to contact cluster (0s means no limit)")
|
||||
|
||||
tlsCertPath = envflag.String("TLS_CERT_FILE_PATH", "", "Path to certificate file for TLS")
|
||||
tlsKeyPath = envflag.String("TLS_KEY_FILE_PATH", "", "Path to key file for TLS")
|
||||
)
|
||||
|
||||
envflag.Parse()
|
||||
|
@ -157,8 +156,28 @@ func main() {
|
|||
logrus.WithError(err).Fatal("error creating listener")
|
||||
}
|
||||
|
||||
err = http.Serve(emfileAwareTcpListener{ln.(*net.TCPListener), logger}, nil)
|
||||
if err != nil {
|
||||
logrus.WithError(err).Fatal("error serving requests")
|
||||
if len(*tlsCertPath) != 0 && len(*tlsKeyPath) != 0 {
|
||||
server := &http.Server{
|
||||
TLSConfig: &tls.Config{
|
||||
GetCertificate: func(info *tls.ClientHelloInfo) (*tls.Certificate, error) {
|
||||
caFiles, err := tls.LoadX509KeyPair(*tlsCertPath, *tlsKeyPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &caFiles, nil
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
err = server.ServeTLS(emfileAwareTcpListener{ln.(*net.TCPListener), logger}, "", "")
|
||||
if err != nil {
|
||||
logrus.WithError(err).Fatal("error serving TLS requests")
|
||||
}
|
||||
} else {
|
||||
err = http.Serve(emfileAwareTcpListener{ln.(*net.TCPListener), logger}, nil)
|
||||
if err != nil {
|
||||
logrus.WithError(err).Fatal("error serving requests")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue