mirror of
https://github.com/prometheus/prometheus
synced 2025-01-11 17:19:45 +00:00
Add discovery support for triton compute nodes (#7250)
Added optional configuration item role, defaults to 'container' (backwards-compatible).
Setting role to 'cn' will discover compute nodes instead.
Human-friendly compute node hostname discovery depends on cmon 1.7.0:
c1a2aeca36
Adjust testcases to use discovery config per case as two different types are now supported.
Updated documentation:
* new role setting
* clarify what the name 'container' covers as triton uses different names in different locations
Signed-off-by: jzinkweg <jzinkweg@gmail.com>
This commit is contained in:
parent
f4dd45609a
commit
1f69c38ba4
@ -590,8 +590,8 @@ var expectedConf = &Config{
|
||||
ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{
|
||||
TritonSDConfigs: []*triton.SDConfig{
|
||||
{
|
||||
|
||||
Account: "testAccount",
|
||||
Role: "container",
|
||||
DNSSuffix: "triton.example.com",
|
||||
Endpoint: "triton.example.com",
|
||||
Port: 9163,
|
||||
|
@ -46,6 +46,7 @@ const (
|
||||
|
||||
// DefaultSDConfig is the default Triton SD configuration.
|
||||
var DefaultSDConfig = SDConfig{
|
||||
Role: "container",
|
||||
Port: 9163,
|
||||
RefreshInterval: model.Duration(60 * time.Second),
|
||||
Version: 1,
|
||||
@ -54,6 +55,7 @@ var DefaultSDConfig = SDConfig{
|
||||
// SDConfig is the configuration for Triton based service discovery.
|
||||
type SDConfig struct {
|
||||
Account string `yaml:"account"`
|
||||
Role string `yaml:"role"`
|
||||
DNSSuffix string `yaml:"dns_suffix"`
|
||||
Endpoint string `yaml:"endpoint"`
|
||||
Groups []string `yaml:"groups,omitempty"`
|
||||
@ -71,6 +73,9 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if c.Role != "container" && c.Role != "cn" {
|
||||
return errors.New("triton SD configuration requires role to be 'container' or 'cn'")
|
||||
}
|
||||
if c.Account == "" {
|
||||
return errors.New("triton SD configuration requires an account")
|
||||
}
|
||||
@ -87,7 +92,7 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
}
|
||||
|
||||
// DiscoveryResponse models a JSON response from the Triton discovery.
|
||||
type discoveryResponse struct {
|
||||
type DiscoveryResponse struct {
|
||||
Containers []struct {
|
||||
Groups []string `json:"groups"`
|
||||
ServerUUID string `json:"server_uuid"`
|
||||
@ -98,6 +103,14 @@ type discoveryResponse struct {
|
||||
} `json:"containers"`
|
||||
}
|
||||
|
||||
// ComputeNodeDiscoveryResponse models a JSON response from the Triton discovery /gz/ endpoint.
|
||||
type ComputeNodeDiscoveryResponse struct {
|
||||
ComputeNodes []struct {
|
||||
ServerUUID string `json:"server_uuid"`
|
||||
ServerHostname string `json:"server_hostname"`
|
||||
} `json:"cns"`
|
||||
}
|
||||
|
||||
// Discovery periodically performs Triton-SD requests. It implements
|
||||
// the Discoverer interface.
|
||||
type Discovery struct {
|
||||
@ -137,17 +150,34 @@ func New(logger log.Logger, conf *SDConfig) (*Discovery, error) {
|
||||
return d, nil
|
||||
}
|
||||
|
||||
// triton-cmon has two discovery endpoints:
|
||||
// https://github.com/joyent/triton-cmon/blob/master/lib/endpoints/discover.js
|
||||
//
|
||||
// The default endpoint exposes "containers", otherwise called "virtual machines" in triton,
|
||||
// which are (branded) zones running on the triton platform.
|
||||
//
|
||||
// The /gz/ endpoint exposes "compute nodes", also known as "servers" or "global zones",
|
||||
// on which the "containers" are running.
|
||||
//
|
||||
// As triton is not internally consistent in using these names,
|
||||
// the terms as used in triton-cmon are used here.
|
||||
|
||||
func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
|
||||
var endpoint = fmt.Sprintf("https://%s:%d/v%d/discover", d.sdConfig.Endpoint, d.sdConfig.Port, d.sdConfig.Version)
|
||||
var endpointFormat string
|
||||
switch d.sdConfig.Role {
|
||||
case "container":
|
||||
endpointFormat = "https://%s:%d/v%d/discover"
|
||||
case "cn":
|
||||
endpointFormat = "https://%s:%d/v%d/gz/discover"
|
||||
default:
|
||||
return nil, errors.New(fmt.Sprintf("unknown role '%s' in configuration", d.sdConfig.Role))
|
||||
}
|
||||
var endpoint = fmt.Sprintf(endpointFormat, d.sdConfig.Endpoint, d.sdConfig.Port, d.sdConfig.Version)
|
||||
if len(d.sdConfig.Groups) > 0 {
|
||||
groups := url.QueryEscape(strings.Join(d.sdConfig.Groups, ","))
|
||||
endpoint = fmt.Sprintf("%s?groups=%s", endpoint, groups)
|
||||
}
|
||||
|
||||
tg := &targetgroup.Group{
|
||||
Source: endpoint,
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("GET", endpoint, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -168,8 +198,24 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
|
||||
return nil, errors.Wrap(err, "an error occurred when reading the response body")
|
||||
}
|
||||
|
||||
dr := discoveryResponse{}
|
||||
err = json.Unmarshal(data, &dr)
|
||||
// The JSON response body is different so it needs to be processed/mapped separately.
|
||||
switch d.sdConfig.Role {
|
||||
case "container":
|
||||
return d.processContainerResponse(data, endpoint)
|
||||
case "cn":
|
||||
return d.processComputeNodeResponse(data, endpoint)
|
||||
default:
|
||||
return nil, errors.New(fmt.Sprintf("unknown role '%s' in configuration", d.sdConfig.Role))
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Discovery) processContainerResponse(data []byte, endpoint string) ([]*targetgroup.Group, error) {
|
||||
tg := &targetgroup.Group{
|
||||
Source: endpoint,
|
||||
}
|
||||
|
||||
dr := DiscoveryResponse{}
|
||||
err := json.Unmarshal(data, &dr)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "an error occurred unmarshaling the discovery response json")
|
||||
}
|
||||
@ -195,3 +241,28 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
|
||||
|
||||
return []*targetgroup.Group{tg}, nil
|
||||
}
|
||||
|
||||
func (d *Discovery) processComputeNodeResponse(data []byte, endpoint string) ([]*targetgroup.Group, error) {
|
||||
tg := &targetgroup.Group{
|
||||
Source: endpoint,
|
||||
}
|
||||
|
||||
dr := ComputeNodeDiscoveryResponse{}
|
||||
err := json.Unmarshal(data, &dr)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "an error occurred unmarshaling the compute node discovery response json")
|
||||
}
|
||||
|
||||
for _, cn := range dr.ComputeNodes {
|
||||
labels := model.LabelSet{
|
||||
tritonLabelMachineID: model.LabelValue(cn.ServerUUID),
|
||||
tritonLabelMachineAlias: model.LabelValue(cn.ServerHostname),
|
||||
}
|
||||
addr := fmt.Sprintf("%s.%s:%d", cn.ServerUUID, d.sdConfig.DNSSuffix, d.sdConfig.Port)
|
||||
labels[model.AddressLabel] = model.LabelValue(addr)
|
||||
|
||||
tg.Targets = append(tg.Targets, labels)
|
||||
}
|
||||
|
||||
return []*targetgroup.Group{tg}, nil
|
||||
}
|
||||
|
@ -33,6 +33,7 @@ import (
|
||||
var (
|
||||
conf = SDConfig{
|
||||
Account: "testAccount",
|
||||
Role: "container",
|
||||
DNSSuffix: "triton.example.com",
|
||||
Endpoint: "127.0.0.1",
|
||||
Port: 443,
|
||||
@ -42,6 +43,7 @@ var (
|
||||
}
|
||||
badconf = SDConfig{
|
||||
Account: "badTestAccount",
|
||||
Role: "container",
|
||||
DNSSuffix: "bad.triton.example.com",
|
||||
Endpoint: "127.0.0.1",
|
||||
Port: 443,
|
||||
@ -56,6 +58,7 @@ var (
|
||||
}
|
||||
groupsconf = SDConfig{
|
||||
Account: "testAccount",
|
||||
Role: "container",
|
||||
DNSSuffix: "triton.example.com",
|
||||
Endpoint: "127.0.0.1",
|
||||
Groups: []string{"foo", "bar"},
|
||||
@ -64,6 +67,16 @@ var (
|
||||
RefreshInterval: 1,
|
||||
TLSConfig: config.TLSConfig{InsecureSkipVerify: true},
|
||||
}
|
||||
cnconf = SDConfig{
|
||||
Account: "testAccount",
|
||||
Role: "cn",
|
||||
DNSSuffix: "triton.example.com",
|
||||
Endpoint: "127.0.0.1",
|
||||
Port: 443,
|
||||
Version: 1,
|
||||
RefreshInterval: 1,
|
||||
TLSConfig: config.TLSConfig{InsecureSkipVerify: true},
|
||||
}
|
||||
)
|
||||
|
||||
func newTritonDiscovery(c SDConfig) (*Discovery, error) {
|
||||
@ -103,8 +116,22 @@ func TestTritonSDNewGroupsConfig(t *testing.T) {
|
||||
testutil.Equals(t, groupsconf.Port, td.sdConfig.Port)
|
||||
}
|
||||
|
||||
func TestTritonSDNewCNConfig(t *testing.T) {
|
||||
td, err := newTritonDiscovery(cnconf)
|
||||
testutil.Ok(t, err)
|
||||
testutil.Assert(t, td != nil, "")
|
||||
testutil.Assert(t, td.client != nil, "")
|
||||
testutil.Assert(t, td.interval != 0, "")
|
||||
testutil.Assert(t, td.sdConfig != nil, "")
|
||||
testutil.Equals(t, cnconf.Role, td.sdConfig.Role)
|
||||
testutil.Equals(t, cnconf.Account, td.sdConfig.Account)
|
||||
testutil.Equals(t, cnconf.DNSSuffix, td.sdConfig.DNSSuffix)
|
||||
testutil.Equals(t, cnconf.Endpoint, td.sdConfig.Endpoint)
|
||||
testutil.Equals(t, cnconf.Port, td.sdConfig.Port)
|
||||
}
|
||||
|
||||
func TestTritonSDRefreshNoTargets(t *testing.T) {
|
||||
tgts := testTritonSDRefresh(t, "{\"containers\":[]}")
|
||||
tgts := testTritonSDRefresh(t, conf, "{\"containers\":[]}")
|
||||
testutil.Assert(t, tgts == nil, "")
|
||||
}
|
||||
|
||||
@ -129,7 +156,7 @@ func TestTritonSDRefreshMultipleTargets(t *testing.T) {
|
||||
}`
|
||||
)
|
||||
|
||||
tgts := testTritonSDRefresh(t, dstr)
|
||||
tgts := testTritonSDRefresh(t, conf, dstr)
|
||||
testutil.Assert(t, tgts != nil, "")
|
||||
testutil.Equals(t, 2, len(tgts))
|
||||
}
|
||||
@ -156,9 +183,45 @@ func TestTritonSDRefreshCancelled(t *testing.T) {
|
||||
testutil.Equals(t, strings.Contains(err.Error(), context.Canceled.Error()), true)
|
||||
}
|
||||
|
||||
func testTritonSDRefresh(t *testing.T, dstr string) []model.LabelSet {
|
||||
func TestTritonSDRefreshCNsUUIDOnly(t *testing.T) {
|
||||
var (
|
||||
td, _ = newTritonDiscovery(conf)
|
||||
dstr = `{"cns":[
|
||||
{
|
||||
"server_uuid":"44454c4c-5000-104d-8037-b7c04f5a5131"
|
||||
},
|
||||
{
|
||||
"server_uuid":"a5894692-bd32-4ca1-908a-e2dda3c3a5e6"
|
||||
}]
|
||||
}`
|
||||
)
|
||||
|
||||
tgts := testTritonSDRefresh(t, cnconf, dstr)
|
||||
testutil.Assert(t, tgts != nil, "")
|
||||
testutil.Equals(t, 2, len(tgts))
|
||||
}
|
||||
|
||||
func TestTritonSDRefreshCNsWithHostname(t *testing.T) {
|
||||
var (
|
||||
dstr = `{"cns":[
|
||||
{
|
||||
"server_uuid":"44454c4c-5000-104d-8037-b7c04f5a5131",
|
||||
"server_hostname": "server01"
|
||||
},
|
||||
{
|
||||
"server_uuid":"a5894692-bd32-4ca1-908a-e2dda3c3a5e6",
|
||||
"server_hostname": "server02"
|
||||
}]
|
||||
}`
|
||||
)
|
||||
|
||||
tgts := testTritonSDRefresh(t, cnconf, dstr)
|
||||
testutil.Assert(t, tgts != nil, "")
|
||||
testutil.Equals(t, 2, len(tgts))
|
||||
}
|
||||
|
||||
func testTritonSDRefresh(t *testing.T, c SDConfig, dstr string) []model.LabelSet {
|
||||
var (
|
||||
td, _ = newTritonDiscovery(c)
|
||||
s = httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
fmt.Fprintln(w, dstr)
|
||||
}))
|
||||
|
@ -1020,37 +1020,60 @@ Serverset data must be in the JSON format, the Thrift format is not currently su
|
||||
scrape targets from [Container Monitor](https://github.com/joyent/rfd/blob/master/rfd/0027/README.md)
|
||||
discovery endpoints.
|
||||
|
||||
The following meta labels are available on targets during relabeling:
|
||||
One of the following `<triton_role>` types can be configured to discover targets:
|
||||
|
||||
#### `container`
|
||||
|
||||
The `container` role discovers one target per "virtual machine" owned by the `account`.
|
||||
These are SmartOS zones or lx/KVM/bhyve branded zones.
|
||||
|
||||
The following meta labels are available on targets during [relabeling](#relabel_config):
|
||||
|
||||
* `__meta_triton_groups`: the list of groups belonging to the target joined by a comma separator
|
||||
* `__meta_triton_machine_alias`: the alias of the target container
|
||||
* `__meta_triton_machine_brand`: the brand of the target container
|
||||
* `__meta_triton_machine_id`: the UUID of the target container
|
||||
* `__meta_triton_machine_image`: the target containers image type
|
||||
* `__meta_triton_server_id`: the server UUID for the target container
|
||||
* `__meta_triton_machine_image`: the target container's image type
|
||||
* `__meta_triton_server_id`: the server UUID the target container is running on
|
||||
|
||||
#### `cn`
|
||||
|
||||
The `cn` role discovers one target for per compute node (also known as "server" or "global zone") making up the Triton infrastructure.
|
||||
The `account` must be a Triton operator and is currently required to own at least one `container`.
|
||||
|
||||
The following meta labels are available on targets during [relabeling](#relabel_config):
|
||||
|
||||
* `__meta_triton_machine_alias`: the hostname of the target (requires triton-cmon 1.7.0 or newer)
|
||||
* `__meta_triton_machine_id`: the UUID of the target
|
||||
|
||||
See below for the configuration options for Triton discovery:
|
||||
```yaml
|
||||
# The information to access the Triton discovery API.
|
||||
|
||||
# The account to use for discovering new target containers.
|
||||
# The account to use for discovering new targets.
|
||||
account: <string>
|
||||
|
||||
# The DNS suffix which should be applied to target containers.
|
||||
# The type of targets to discover, can be set to:
|
||||
# * "container" to discover virtual machines (SmartOS zones, lx/KVM/bhyve branded zones) running on Triton
|
||||
# * "cn" to discover compute nodes (servers/global zones) making up the Triton infrastructure
|
||||
[ role : <string> | default = "container" ]
|
||||
|
||||
# The DNS suffix which should be applied to target.
|
||||
dns_suffix: <string>
|
||||
|
||||
# The Triton discovery endpoint (e.g. 'cmon.us-east-3b.triton.zone'). This is
|
||||
# often the same value as dns_suffix.
|
||||
endpoint: <string>
|
||||
|
||||
# A list of groups for which targets are retrieved. If omitted, all containers
|
||||
# available to the requesting account are scraped.
|
||||
# A list of groups for which targets are retrieved, only supported when `role` == `container`.
|
||||
# If omitted all containers owned by the requesting account are scraped.
|
||||
groups:
|
||||
[ - <string> ... ]
|
||||
|
||||
# The port to use for discovery and metric scraping.
|
||||
[ port: <int> | default = 9163 ]
|
||||
|
||||
# The interval which should be used for refreshing target containers.
|
||||
# The interval which should be used for refreshing targets.
|
||||
[ refresh_interval: <duration> | default = 60s ]
|
||||
|
||||
# The Triton discovery API version.
|
||||
|
Loading…
Reference in New Issue
Block a user