Support AirBnB's Smartstack Nerve client for SD
Nerve's registration format differs from Serverset's. With this commit there is now a dedicated treecache package under util, and two separate discovery files for serverset and nerve. Reference: https://github.com/airbnb/nerve
Parent: c44594185f
Commit: 061fe2f364
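As background (not part of the diff below): a Nerve agent registers each endpoint as an ephemeral znode whose data is a small JSON document with host, port, and name fields. The following sketch, using a hypothetical payload and znode path, shows roughly how such a document maps onto the __address__ and __meta_nerve_* labels that the new discovery code derives.

package main

import (
    "encoding/json"
    "fmt"
)

// Mirrors the nerveMember struct added in this commit.
type nerveMember struct {
    Host string `json:"host"`
    Port int    `json:"port"`
    Name string `json:"name"`
}

func main() {
    // Hypothetical payload written by a Nerve agent to a znode such as
    // /monitoring/services/foo/member_0000000001.
    data := []byte(`{"host": "10.0.0.5", "port": 8080, "name": "foo"}`)

    var m nerveMember
    if err := json.Unmarshal(data, &m); err != nil {
        panic(err)
    }

    // The discovery code turns this into target labels roughly like so:
    fmt.Println("__address__                =", fmt.Sprintf("%s:%d", m.Host, m.Port))
    fmt.Println("__meta_nerve_path          =", "/monitoring/services/foo/member_0000000001")
    fmt.Println("__meta_nerve_endpoint_host =", m.Host)
    fmt.Println("__meta_nerve_endpoint_port =", fmt.Sprintf("%d", m.Port))
    fmt.Println("__meta_nerve_endpoint_name =", m.Name)
}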
@@ -119,6 +119,11 @@ var (
         Timeout: Duration(10 * time.Second),
     }
 
+    // DefaultNerveSDConfig is the default Nerve SD configuration.
+    DefaultNerveSDConfig = NerveSDConfig{
+        Timeout: Duration(10 * time.Second),
+    }
+
     // DefaultMarathonSDConfig is the default Marathon SD configuration.
     DefaultMarathonSDConfig = MarathonSDConfig{
         RefreshInterval: Duration(30 * time.Second),
@@ -367,6 +372,8 @@ type ScrapeConfig struct {
     ConsulSDConfigs []*ConsulSDConfig `yaml:"consul_sd_configs,omitempty"`
     // List of Serverset service discovery configurations.
     ServersetSDConfigs []*ServersetSDConfig `yaml:"serverset_sd_configs,omitempty"`
+    // NerveSDConfigs is a list of Nerve service discovery configurations.
+    NerveSDConfigs []*NerveSDConfig `yaml:"nerve_sd_configs,omitempty"`
     // MarathonSDConfigs is a list of Marathon service discovery configurations.
     MarathonSDConfigs []*MarathonSDConfig `yaml:"marathon_sd_configs,omitempty"`
     // List of Kubernetes service discovery configurations.
@@ -647,6 +654,38 @@ func (c *ServersetSDConfig) UnmarshalYAML(unmarshal func(interface{}) error) err
     return checkOverflow(c.XXX, "serverset_sd_config")
 }
 
+// NerveSDConfig is the configuration for AirBnB's Nerve in Zookeeper based discovery.
+type NerveSDConfig struct {
+    Servers []string `yaml:"servers"`
+    Paths   []string `yaml:"paths"`
+    Timeout Duration `yaml:"timeout,omitempty"`
+
+    // Catches all undefined fields and must be empty after parsing.
+    XXX map[string]interface{} `yaml:",inline"`
+}
+
+// UnmarshalYAML implements the yaml.Unmarshaler interface.
+func (c *NerveSDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
+    *c = DefaultNerveSDConfig
+    type plain NerveSDConfig
+    err := unmarshal((*plain)(c))
+    if err != nil {
+        return err
+    }
+    if len(c.Servers) == 0 {
+        return fmt.Errorf("nerve SD config must contain at least one Zookeeper server")
+    }
+    if len(c.Paths) == 0 {
+        return fmt.Errorf("nerve SD config must contain at least one path")
+    }
+    for _, path := range c.Paths {
+        if !strings.HasPrefix(path, "/") {
+            return fmt.Errorf("nerve SD config paths must begin with '/': %s", path)
+        }
+    }
+    return checkOverflow(c.XXX, "nerve_sd_config")
+}
+
 // MarathonSDConfig is the configuration for services running on Marathon.
 type MarathonSDConfig struct {
     Servers []string `yaml:"servers,omitempty"`
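For illustration only (not part of this commit), the sketch below shows how the new NerveSDConfig defaults and validation behave when a nerve_sd_configs entry is unmarshalled directly with gopkg.in/yaml.v2, the YAML library Prometheus uses for its configuration; the inputs are hypothetical.

package main

import (
    "fmt"

    "gopkg.in/yaml.v2"

    "github.com/prometheus/prometheus/config"
)

func main() {
    // Timeout is omitted, so UnmarshalYAML starts from DefaultNerveSDConfig (10s).
    var c config.NerveSDConfig
    if err := yaml.Unmarshal([]byte("servers: ['localhost']\npaths: ['/monitoring']\n"), &c); err != nil {
        panic(err)
    }
    fmt.Printf("%+v\n", c)

    // Relative paths are rejected by the validation added above; the error
    // mentions: nerve SD config paths must begin with '/'.
    var bad config.NerveSDConfig
    err := yaml.Unmarshal([]byte("servers: ['localhost']\npaths: ['monitoring']\n"), &bad)
    fmt.Println(err)
}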
@@ -270,6 +270,23 @@ var expectedConf = &Config{
                 },
             },
         },
+        {
+            JobName: "service-nerve",
+
+            ScrapeInterval: Duration(15 * time.Second),
+            ScrapeTimeout:  DefaultGlobalConfig.ScrapeTimeout,
+
+            MetricsPath: DefaultScrapeConfig.MetricsPath,
+            Scheme:      DefaultScrapeConfig.Scheme,
+
+            NerveSDConfigs: []*NerveSDConfig{
+                {
+                    Servers: []string{"localhost"},
+                    Paths:   []string{"/monitoring"},
+                    Timeout: Duration(10 * time.Second),
+                },
+            },
+        },
     },
     original: "",
 }
@@ -126,3 +126,10 @@ scrape_configs:
   - region: us-east-1
     access_key: access
     secret_key: secret
+
+- job_name: service-nerve
+  nerve_sd_configs:
+  - servers:
+    - localhost
+    paths:
+    - /monitoring
@@ -0,0 +1,150 @@
+// Copyright 2015 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package discovery
+
+import (
+    "encoding/json"
+    "fmt"
+    "sync"
+    "time"
+
+    "github.com/prometheus/common/model"
+    "github.com/samuel/go-zookeeper/zk"
+
+    "github.com/prometheus/prometheus/config"
+    "github.com/prometheus/prometheus/util/treecache"
+)
+
+const (
+    nerveNodePrefix = "member_"
+
+    nerveLabelPrefix         = model.MetaLabelPrefix + "nerve_"
+    nervePathLabel           = nerveLabelPrefix + "path"
+    nerveEndpointLabelPrefix = nerveLabelPrefix + "endpoint"
+)
+
+type nerveMember struct {
+    Host string `json:"host"`
+    Port int    `json:"port"`
+    Name string `json:"name"`
+}
+
+// NerveDiscovery retrieves target information from a Nerve server
+// and updates them via watches.
+type NerveDiscovery struct {
+    conf       *config.NerveSDConfig
+    conn       *zk.Conn
+    mu         sync.RWMutex
+    sources    map[string]*config.TargetGroup
+    sdUpdates  *chan<- config.TargetGroup
+    updates    chan treecache.ZookeeperTreeCacheEvent
+    treeCaches []*treecache.ZookeeperTreeCache
+}
+
+// NewNerveDiscovery returns a new NerveDiscovery for the given config.
+func NewNerveDiscovery(conf *config.NerveSDConfig) *NerveDiscovery {
+    conn, _, err := zk.Connect(conf.Servers, time.Duration(conf.Timeout))
+    conn.SetLogger(treecache.ZookeeperLogger{})
+    if err != nil {
+        return nil
+    }
+    updates := make(chan treecache.ZookeeperTreeCacheEvent)
+    sd := &NerveDiscovery{
+        conf:    conf,
+        conn:    conn,
+        updates: updates,
+        sources: map[string]*config.TargetGroup{},
+    }
+    go sd.processUpdates()
+    for _, path := range conf.Paths {
+        sd.treeCaches = append(sd.treeCaches, treecache.NewZookeeperTreeCache(conn, path, updates))
+    }
+    return sd
+}
+
+// Sources implements the TargetProvider interface.
+func (sd *NerveDiscovery) Sources() []string {
+    sd.mu.RLock()
+    defer sd.mu.RUnlock()
+    srcs := []string{}
+    for t := range sd.sources {
+        srcs = append(srcs, t)
+    }
+    return srcs
+}
+
+func (sd *NerveDiscovery) processUpdates() {
+    defer sd.conn.Close()
+    for event := range sd.updates {
+        tg := &config.TargetGroup{
+            Source: event.Path,
+        }
+        sd.mu.Lock()
+        if event.Data != nil {
+            labelSet, err := parseNerveMember(*event.Data, event.Path)
+            if err == nil {
+                tg.Targets = []model.LabelSet{*labelSet}
+                sd.sources[event.Path] = tg
+            } else {
+                delete(sd.sources, event.Path)
+            }
+        } else {
+            delete(sd.sources, event.Path)
+        }
+        sd.mu.Unlock()
+        if sd.sdUpdates != nil {
+            *sd.sdUpdates <- *tg
+        }
+    }
+
+    if sd.sdUpdates != nil {
+        close(*sd.sdUpdates)
+    }
+}
+
+// Run implements the TargetProvider interface.
+func (sd *NerveDiscovery) Run(ch chan<- config.TargetGroup, done <-chan struct{}) {
+    // Send on everything we have seen so far.
+    sd.mu.Lock()
+    for _, targetGroup := range sd.sources {
+        ch <- *targetGroup
+    }
+    // Tell processUpdates to send future updates.
+    sd.sdUpdates = &ch
+    sd.mu.Unlock()
+
+    <-done
+    for _, tc := range sd.treeCaches {
+        tc.Stop()
+    }
+}
+
+func parseNerveMember(data []byte, path string) (*model.LabelSet, error) {
+    member := nerveMember{}
+    err := json.Unmarshal(data, &member)
+    if err != nil {
+        return nil, fmt.Errorf("error unmarshaling nerve member %q: %s", path, err)
+    }
+
+    labels := model.LabelSet{}
+    labels[nervePathLabel] = model.LabelValue(path)
+    labels[model.AddressLabel] = model.LabelValue(
+        fmt.Sprintf("%s:%d", member.Host, member.Port))
+
+    labels[nerveEndpointLabelPrefix+"_host"] = model.LabelValue(member.Host)
+    labels[nerveEndpointLabelPrefix+"_port"] = model.LabelValue(fmt.Sprintf("%d", member.Port))
+    labels[nerveEndpointLabelPrefix+"_name"] = model.LabelValue(member.Name)
+
+    return &labels, nil
+}
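A hypothetical wiring of the new provider outside the target manager might look like the sketch below. It assumes the discovery package's import path at the time of this commit (retrieval/discovery) and a ZooKeeper server reachable on localhost:2181 with Nerve registrations under /monitoring; note that NewNerveDiscovery returns nil if the ZooKeeper connection cannot be set up.

package main

import (
    "fmt"
    "time"

    "github.com/prometheus/prometheus/config"
    "github.com/prometheus/prometheus/retrieval/discovery"
)

func main() {
    conf := &config.NerveSDConfig{
        Servers: []string{"localhost"},
        Paths:   []string{"/monitoring"},
        Timeout: config.Duration(10 * time.Second),
    }

    sd := discovery.NewNerveDiscovery(conf)

    ch := make(chan config.TargetGroup)
    done := make(chan struct{})
    go sd.Run(ch, done)

    // Print target groups as znodes appear, change, or vanish.
    go func() {
        for tg := range ch {
            fmt.Printf("source=%s targets=%v\n", tg.Source, tg.Targets)
        }
    }()

    time.Sleep(time.Minute)
    close(done) // stops the tree caches and ends Run
}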
@@ -14,20 +14,18 @@
 package discovery
 
 import (
-    "bytes"
     "encoding/json"
     "fmt"
     "strconv"
-    "strings"
     "sync"
     "time"
 
-    "github.com/prometheus/common/log"
     "github.com/prometheus/common/model"
     "github.com/samuel/go-zookeeper/zk"
 
     "github.com/prometheus/prometheus/config"
     "github.com/prometheus/prometheus/util/strutil"
+    "github.com/prometheus/prometheus/util/treecache"
 )
 
 const (
@@ -52,14 +50,6 @@ type serversetEndpoint struct {
     Port int
 }
 
-type zookeeperLogger struct {
-}
-
-// Implements zk.Logger
-func (zl zookeeperLogger) Printf(s string, i ...interface{}) {
-    log.Infof(s, i...)
-}
-
 // ServersetDiscovery retrieves target information from a Serverset server
 // and updates them via watches.
 type ServersetDiscovery struct {
@@ -68,18 +58,18 @@ type ServersetDiscovery struct {
     mu         sync.RWMutex
     sources    map[string]*config.TargetGroup
     sdUpdates  *chan<- config.TargetGroup
-    updates    chan zookeeperTreeCacheEvent
-    treeCaches []*zookeeperTreeCache
+    updates    chan treecache.ZookeeperTreeCacheEvent
+    treeCaches []*treecache.ZookeeperTreeCache
 }
 
 // NewServersetDiscovery returns a new ServersetDiscovery for the given config.
 func NewServersetDiscovery(conf *config.ServersetSDConfig) *ServersetDiscovery {
     conn, _, err := zk.Connect(conf.Servers, time.Duration(conf.Timeout))
-    conn.SetLogger(zookeeperLogger{})
+    conn.SetLogger(treecache.ZookeeperLogger{})
     if err != nil {
         return nil
     }
-    updates := make(chan zookeeperTreeCacheEvent)
+    updates := make(chan treecache.ZookeeperTreeCacheEvent)
     sd := &ServersetDiscovery{
         conf:    conf,
         conn:    conn,
@@ -88,7 +78,7 @@ func NewServersetDiscovery(conf *config.ServersetSDConfig) *ServersetDiscovery {
     }
     go sd.processUpdates()
     for _, path := range conf.Paths {
-        sd.treeCaches = append(sd.treeCaches, newZookeeperTreeCache(conn, path, updates))
+        sd.treeCaches = append(sd.treeCaches, treecache.NewZookeeperTreeCache(conn, path, updates))
     }
     return sd
 }
@@ -179,197 +169,3 @@ func parseServersetMember(data []byte, path string) (*model.LabelSet, error) {
 
     return &labels, nil
 }
-
-type zookeeperTreeCache struct {
-    conn     *zk.Conn
-    prefix   string
-    events   chan zookeeperTreeCacheEvent
-    zkEvents chan zk.Event
-    stop     chan struct{}
-    head     *zookeeperTreeCacheNode
-}
-
-type zookeeperTreeCacheEvent struct {
-    Path string
-    Data *[]byte
-}
-
-type zookeeperTreeCacheNode struct {
-    data     *[]byte
-    events   chan zk.Event
-    done     chan struct{}
-    stopped  bool
-    children map[string]*zookeeperTreeCacheNode
-}
-
-func newZookeeperTreeCache(conn *zk.Conn, path string, events chan zookeeperTreeCacheEvent) *zookeeperTreeCache {
-    tc := &zookeeperTreeCache{
-        conn:   conn,
-        prefix: path,
-        events: events,
-        stop:   make(chan struct{}),
-    }
-    tc.head = &zookeeperTreeCacheNode{
-        events:   make(chan zk.Event),
-        children: map[string]*zookeeperTreeCacheNode{},
-        stopped:  true,
-    }
-    err := tc.recursiveNodeUpdate(path, tc.head)
-    if err != nil {
-        log.Errorf("Error during initial read of Zookeeper: %s", err)
-    }
-    go tc.loop(err != nil)
-    return tc
-}
-
-func (tc *zookeeperTreeCache) Stop() {
-    tc.stop <- struct{}{}
-}
-
-func (tc *zookeeperTreeCache) loop(failureMode bool) {
-    retryChan := make(chan struct{})
-
-    failure := func() {
-        failureMode = true
-        time.AfterFunc(time.Second*10, func() {
-            retryChan <- struct{}{}
-        })
-    }
-    if failureMode {
-        failure()
-    }
-
-    for {
-        select {
-        case ev := <-tc.head.events:
-            log.Debugf("Received Zookeeper event: %s", ev)
-            if failureMode {
-                continue
-            }
-
-            if ev.Type == zk.EventNotWatching {
-                log.Infof("Lost connection to Zookeeper.")
-                failure()
-            } else {
-                path := strings.TrimPrefix(ev.Path, tc.prefix)
-                parts := strings.Split(path, "/")
-                node := tc.head
-                for _, part := range parts[1:] {
-                    childNode := node.children[part]
-                    if childNode == nil {
-                        childNode = &zookeeperTreeCacheNode{
-                            events:   tc.head.events,
-                            children: map[string]*zookeeperTreeCacheNode{},
-                            done:     make(chan struct{}, 1),
-                        }
-                        node.children[part] = childNode
-                    }
-                    node = childNode
-                }
-
-                err := tc.recursiveNodeUpdate(ev.Path, node)
-                if err != nil {
-                    log.Errorf("Error during processing of Zookeeper event: %s", err)
-                    failure()
-                } else if tc.head.data == nil {
-                    log.Errorf("Error during processing of Zookeeper event: path %s no longer exists", tc.prefix)
-                    failure()
-                }
-            }
-        case <-retryChan:
-            log.Infof("Attempting to resync state with Zookeeper")
-            // Reset root child nodes before traversing the Zookeeper path.
-            tc.head.children = make(map[string]*zookeeperTreeCacheNode)
-            err := tc.recursiveNodeUpdate(tc.prefix, tc.head)
-            if err != nil {
-                log.Errorf("Error during Zookeeper resync: %s", err)
-                failure()
-            } else {
-                log.Infof("Zookeeper resync successful")
-                failureMode = false
-            }
-        case <-tc.stop:
-            close(tc.events)
-            return
-        }
-    }
-}
-
-func (tc *zookeeperTreeCache) recursiveNodeUpdate(path string, node *zookeeperTreeCacheNode) error {
-    data, _, dataWatcher, err := tc.conn.GetW(path)
-    if err == zk.ErrNoNode {
-        tc.recursiveDelete(path, node)
-        if node == tc.head {
-            return fmt.Errorf("path %s does not exist", path)
-        }
-        return nil
-    } else if err != nil {
-        return err
-    }
-
-    if node.data == nil || !bytes.Equal(*node.data, data) {
-        node.data = &data
-        tc.events <- zookeeperTreeCacheEvent{Path: path, Data: node.data}
-    }
-
-    children, _, childWatcher, err := tc.conn.ChildrenW(path)
-    if err == zk.ErrNoNode {
-        tc.recursiveDelete(path, node)
-        return nil
-    } else if err != nil {
-        return err
-    }
-
-    currentChildren := map[string]struct{}{}
-    for _, child := range children {
-        currentChildren[child] = struct{}{}
-        childNode := node.children[child]
-        // Does not already exists or we previous had a watch that
-        // triggered.
-        if childNode == nil || childNode.stopped {
-            node.children[child] = &zookeeperTreeCacheNode{
-                events:   node.events,
-                children: map[string]*zookeeperTreeCacheNode{},
-                done:     make(chan struct{}, 1),
-            }
-            err = tc.recursiveNodeUpdate(path+"/"+child, node.children[child])
-            if err != nil {
-                return err
-            }
-        }
-    }
-
-    // Remove nodes that no longer exist
-    for name, childNode := range node.children {
-        if _, ok := currentChildren[name]; !ok || node.data == nil {
-            tc.recursiveDelete(path+"/"+name, childNode)
-            delete(node.children, name)
-        }
-    }
-
-    go func() {
-        // Pass up zookeeper events, until the node is deleted.
-        select {
-        case event := <-dataWatcher:
-            node.events <- event
-        case event := <-childWatcher:
-            node.events <- event
-        case <-node.done:
-        }
-    }()
-    return nil
-}
-
-func (tc *zookeeperTreeCache) recursiveDelete(path string, node *zookeeperTreeCacheNode) {
-    if !node.stopped {
-        node.done <- struct{}{}
-        node.stopped = true
-    }
-    if node.data != nil {
-        tc.events <- zookeeperTreeCacheEvent{Path: path, Data: nil}
-        node.data = nil
-    }
-    for name, childNode := range node.children {
-        tc.recursiveDelete(path+"/"+name, childNode)
-    }
-}
@@ -434,6 +434,9 @@ func providersFromConfig(cfg *config.ScrapeConfig) []TargetProvider {
     for i, c := range cfg.ServersetSDConfigs {
         app("serverset", i, discovery.NewServersetDiscovery(c))
     }
+    for i, c := range cfg.NerveSDConfigs {
+        app("nerve", i, discovery.NewNerveDiscovery(c))
+    }
     for i, c := range cfg.EC2SDConfigs {
         app("ec2", i, discovery.NewEC2Discovery(c))
     }
@@ -0,0 +1,213 @@
+package treecache
+
+import (
+    "bytes"
+    "fmt"
+    "strings"
+    "time"
+
+    "github.com/prometheus/common/log"
+    "github.com/samuel/go-zookeeper/zk"
+)
+
+type ZookeeperLogger struct {
+}
+
+// Implements zk.Logger
+func (zl ZookeeperLogger) Printf(s string, i ...interface{}) {
+    log.Infof(s, i...)
+}
+
+type ZookeeperTreeCache struct {
+    conn     *zk.Conn
+    prefix   string
+    events   chan ZookeeperTreeCacheEvent
+    zkEvents chan zk.Event
+    stop     chan struct{}
+    head     *zookeeperTreeCacheNode
+}
+
+type ZookeeperTreeCacheEvent struct {
+    Path string
+    Data *[]byte
+}
+
+type zookeeperTreeCacheNode struct {
+    data     *[]byte
+    events   chan zk.Event
+    done     chan struct{}
+    stopped  bool
+    children map[string]*zookeeperTreeCacheNode
+}
+
+func NewZookeeperTreeCache(conn *zk.Conn, path string, events chan ZookeeperTreeCacheEvent) *ZookeeperTreeCache {
+    tc := &ZookeeperTreeCache{
+        conn:   conn,
+        prefix: path,
+        events: events,
+        stop:   make(chan struct{}),
+    }
+    tc.head = &zookeeperTreeCacheNode{
+        events:   make(chan zk.Event),
+        children: map[string]*zookeeperTreeCacheNode{},
+        stopped:  true,
+    }
+    err := tc.recursiveNodeUpdate(path, tc.head)
+    if err != nil {
+        log.Errorf("Error during initial read of Zookeeper: %s", err)
+    }
+    go tc.loop(err != nil)
+    return tc
+}
+
+func (tc *ZookeeperTreeCache) Stop() {
+    tc.stop <- struct{}{}
+}
+
+func (tc *ZookeeperTreeCache) loop(failureMode bool) {
+    retryChan := make(chan struct{})
+
+    failure := func() {
+        failureMode = true
+        time.AfterFunc(time.Second*10, func() {
+            retryChan <- struct{}{}
+        })
+    }
+    if failureMode {
+        failure()
+    }
+
+    for {
+        select {
+        case ev := <-tc.head.events:
+            log.Debugf("Received Zookeeper event: %s", ev)
+            if failureMode {
+                continue
+            }
+
+            if ev.Type == zk.EventNotWatching {
+                log.Infof("Lost connection to Zookeeper.")
+                failure()
+            } else {
+                path := strings.TrimPrefix(ev.Path, tc.prefix)
+                parts := strings.Split(path, "/")
+                node := tc.head
+                for _, part := range parts[1:] {
+                    childNode := node.children[part]
+                    if childNode == nil {
+                        childNode = &zookeeperTreeCacheNode{
+                            events:   tc.head.events,
+                            children: map[string]*zookeeperTreeCacheNode{},
+                            done:     make(chan struct{}, 1),
+                        }
+                        node.children[part] = childNode
+                    }
+                    node = childNode
+                }
+
+                err := tc.recursiveNodeUpdate(ev.Path, node)
+                if err != nil {
+                    log.Errorf("Error during processing of Zookeeper event: %s", err)
+                    failure()
+                } else if tc.head.data == nil {
+                    log.Errorf("Error during processing of Zookeeper event: path %s no longer exists", tc.prefix)
+                    failure()
+                }
+            }
+        case <-retryChan:
+            log.Infof("Attempting to resync state with Zookeeper")
+            // Reset root child nodes before traversing the Zookeeper path.
+            tc.head.children = make(map[string]*zookeeperTreeCacheNode)
+            err := tc.recursiveNodeUpdate(tc.prefix, tc.head)
+            if err != nil {
+                log.Errorf("Error during Zookeeper resync: %s", err)
+                failure()
+            } else {
+                log.Infof("Zookeeper resync successful")
+                failureMode = false
+            }
+        case <-tc.stop:
+            close(tc.events)
+            return
+        }
+    }
+}
+
+func (tc *ZookeeperTreeCache) recursiveNodeUpdate(path string, node *zookeeperTreeCacheNode) error {
+    data, _, dataWatcher, err := tc.conn.GetW(path)
+    if err == zk.ErrNoNode {
+        tc.recursiveDelete(path, node)
+        if node == tc.head {
+            return fmt.Errorf("path %s does not exist", path)
+        }
+        return nil
+    } else if err != nil {
+        return err
+    }
+
+    if node.data == nil || !bytes.Equal(*node.data, data) {
+        node.data = &data
+        tc.events <- ZookeeperTreeCacheEvent{Path: path, Data: node.data}
+    }
+
+    children, _, childWatcher, err := tc.conn.ChildrenW(path)
+    if err == zk.ErrNoNode {
+        tc.recursiveDelete(path, node)
+        return nil
+    } else if err != nil {
+        return err
+    }
+
+    currentChildren := map[string]struct{}{}
+    for _, child := range children {
+        currentChildren[child] = struct{}{}
+        childNode := node.children[child]
+        // Does not already exists or we previous had a watch that
+        // triggered.
+        if childNode == nil || childNode.stopped {
+            node.children[child] = &zookeeperTreeCacheNode{
+                events:   node.events,
+                children: map[string]*zookeeperTreeCacheNode{},
+                done:     make(chan struct{}, 1),
+            }
+            err = tc.recursiveNodeUpdate(path+"/"+child, node.children[child])
+            if err != nil {
+                return err
+            }
+        }
+    }
+
+    // Remove nodes that no longer exist
+    for name, childNode := range node.children {
+        if _, ok := currentChildren[name]; !ok || node.data == nil {
+            tc.recursiveDelete(path+"/"+name, childNode)
+            delete(node.children, name)
+        }
+    }
+
+    go func() {
+        // Pass up zookeeper events, until the node is deleted.
+        select {
+        case event := <-dataWatcher:
+            node.events <- event
+        case event := <-childWatcher:
+            node.events <- event
+        case <-node.done:
+        }
+    }()
+    return nil
+}
+
+func (tc *ZookeeperTreeCache) recursiveDelete(path string, node *zookeeperTreeCacheNode) {
+    if !node.stopped {
+        node.done <- struct{}{}
+        node.stopped = true
+    }
+    if node.data != nil {
+        tc.events <- ZookeeperTreeCacheEvent{Path: path, Data: nil}
+        node.data = nil
+    }
+    for name, childNode := range node.children {
+        tc.recursiveDelete(path+"/"+name, childNode)
+    }
+}
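The tree cache can also be used on its own. The sketch below is a hypothetical consumer that assumes a ZooKeeper server at 127.0.0.1:2181 with an existing /services path; the event consumer must be running before NewZookeeperTreeCache is called, because the constructor's initial sync already sends on the (unbuffered) events channel.

package main

import (
    "fmt"
    "time"

    "github.com/samuel/go-zookeeper/zk"

    "github.com/prometheus/prometheus/util/treecache"
)

func main() {
    conn, _, err := zk.Connect([]string{"127.0.0.1:2181"}, 10*time.Second)
    if err != nil {
        panic(err)
    }
    defer conn.Close()
    conn.SetLogger(treecache.ZookeeperLogger{})

    events := make(chan treecache.ZookeeperTreeCacheEvent)

    // Start the consumer first: the cache pushes the initial state of the
    // watched subtree into the channel while it is being constructed.
    go func() {
        for ev := range events {
            if ev.Data != nil {
                fmt.Printf("update %s: %s\n", ev.Path, *ev.Data)
            } else {
                fmt.Printf("delete %s\n", ev.Path) // nil Data means the znode was removed
            }
        }
    }()

    tc := treecache.NewZookeeperTreeCache(conn, "/services", events)

    time.Sleep(time.Minute)
    tc.Stop() // closes the events channel, which ends the consumer goroutine
}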