diff --git a/discovery/manager.go b/discovery/manager.go index bb4409fea..3b13cf1cc 100644 --- a/discovery/manager.go +++ b/discovery/manager.go @@ -93,6 +93,7 @@ func NewManager(ctx context.Context, logger log.Logger) *Manager { targets: make(map[poolKey]map[string]*targetgroup.Group), discoverCancel: []context.CancelFunc{}, ctx: ctx, + updatert: 5 * time.Second, } } @@ -111,6 +112,10 @@ type Manager struct { providers []*provider // The sync channels sends the updates in map[targetSetName] where targetSetName is the job value from the scrape config. syncCh chan map[string][]*targetgroup.Group + + // How long to wait before sending updates to the channel. The variable + // should only be modified in unit tests. + updatert time.Duration } // Run starts the background processing @@ -166,7 +171,7 @@ func (m *Manager) startProvider(ctx context.Context, p *provider) { } func (m *Manager) updater(ctx context.Context, p *provider, updates chan []*targetgroup.Group) { - ticker := time.NewTicker(5 * time.Second) + ticker := time.NewTicker(m.updatert) defer ticker.Stop() triggerUpdate := make(chan struct{}, 1) @@ -181,11 +186,10 @@ func (m *Manager) updater(ctx context.Context, p *provider, updates chan []*targ select { case m.syncCh <- m.allGroups(): // Waiting until the receiver can accept the last update. level.Debug(m.logger).Log("msg", "discoverer exited", "provider", p.name) - return case <-ctx.Done(): - return } + return } for _, s := range p.subs { m.updateGroup(poolKey{setName: s, provider: p.name}, tgs) diff --git a/discovery/manager_test.go b/discovery/manager_test.go index d9566201f..e776de910 100644 --- a/discovery/manager_test.go +++ b/discovery/manager_test.go @@ -655,53 +655,69 @@ func TestTargetUpdatesOrder(t *testing.T) { }, } - for testIndex, testCase := range testCases { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger()) + for i, tc := range testCases { + tc := tc + t.Run(tc.title, func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + discoveryManager := NewManager(ctx, log.NewNopLogger()) + discoveryManager.updatert = 100 * time.Millisecond - var totalUpdatesCount int + var totalUpdatesCount int - provUpdates := make(chan []*targetgroup.Group) - for _, up := range testCase.updates { - go newMockDiscoveryProvider(up).Run(ctx, provUpdates) - if len(up) > 0 { - totalUpdatesCount = totalUpdatesCount + len(up) + provUpdates := make(chan []*targetgroup.Group) + for _, up := range tc.updates { + go newMockDiscoveryProvider(up).Run(ctx, provUpdates) + if len(up) > 0 { + totalUpdatesCount = totalUpdatesCount + len(up) + } } - } - Loop: - for x := 0; x < totalUpdatesCount; x++ { - select { - case <-time.After(10 * time.Second): - t.Errorf("%v. %q: no update arrived within the timeout limit", x, testCase.title) - break Loop - case tgs := <-provUpdates: - discoveryManager.updateGroup(poolKey{setName: strconv.Itoa(testIndex), provider: testCase.title}, tgs) - for _, received := range discoveryManager.allGroups() { - // Need to sort by the Groups source as the received order is not guaranteed. - sort.Sort(byGroupSource(received)) - if !reflect.DeepEqual(received, testCase.expectedTargets[x]) { - var receivedFormated string - for _, receivedTargets := range received { - receivedFormated = receivedFormated + receivedTargets.Source + ":" + fmt.Sprint(receivedTargets.Targets) - } - var expectedFormated string - for _, expectedTargets := range testCase.expectedTargets[x] { - expectedFormated = expectedFormated + expectedTargets.Source + ":" + fmt.Sprint(expectedTargets.Targets) - } - - t.Errorf("%v. %v: \ntargets mismatch \nreceived: %v \nexpected: %v", - x, testCase.title, - receivedFormated, - expectedFormated) + Loop: + for x := 0; x < totalUpdatesCount; x++ { + select { + case <-time.After(10 * time.Second): + t.Errorf("%d: no update arrived within the timeout limit", x) + break Loop + case tgs := <-provUpdates: + discoveryManager.updateGroup(poolKey{setName: strconv.Itoa(i), provider: tc.title}, tgs) + for _, got := range discoveryManager.allGroups() { + assertEqualGroups(t, got, tc.expectedTargets[x], func(got, expected string) string { + return fmt.Sprintf("%d: \ntargets mismatch \ngot: %v \nexpected: %v", + x, + got, + expected) + }) } } } - } + }) } } +func assertEqualGroups(t *testing.T, got, expected []*targetgroup.Group, msg func(got, expected string) string) { + t.Helper() + format := func(groups []*targetgroup.Group) string { + var s string + for i, group := range groups { + if i > 0 { + s += "," + } + s += group.Source + ":" + fmt.Sprint(group.Targets) + } + return s + } + + // Need to sort by the groups's source as the received order is not guaranteed. + sort.Sort(byGroupSource(got)) + sort.Sort(byGroupSource(expected)) + + if !reflect.DeepEqual(got, expected) { + t.Errorf(msg(format(got), format(expected))) + } + +} + func verifyPresence(t *testing.T, tSets map[poolKey]map[string]*targetgroup.Group, poolKey poolKey, label string, present bool) { if _, ok := tSets[poolKey]; !ok { t.Fatalf("'%s' should be present in Pool keys: %v", poolKey, tSets) @@ -813,6 +829,7 @@ scrape_configs: ctx, cancel := context.WithCancel(context.Background()) defer cancel() discoveryManager := NewManager(ctx, nil) + discoveryManager.updatert = 500 * time.Millisecond go discoveryManager.Run() c := make(map[string]sd_config.ServiceDiscoveryConfig) @@ -868,6 +885,251 @@ scrape_configs: } } +func TestCoordinationWithReceiver(t *testing.T) { + updateDelay := 100 * time.Millisecond + + type expect struct { + delay time.Duration + tgs map[string][]*targetgroup.Group + } + + testCases := []struct { + title string + providers map[string][]update + expected []expect + }{ + { + title: "Receiver should get updates even when the channel is blocked", + providers: map[string][]update{ + "mock1": []update{ + update{ + targetGroups: []targetgroup.Group{ + { + Source: "tg1", + Targets: []model.LabelSet{{"__instance__": "1"}}, + }, + }, + }, + update{ + interval: 4 * updateDelay / time.Millisecond, + targetGroups: []targetgroup.Group{ + { + Source: "tg2", + Targets: []model.LabelSet{{"__instance__": "2"}}, + }, + }, + }, + }, + }, + expected: []expect{ + { + delay: 2 * updateDelay, + tgs: map[string][]*targetgroup.Group{ + "mock1": []*targetgroup.Group{ + { + Source: "tg1", + Targets: []model.LabelSet{{"__instance__": "1"}}, + }, + }, + }, + }, + { + delay: 4 * updateDelay, + tgs: map[string][]*targetgroup.Group{ + "mock1": []*targetgroup.Group{ + { + Source: "tg1", + Targets: []model.LabelSet{{"__instance__": "1"}}, + }, + { + Source: "tg2", + Targets: []model.LabelSet{{"__instance__": "2"}}, + }, + }, + }, + }, + }, + }, + { + title: "The receiver gets an update when a target group is gone", + providers: map[string][]update{ + "mock1": []update{ + update{ + targetGroups: []targetgroup.Group{ + { + Source: "tg1", + Targets: []model.LabelSet{{"__instance__": "1"}}, + }, + }, + }, + update{ + interval: 2 * updateDelay / time.Millisecond, + targetGroups: []targetgroup.Group{ + { + Source: "tg1", + Targets: []model.LabelSet{}, + }, + }, + }, + }, + }, + expected: []expect{ + { + tgs: map[string][]*targetgroup.Group{ + "mock1": []*targetgroup.Group{ + { + Source: "tg1", + Targets: []model.LabelSet{{"__instance__": "1"}}, + }, + }, + }, + }, + { + tgs: map[string][]*targetgroup.Group{ + "mock1": []*targetgroup.Group{ + { + Source: "tg1", + Targets: []model.LabelSet{}, + }, + }, + }, + }, + }, + }, + { + title: "The receiver gets merged updates", + providers: map[string][]update{ + "mock1": []update{ + // This update should never be seen by the receiver because + // it is overwritten by the next one. + update{ + targetGroups: []targetgroup.Group{ + { + Source: "tg1", + Targets: []model.LabelSet{{"__instance__": "0"}}, + }, + }, + }, + update{ + targetGroups: []targetgroup.Group{ + { + Source: "tg1", + Targets: []model.LabelSet{{"__instance__": "1"}}, + }, + }, + }, + }, + }, + expected: []expect{ + { + tgs: map[string][]*targetgroup.Group{ + "mock1": []*targetgroup.Group{ + { + Source: "tg1", + Targets: []model.LabelSet{{"__instance__": "1"}}, + }, + }, + }, + }, + }, + }, + { + title: "Discovery with multiple providers", + providers: map[string][]update{ + "mock1": []update{ + // This update is available in the first receive. + update{ + targetGroups: []targetgroup.Group{ + { + Source: "tg1", + Targets: []model.LabelSet{{"__instance__": "1"}}, + }, + }, + }, + }, + "mock2": []update{ + // This update should only arrive after the receiver has read from the channel once. + update{ + interval: 2 * updateDelay / time.Millisecond, + targetGroups: []targetgroup.Group{ + { + Source: "tg2", + Targets: []model.LabelSet{{"__instance__": "2"}}, + }, + }, + }, + }, + }, + expected: []expect{ + { + tgs: map[string][]*targetgroup.Group{ + "mock1": []*targetgroup.Group{ + { + Source: "tg1", + Targets: []model.LabelSet{{"__instance__": "1"}}, + }, + }, + }, + }, + { + delay: 1 * updateDelay, + tgs: map[string][]*targetgroup.Group{ + "mock1": []*targetgroup.Group{ + { + Source: "tg1", + Targets: []model.LabelSet{{"__instance__": "1"}}, + }, + }, + "mock2": []*targetgroup.Group{ + { + Source: "tg2", + Targets: []model.LabelSet{{"__instance__": "2"}}, + }, + }, + }, + }, + }, + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.title, func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + mgr := NewManager(ctx, nil) + mgr.updatert = updateDelay + go mgr.Run() + + for name := range tc.providers { + p := newMockDiscoveryProvider(tc.providers[name]) + mgr.StartCustomProvider(ctx, name, p) + } + + for i, expected := range tc.expected { + time.Sleep(expected.delay) + tgs, ok := <-mgr.SyncCh() + if !ok { + t.Fatal("discovery manager channel is closed") + } + if len(tgs) != len(expected.tgs) { + t.Fatalf("step %d: target groups mismatch, got: %d, expected: %d\ngot: %#v\nexpected: %#v", + i, len(tgs), len(expected.tgs), tgs, expected.tgs) + } + for k := range expected.tgs { + if _, ok := tgs[k]; !ok { + t.Fatalf("step %d: target group not found: %s", i, k) + } + assertEqualGroups(t, tgs[k], expected.tgs[k], func(got, expected string) string { + return fmt.Sprintf("step %d: targets mismatch \ngot: %q \nexpected: %q", i, got, expected) + }) + } + } + }) + } +} + type update struct { targetGroups []targetgroup.Group interval time.Duration @@ -875,33 +1137,37 @@ type update struct { type mockdiscoveryProvider struct { updates []update - up chan<- []*targetgroup.Group } func newMockDiscoveryProvider(updates []update) mockdiscoveryProvider { - tp := mockdiscoveryProvider{ updates: updates, } return tp } -func (tp mockdiscoveryProvider) Run(ctx context.Context, up chan<- []*targetgroup.Group) { - tp.up = up - tp.sendUpdates() -} - -func (tp mockdiscoveryProvider) sendUpdates() { - for _, update := range tp.updates { - - time.Sleep(update.interval * time.Millisecond) - - tgs := make([]*targetgroup.Group, len(update.targetGroups)) - for i := range update.targetGroups { - tgs[i] = &update.targetGroups[i] +func (tp mockdiscoveryProvider) Run(ctx context.Context, upCh chan<- []*targetgroup.Group) { + for _, u := range tp.updates { + if u.interval > 0 { + t := time.NewTicker(u.interval * time.Millisecond) + defer t.Stop() + Loop: + for { + select { + case <-ctx.Done(): + return + case <-t.C: + break Loop + } + } } - tp.up <- tgs + tgs := make([]*targetgroup.Group, len(u.targetGroups)) + for i := range u.targetGroups { + tgs[i] = &u.targetGroups[i] + } + upCh <- tgs } + <-ctx.Done() } // byGroupSource implements sort.Interface so we can sort by the Source field.