mgr/cephadm: make jaeger-collector urls a dep for jaeger-agent

The jaeger-agents need to know the URL of the collector(s)
that have been deployed. If a collector moves, or we deployed
the agents before the collector, we need to reconfig the agents
with updated info about the collectors. Failure to do so can
leave the jaeger-agents down, reporting

```
Could not create collector proxy","error":"at least one collector hostPort address is required when resolver is not available"
```

Fixes: https://tracker.ceph.com/issues/59704

Signed-off-by: Adam King <adking@redhat.com>
This commit is contained in:
Adam King 2023-05-09 15:06:41 -04:00
parent 6c6f0d3109
commit acc26d7b17
3 changed files with 15 additions and 0 deletions

View File

@ -40,6 +40,7 @@ from cephadm.agent import CephadmAgentHelpers
from mgr_module import MgrModule, HandleCommandResult, Option, NotifyType
from mgr_util import build_url
import orchestrator
from orchestrator.module import to_format, Format
@ -2712,6 +2713,12 @@ Then run the following:
deps.append(f'{hash(alertmanager_user + alertmanager_password)}')
elif daemon_type == 'promtail':
deps += get_daemon_names(['loki'])
elif daemon_type == JaegerAgentService.TYPE:
for dd in self.cache.get_daemons_by_type(JaegerCollectorService.TYPE):
assert dd.hostname is not None
port = dd.ports[0] if dd.ports else JaegerCollectorService.DEFAULT_SERVICE_PORT
deps.append(build_url(host=dd.hostname, port=port).lstrip('/'))
deps = sorted(deps)
else:
# TODO(redo): some error message!
pass

View File

@ -1060,6 +1060,11 @@ class CephadmServe:
diff = list(set(last_deps) - set(deps))
if any('secure_monitoring_stack' in e for e in diff):
action = 'redeploy'
elif dd.daemon_type == 'jaeger-agent':
# changes to jaeger-agent deps affect the way the unit.run for
# the daemon is written, which we rewrite on redeploy, but not
# on reconfig.
action = 'redeploy'
elif spec is not None and hasattr(spec, 'extra_container_args') and dd.extra_container_args != spec.extra_container_args:
self.log.debug(

View File

@ -20,13 +20,16 @@ class JaegerAgentService(CephadmService):
def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
    """Fill in the collector list and deps of a jaeger-agent daemon spec.

    Every jaeger-collector daemon currently in the mgr cache contributes
    one address built from its hostname and its first port, falling back
    to ``JaegerCollectorService.DEFAULT_SERVICE_PORT`` when the daemon
    has no ports recorded.

    The addresses are stored comma-joined, in cache order, under the
    ``collector_nodes`` key of ``daemon_spec.final_config``, and sorted
    in ``daemon_spec.deps``. The same (mutated) ``daemon_spec`` is
    returned.
    """
    assert self.TYPE == daemon_spec.daemon_type

    # One list serves both outputs: the agent config keeps cache order,
    # while deps get a sorted copy.
    collector_urls: List[str] = []
    for collector in self.mgr.cache.get_daemons_by_type(JaegerCollectorService.TYPE):
        assert collector.hostname is not None
        if collector.ports:
            collector_port = collector.ports[0]
        else:
            collector_port = JaegerCollectorService.DEFAULT_SERVICE_PORT
        # NOTE(review): the lstrip presumably drops a leading '//' that
        # build_url emits when no scheme is given -- confirm in mgr_util.
        address = build_url(host=collector.hostname, port=collector_port)
        collector_urls.append(address.lstrip('/'))

    daemon_spec.final_config = {'collector_nodes': ",".join(collector_urls)}
    daemon_spec.deps = sorted(collector_urls)
    return daemon_spec