Merge PR #41483 into master

* refs/pull/41483/head:
	cephadm: stop passing --no-hosts to podman
	mgr/nfs: use host.addr for backend IP where possible
	mgr/cephadm: convert host addr if non-IP to IP
	mgr/dashboard,prometheus: new method of getting mgr IP
	doc/cephadm: remove any reference to the use of DNS or /etc/hosts
	mgr/cephadm: use known host addr
	mgr/cephadm: resolve IP at 'orch host add' time

Reviewed-by: Sebastian Wagner <swagner@suse.com>
This commit is contained in:
Sage Weil 2021-05-27 19:14:53 -04:00
commit 1f30c0114d
15 changed files with 118 additions and 54 deletions

View File

@ -126,8 +126,8 @@ Adoption process
This will perform a ``cephadm check-host`` on each host before adding it;
this check ensures that the host is functioning properly. The IP address
argument is required only if DNS does not allow you to connect to each host
by its short name.
argument is recommended; if not provided, then the host name will be resolved
via DNS.
#. Verify that the adopted monitor and manager daemons are visible:

View File

@ -37,14 +37,18 @@ To add each new host to the cluster, perform two steps:
.. prompt:: bash #
ceph orch host add *newhost* [*<label1> ...*]
ceph orch host add *<newhost>* [*<ip>*] [*<label1> ...*]
For example:
.. prompt:: bash #
ceph orch host add host2
ceph orch host add host3
ceph orch host add host2 10.10.0.102
ceph orch host add host3 10.10.0.103
It is best to explicitly provide the host IP address. If an IP is
not provided, then the host name will be immediately resolved via
DNS and that IP will be used.
One or more labels can also be included to immediately label the
new host. For example, by default the ``_admin`` label will make
@ -53,7 +57,7 @@ To add each new host to the cluster, perform two steps:
.. prompt:: bash #
ceph orch host add host4 _admin
ceph orch host add host4 10.10.0.104 --labels _admin
.. _cephadm-removing-hosts:
@ -174,21 +178,21 @@ Many hosts can be added at once using
---
service_type: host
addr: node-00
hostname: node-00
addr: 192.168.0.10
labels:
- example1
- example2
---
service_type: host
addr: node-01
hostname: node-01
addr: 192.168.0.11
labels:
- grafana
---
service_type: host
addr: node-02
hostname: node-02
addr: 192.168.0.12
This can be combined with service specifications (below) to create a cluster spec
file to deploy a whole cluster in one command. See ``cephadm bootstrap --apply-spec``
@ -286,23 +290,12 @@ There are two ways to customize this configuration for your environment:
Fully qualified domain names vs bare host names
===============================================
cephadm has very minimal requirements when it comes to resolving host
names etc. When cephadm initiates an ssh connection to a remote host,
the host name can be resolved in four different ways:
- a custom ssh config resolving the name to an IP
- via explicitly providing an IP address to cephadm: ``ceph orch host add <hostname> <IP>``
- automatic name resolution via DNS.
Ceph itself uses the command ``hostname`` to determine the name of the
current host.
.. note::
cephadm demands that the name of the host given via ``ceph orch host add``
equals the output of ``hostname`` on remote hosts.
Otherwise cephadm can't be sure, the host names returned by
Otherwise cephadm can't be sure that names returned by
``ceph * metadata`` match the hosts known to cephadm. This might result
in a :ref:`cephadm-stray-host` warning.

View File

@ -3148,10 +3148,6 @@ class CephContainer:
]
if isinstance(self.ctx.container_engine, Podman):
# podman adds the container *name* to /etc/hosts (for 127.0.1.1)
# by default, which makes python's socket.getfqdn() return that
# instead of a valid hostname.
cmd_args.append('--no-hosts')
if os.path.exists('/etc/ceph/podman-auth.json'):
cmd_args.append('--authfile=/etc/ceph/podman-auth.json')

View File

@ -1,5 +1,6 @@
import datetime
from copy import copy
import ipaddress
import json
import logging
from typing import TYPE_CHECKING, Dict, List, Iterator, Optional, Any, Tuple, Set, Mapping, cast, \
@ -11,6 +12,8 @@ from ceph.deployment.service_spec import ServiceSpec, PlacementSpec
from ceph.utils import str_to_datetime, datetime_to_str, datetime_now
from orchestrator import OrchestratorError, HostSpec, OrchestratorEvent, service_to_daemon_types
from .utils import resolve_ip
if TYPE_CHECKING:
from .module import CephadmOrchestrator
@ -28,6 +31,7 @@ class Inventory:
def __init__(self, mgr: 'CephadmOrchestrator'):
self.mgr = mgr
adjusted_addrs = False
# load inventory
i = self.mgr.get_store('inventory')
if i:
@ -36,6 +40,22 @@ class Inventory:
for k, v in self._inventory.items():
if 'hostname' not in v:
v['hostname'] = k
# convert legacy non-IP addr?
try:
ipaddress.ip_address(v.get('addr'))
except ValueError:
ip = resolve_ip(cast(str, v.get('addr')))
try:
ipaddress.ip_address(ip)
if not ip.startswith('127.0.'):
self.mgr.log.info(f"inventory: adjusted host {v['hostname']} addr '{v['addr']}' -> '{ip}'")
v['addr'] = ip
adjusted_addrs = True
except ValueError:
pass
if adjusted_addrs:
self.save()
else:
self._inventory = dict()
logger.debug('Loaded inventory %s' % self._inventory)

View File

@ -1383,10 +1383,10 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
)
]
def _check_valid_addr(self, host: str, addr: str) -> None:
def _check_valid_addr(self, host: str, addr: str) -> str:
# make sure hostname is resolvable before trying to make a connection
try:
utils.resolve_ip(addr)
ip_addr = utils.resolve_ip(addr)
except OrchestratorError as e:
msg = str(e) + f'''
You may need to supply an address for {addr}
@ -1417,6 +1417,7 @@ Then run the following:
errors = [_i.replace("ERROR: ", "") for _i in err if _i.startswith('ERROR')]
raise OrchestratorError('Host %s (%s) failed check(s): %s' % (
host, addr, errors))
return ip_addr
def _add_host(self, spec):
# type: (HostSpec) -> str
@ -1426,7 +1427,9 @@ Then run the following:
:param host: host name
"""
assert_valid_host(spec.hostname)
self._check_valid_addr(spec.hostname, spec.addr)
ip_addr = self._check_valid_addr(spec.hostname, spec.addr)
if spec.addr == spec.hostname and ip_addr:
spec.addr = ip_addr
# prime crush map?
if spec.location:
@ -1443,7 +1446,7 @@ Then run the following:
self.offline_hosts_remove(spec.hostname)
self.event.set() # refresh stray health check
self.log.info('Added host %s' % spec.hostname)
return "Added host '{}'".format(spec.hostname)
return "Added host '{}' with addr '{}'".format(spec.hostname, spec.addr)
@handle_orch_error
def add_host(self, spec: HostSpec) -> str:

View File

@ -99,7 +99,7 @@ class IngressService(CephService):
assert(d.ports)
servers.append({
'name': f"{spec.backend_service}.{rank}",
'ip': d.ip or resolve_ip(str(d.hostname)),
'ip': d.ip or resolve_ip(self.mgr.inventory.get_addr(str(d.hostname))),
'port': d.ports[0],
})
else:
@ -114,7 +114,7 @@ class IngressService(CephService):
servers = [
{
'name': d.name(),
'ip': d.ip or resolve_ip(str(d.hostname)),
'ip': d.ip or resolve_ip(self.mgr.inventory.get_addr(str(d.hostname))),
'port': d.ports[0],
} for d in daemons if d.ports
]
@ -232,7 +232,7 @@ class IngressService(CephService):
# hosts other than this one become other_ips in the conf file, converted to IPs
if host in hosts:
hosts.remove(host)
other_ips = [resolve_ip(h) for h in hosts]
other_ips = [resolve_ip(self.mgr.inventory.get_addr(h)) for h in hosts]
keepalived_conf = self.mgr.template.render(
'services/ingress/keepalived.conf.j2',
@ -243,7 +243,7 @@ class IngressService(CephService):
'interface': interface,
'state': state,
'other_ips': other_ips,
'host_ip': resolve_ip(host),
'host_ip': resolve_ip(self.mgr.inventory.get_addr(host)),
}
)

View File

@ -96,7 +96,7 @@ class IscsiService(CephService):
if not spec:
logger.warning('No ServiceSpec found for %s', dd)
continue
ip = utils.resolve_ip(dd.hostname)
ip = utils.resolve_ip(self.mgr.inventory.get_addr(dd.hostname))
# IPv6 URL encoding requires square brackets enclosing the ip
if type(ip_address(ip)) is IPv6Address:
ip = f'[{ip}]'

View File

@ -70,9 +70,9 @@ def wait(m, c):
@contextmanager
def with_host(m: CephadmOrchestrator, name, refresh_hosts=True):
def with_host(m: CephadmOrchestrator, name, addr='1.2.3.4', refresh_hosts=True):
# type: (CephadmOrchestrator, str) -> None
with mock.patch("cephadm.utils.resolve_ip"):
with mock.patch("cephadm.utils.resolve_ip", return_value=addr):
wait(m, m.add_host(HostSpec(hostname=name)))
if refresh_hosts:
CephadmServe(m)._refresh_hosts_and_daemons()

View File

@ -84,19 +84,19 @@ class TestCephadm(object):
def test_host(self, cephadm_module):
assert wait(cephadm_module, cephadm_module.get_hosts()) == []
with with_host(cephadm_module, 'test'):
assert wait(cephadm_module, cephadm_module.get_hosts()) == [HostSpec('test', 'test')]
assert wait(cephadm_module, cephadm_module.get_hosts()) == [HostSpec('test', '1.2.3.4')]
# Be careful with backward compatibility when changing things here:
assert json.loads(cephadm_module.get_store('inventory')) == \
{"test": {"hostname": "test", "addr": "test", "labels": [], "status": ""}}
{"test": {"hostname": "test", "addr": "1.2.3.4", "labels": [], "status": ""}}
with with_host(cephadm_module, 'second'):
with with_host(cephadm_module, 'second', '1.2.3.5'):
assert wait(cephadm_module, cephadm_module.get_hosts()) == [
HostSpec('test', 'test'),
HostSpec('second', 'second')
HostSpec('test', '1.2.3.4'),
HostSpec('second', '1.2.3.5')
]
assert wait(cephadm_module, cephadm_module.get_hosts()) == [HostSpec('test', 'test')]
assert wait(cephadm_module, cephadm_module.get_hosts()) == [HostSpec('test', '1.2.3.4')]
assert wait(cephadm_module, cephadm_module.get_hosts()) == []
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
@ -316,7 +316,7 @@ class TestCephadm(object):
with mock.patch("cephadm.module.CephadmOrchestrator.mon_command") as _mon_cmd:
CephadmServe(cephadm_module)._check_daemons()
_mon_cmd.assert_any_call(
{'prefix': 'dashboard set-grafana-api-url', 'value': 'https://test:3000'},
{'prefix': 'dashboard set-grafana-api-url', 'value': 'https://1.2.3.4:3000'},
None)
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
@ -987,12 +987,12 @@ class TestCephadm(object):
assert "Host 'test' not found" in err
out = wait(cephadm_module, cephadm_module.get_hosts())[0].to_json()
assert out == HostSpec('test', 'test', status='Offline').to_json()
assert out == HostSpec('test', '1.2.3.4', status='Offline').to_json()
_get_connection.side_effect = None
assert CephadmServe(cephadm_module)._check_host('test') is None
out = wait(cephadm_module, cephadm_module.get_hosts())[0].to_json()
assert out == HostSpec('test', 'test').to_json()
assert out == HostSpec('test', '1.2.3.4').to_json()
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
def test_dont_touch_offline_or_maintenance_host_daemons(self, cephadm_module):

View File

@ -16,6 +16,11 @@ from orchestrator import OrchestratorError
from orchestrator._interface import DaemonDescription
# Test double for the inventory object; FakeMgr exposes an instance as `self.inventory`.
class FakeInventory:
# Returns the fixed address '1.2.3.4' regardless of the host name passed in.
def get_addr(self, name: str) -> str:
return '1.2.3.4'
class FakeMgr:
def __init__(self):
self.config = ''
@ -23,6 +28,7 @@ class FakeMgr:
self.mon_command = MagicMock(side_effect=self._check_mon_command)
self.template = MagicMock()
self.log = MagicMock()
self.inventory = FakeInventory()
def _check_mon_command(self, cmd_dict, inbuf=None):
prefix = cmd_dict.get('prefix')

View File

@ -6,7 +6,6 @@ import collections
import errno
import logging
import os
import socket
import ssl
import sys
import tempfile
@ -107,6 +106,8 @@ class CherryPyConfig(object):
else:
server_port = self.get_localized_module_option('ssl_server_port', 8443) # type: ignore
if server_addr == '::':
server_addr = self.get_mgr_ip() # type: ignore
if server_addr is None:
raise ServerConfigException(
'no server_addr configured; '
@ -191,7 +192,7 @@ class CherryPyConfig(object):
uri = "{0}://{1}:{2}{3}/".format(
'https' if use_ssl else 'http',
socket.getfqdn(server_addr if server_addr != '::' else ''),
server_addr,
server_port,
self.url_prefix
)

View File

@ -19,6 +19,7 @@ from collections import defaultdict
from enum import IntEnum
import rados
import re
import socket
import sys
import time
from ceph_argparse import CephArgtype
@ -795,6 +796,21 @@ class MgrStandbyModule(ceph_module.BaseMgrStandbyModule, MgrModuleLoggingMixin):
def get_active_uri(self) -> str:
return self._ceph_get_active_uri()
def get_mgr_ip(self) -> str:
    """Return an IPv4 address for the local host, or its host name.

    The local host name is resolved with ``socket.getaddrinfo`` and the
    first IPv4 result that is neither ``127.0.0.1`` nor ``127.0.1.1`` is
    returned. If resolution fails, or yields no such address, the host
    name itself is returned unchanged.
    """
    hostname = socket.gethostname()
    try:
        # only the lookup itself can raise gaierror, so keep the try narrow
        candidates = socket.getaddrinfo(hostname, None,
                                        flags=socket.AI_CANONNAME,
                                        type=socket.SOCK_STREAM)
    except socket.gaierror:
        # best effort: fall back to the bare host name
        return hostname
    # pick first v4 IP, if present, as long as it is not 127.0.{0,1}.1
    for family, _socktype, _proto, _canonname, sockaddr in candidates:
        if family == socket.AF_INET and sockaddr[0] not in ('127.0.1.1', '127.0.0.1'):
            return sockaddr[0]
    return hostname
def get_localized_module_option(self, key: str, default: OptionValue = None) -> OptionValue:
r = self._ceph_get_module_option(key, self.get_mgr_id())
if r is None:
@ -1376,6 +1392,21 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin):
def get_ceph_conf_path(self) -> str:
return self._ceph_get_ceph_conf_path()
def get_mgr_ip(self) -> str:
    """Return an IPv4 address for the local host, or its host name.

    The local host name is resolved with ``socket.getaddrinfo`` and the
    first IPv4 result that is neither ``127.0.0.1`` nor ``127.0.1.1`` is
    returned. If resolution fails, or yields no such address, the host
    name itself is returned unchanged.
    """
    hostname = socket.gethostname()
    try:
        # only the lookup itself can raise gaierror, so keep the try narrow
        candidates = socket.getaddrinfo(hostname, None,
                                        flags=socket.AI_CANONNAME,
                                        type=socket.SOCK_STREAM)
    except socket.gaierror:
        # best effort: fall back to the bare host name
        return hostname
    # pick first v4 IP, if present, as long as it is not 127.0.{0,1}.1
    for family, _socktype, _proto, _canonname, sockaddr in candidates:
        if family == socket.AF_INET and sockaddr[0] not in ('127.0.1.1', '127.0.0.1'):
            return sockaddr[0]
    return hostname
def get_ceph_option(self, key: str) -> OptionValue:
return self._ceph_get_option(key)

View File

@ -146,9 +146,21 @@ class NFSCluster:
for cluster in completion.result:
if self.cluster_id == cluster.service_id():
try:
if cluster.ip:
ip = cluster.ip
else:
c = self.mgr.get_hosts()
orchestrator.raise_if_exception(c)
hosts = [h for h in c.result
if h.hostname == cluster.hostname]
if hosts:
ip = resolve_ip(hosts[0].addr)
else:
# sigh
ip = resolve_ip(cluster.hostname)
backends.append({
"hostname": cluster.hostname,
"ip": cluster.ip or resolve_ip(cluster.hostname),
"ip": ip,
"port": cluster.ports[0]
})
except orchestrator.OrchestratorError:

View File

@ -329,7 +329,11 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule,
return cast(str, self.get_module_option("orchestrator"))
@_cli_write_command('orch host add')
def _add_host(self, hostname: str, addr: Optional[str] = None, labels: Optional[List[str]] = None, maintenance: Optional[bool] = False) -> HandleCommandResult:
def _add_host(self,
hostname: str,
addr: Optional[str] = None,
labels: Optional[List[str]] = None,
maintenance: Optional[bool] = False) -> HandleCommandResult:
"""Add a host"""
_status = 'maintenance' if maintenance else ''

View File

@ -5,7 +5,6 @@ import json
import math
import os
import re
import socket
import threading
import time
from mgr_module import CLIReadCommand, MgrModule, MgrStandbyModule, PG_STATES, Option, ServiceInfoT
@ -1383,10 +1382,9 @@ class Module(MgrModule):
# Publish the URI that others may use to access the service we're
# about to start serving
self.set_uri('http://{0}:{1}/'.format(
socket.getfqdn() if server_addr in ['::', '0.0.0.0'] else server_addr,
server_port
))
if server_addr in ['::', '0.0.0.0']:
server_addr = self.get_mgr_ip()
self.set_uri('http://{0}:{1}/'.format(server_addr, server_port))
cherrypy.config.update({
'server.socket_host': server_addr,