Merge pull request #52691 from adk3798/nvme-of-cleanup

mgr/cephadm: nvme-of follow-up work

Follow-up fixes for the nvmeof service: mount the daemon's own keyring into the gateway container instead of the admin keyring, bump the default nvmeof image to 0.0.2, prefer removing daemons already in error state when only a subset of a service's daemons can be stopped, tighten the gateway's cephx caps from allow-all to rbd-only, point the gateway at its host's IP rather than the mgr IP, pass a rados_id through to the gateway config, and add a qa test that deploys nvmeof on a pool.

Reviewed-by: John Mulligan <jmulligan@redhat.com>
Adam King 2023-08-15 15:37:37 -04:00 committed by GitHub
commit 307cc750eb
7 changed files with 36 additions and 15 deletions

qa suite yaml (new file; exact path not shown)

@@ -0,0 +1,8 @@
+tasks:
+- cephadm.shell:
+    host.a:
+      - ceph osd pool create foo
+      - rbd pool init foo
+      - ceph orch apply nvmeof foo
+- cephadm.wait_for_service:
+    service: nvmeof.foo
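The test creates an rbd pool, deploys an nvmeof service on it, and blocks until the service reports ready. As a rough Python sketch (not teuthology's implementation) of what that final wait step amounts to, polling 'ceph orch ls --format json' — the service_name and status.running/status.size field names here are assumptions, not verified against this commit:

import json
import subprocess
import time

def wait_for_service(service_name: str, timeout: float = 300.0) -> None:
    # Poll the orchestrator until every expected daemon of the service runs.
    deadline = time.time() + timeout
    while time.time() < deadline:
        out = subprocess.check_output(['ceph', 'orch', 'ls', '--format', 'json'])
        for svc in json.loads(out):
            if svc.get('service_name') == service_name:
                status = svc.get('status', {})
                if status.get('running', 0) >= status.get('size', 1) > 0:
                    return
        time.sleep(5)
    raise TimeoutError(f'{service_name} did not become ready in time')

wait_for_service('nvmeof.foo')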

src/cephadm/cephadm.py

@@ -1024,8 +1024,7 @@ class CephNvmeof(object):
     def get_container_mounts(data_dir: str) -> Dict[str, str]:
         mounts = dict()
         mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
-        # mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
-        mounts['/etc/ceph/ceph.client.admin.keyring'] = '/etc/ceph/keyring:z' # TODO: FIXME
+        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
         mounts[os.path.join(data_dir, 'ceph-nvmeof.conf')] = '/src/ceph-nvmeof.conf:z'
         mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
         mounts['/dev/hugepages'] = '/dev/hugepages'
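This drops the hard-coded admin keyring mount (and its FIXME) in favor of the keyring cephadm writes into the daemon's own data directory. Purely as an illustration, a hypothetical helper (not cephadm's actual code) showing how a host-path to container-path dict like this typically becomes container volume arguments:

from typing import Dict, List

def mounts_to_volume_args(mounts: Dict[str, str]) -> List[str]:
    # Each host_path -> 'container_path[:opts]' pair becomes one -v argument.
    args: List[str] = []
    for host_path, container_path in mounts.items():
        args += ['-v', '{}:{}'.format(host_path, container_path)]
    return args

# '<data_dir>' stands in for the daemon's directory under /var/lib/ceph/<fsid>.
print(mounts_to_volume_args({'<data_dir>/keyring': '/etc/ceph/keyring:z'}))
# ['-v', '<data_dir>/keyring:/etc/ceph/keyring:z']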

src/pybind/mgr/cephadm/module.py

@@ -107,7 +107,7 @@ os._exit = os_exit_noop # type: ignore
 DEFAULT_IMAGE = 'quay.io/ceph/ceph'
 DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.43.0'
 DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.5.0'
-DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:0.0.1'
+DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:0.0.2'
 DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:2.4.0'
 DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:2.4.0'
 DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.25.0'

src/pybind/mgr/cephadm/serve.py

@@ -920,7 +920,18 @@ class CephadmServe:
             while daemons_to_remove and not _ok_to_stop(daemons_to_remove):
                 # let's find a subset that is ok-to-stop
-                daemons_to_remove.pop()
+                non_error_daemon_index = -1
+                # prioritize removing daemons in error state
+                for i, dmon in enumerate(daemons_to_remove):
+                    if dmon.status != DaemonDescriptionStatus.error:
+                        non_error_daemon_index = i
+                        break
+                if non_error_daemon_index != -1:
+                    daemons_to_remove.pop(non_error_daemon_index)
+                else:
+                    # all daemons in list are in error state
+                    # we should be able to remove all of them
+                    break

             for d in daemons_to_remove:
                 r = True
                 assert d.hostname is not None
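When the removal set is not ok-to-stop as a whole, the loop now shrinks it by popping a healthy daemon first, so daemons already in error state stay queued and get removed ahead of healthy ones; only when every candidate is errored does it stop shrinking and remove them all. A self-contained sketch of that selection policy, with simplified stand-ins for the cephadm types:

from dataclasses import dataclass
from enum import Enum

class Status(Enum):
    running = 1
    error = 2

@dataclass
class Daemon:
    name: str
    status: Status

def shrink_removal_set(daemons: list) -> bool:
    # Drop one non-error daemon from the candidates; False means all errored.
    for i, d in enumerate(daemons):
        if d.status != Status.error:
            daemons.pop(i)
            return True
    return False

candidates = [Daemon('a', Status.error), Daemon('b', Status.running)]
shrink_removal_set(candidates)
print([d.name for d in candidates])  # ['a'] -- the errored daemon stays queued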

src/pybind/mgr/cephadm/services/nvmeof.py

@@ -25,25 +25,26 @@ class NvmeofService(CephService):
         assert self.TYPE == daemon_spec.daemon_type
         spec = cast(NvmeofServiceSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
-        igw_id = daemon_spec.daemon_id
+        nvmeof_gw_id = daemon_spec.daemon_id
+        host_ip = self.mgr.inventory.get_addr(daemon_spec.host)
-        # TODO: fixme, we should restrict the permissions here to only the necessary ones
-        keyring = self.get_keyring_with_caps(self.get_auth_entity(igw_id),
-                                             ['mon', 'allow *',
-                                              'mds', 'allow *',
-                                              'mgr', 'allow *',
-                                              'osd', 'allow *'])
+        keyring = self.get_keyring_with_caps(self.get_auth_entity(nvmeof_gw_id),
+                                             ['mon', 'profile rbd',
+                                              'osd', 'allow all tag rbd *=*'])
         # TODO: check if we can force jinja2 to generate dicts with double quotes instead of using json.dumps
         transport_tcp_options = json.dumps(spec.transport_tcp_options) if spec.transport_tcp_options else None
+        name = '{}.{}'.format(utils.name_to_config_section('nvmeof'), nvmeof_gw_id)
+        rados_id = name[len('client.'):] if name.startswith('client.') else name
         context = {
             'spec': spec,
-            'name': '{}.{}'.format(utils.name_to_config_section('nvmeof'), igw_id),
-            'addr': self.mgr.get_mgr_ip(),
+            'name': name,
+            'addr': host_ip,
             'port': spec.port,
             'log_level': 'WARN',
             'rpc_socket': '/var/tmp/spdk.sock',
-            'transport_tcp_options': transport_tcp_options
+            'transport_tcp_options': transport_tcp_options,
+            'rados_id': rados_id
         }
         gw_conf = self.mgr.template.render('services/nvmeof/ceph-nvmeof.conf.j2', context)
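name_to_config_section('nvmeof') produces a 'client.'-prefixed section name, while librados wants the entity id without that prefix, hence the strip into rados_id. A worked example with a made-up daemon id:

# The daemon id 'foo.host1' below is hypothetical.
name = 'client.nvmeof.foo.host1'
rados_id = name[len('client.'):] if name.startswith('client.') else name
print(name)      # client.nvmeof.foo.host1 -- cephx entity / config section
print(rados_id)  # nvmeof.foo.host1 -- the id the gateway hands to librados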

src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2

@@ -11,6 +11,7 @@ state_update_interval_sec = 5
 [ceph]
 pool = {{ spec.pool }}
 config_file = /etc/ceph/ceph.conf
+id = {{ rados_id }}

 [mtls]
 server_key = {{ spec.server_key }}
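The new id line threads rados_id into the gateway's [ceph] section so it connects with its own cephx identity. A minimal sketch rendering just this fragment with Jinja2 directly (not how cephadm drives its template engine):

from jinja2 import Template

fragment = Template(
    '[ceph]\n'
    'pool = {{ spec.pool }}\n'
    'config_file = /etc/ceph/ceph.conf\n'
    'id = {{ rados_id }}\n'
)
# Jinja2 resolves spec.pool by attribute-or-item lookup, so a dict suffices.
print(fragment.render(spec={'pool': 'foo'}, rados_id='nvmeof.foo.host1'))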

src/pybind/mgr/cephadm/tests/test_services.py

@@ -368,7 +368,7 @@ class TestNVMEOFService:
     def test_nvmeof_dashboard_config(self, mock_resolve_ip):
         pass

-    @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '192.168.100.100')
+    @patch("cephadm.inventory.Inventory.get_addr", lambda _, __: '192.168.100.100')
     @patch("cephadm.serve.CephadmServe._run_cephadm")
     @patch("cephadm.module.CephadmOrchestrator.get_unique_name")
     def test_nvmeof_config(self, _get_name, _run_cephadm, cephadm_module: CephadmOrchestrator):
@@ -393,6 +393,7 @@ state_update_interval_sec = 5
 [ceph]
 pool = {pool}
 config_file = /etc/ceph/ceph.conf
+id = nvmeof.{nvmeof_daemon_id}
 [mtls]
 server_key = ./server.key
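Because the daemon address now comes from Inventory.get_addr() rather than the mgr IP, the test patches that method instead; the replacement lambda takes two ignored parameters since it stands in for an unbound method (self plus the host name). A standalone illustration of the pattern:

from unittest.mock import patch

class Inventory:
    # Minimal stand-in for cephadm's inventory class, for illustration only.
    def get_addr(self, host: str) -> str:
        raise RuntimeError('would consult the real inventory')

with patch.object(Inventory, 'get_addr', lambda _self, _host: '192.168.100.100'):
    print(Inventory().get_addr('host1'))  # 192.168.100.100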