From 99a33b1e1ec57fd7599021bd4fec69fb84944364 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Tue, 3 Jan 2023 13:55:16 +0100 Subject: [PATCH 1/9] cephadm/box: change network_mode to bridge Signed-off-by: Pere Diaz Bou --- src/cephadm/box/DockerfileDocker | 2 +- src/cephadm/box/box.py | 16 ++++++++++++---- src/cephadm/box/docker-compose-podman.yml | 4 ++-- src/cephadm/box/docker/ceph/Dockerfile | 2 +- src/cephadm/box/util.py | 10 +++++----- 5 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/cephadm/box/DockerfileDocker b/src/cephadm/box/DockerfileDocker index c1aed6832d8..f64b48e4c4d 100644 --- a/src/cephadm/box/DockerfileDocker +++ b/src/cephadm/box/DockerfileDocker @@ -30,4 +30,4 @@ EXPOSE 22 FROM centos-systemd-docker WORKDIR /root -CMD [ "/usr/sbin/init" ] \ No newline at end of file +CMD [ "/usr/sbin/init" ] diff --git a/src/cephadm/box/box.py b/src/cephadm/box/box.py index 347df553f8f..3549d9c9e69 100755 --- a/src/cephadm/box/box.py +++ b/src/cephadm/box/box.py @@ -106,7 +106,7 @@ def check_selinux(): class Cluster(Target): _help = 'Manage docker cephadm boxes' - actions = ['bootstrap', 'start', 'down', 'list', 'sh', 'setup', 'cleanup'] + actions = ['bootstrap', 'start', 'down', 'list', 'sh', 'setup', 'cleanup', 'doctor'] def set_args(self): self.parser.add_argument( @@ -136,12 +136,15 @@ class Cluster(Target): def cleanup(self): cleanup_box() + def _set_cephadm_path(self): + cephadm_path = os.environ.get('CEPHADM_PATH') + os.symlink('/cephadm/cephadm.py', cephadm_path) + @ensure_inside_container def bootstrap(self): print('Running bootstrap on seed') + self._set_cephadm_path() cephadm_path = os.environ.get('CEPHADM_PATH') - os.symlink('/cephadm/cephadm', cephadm_path) - if engine() == 'docker': # restart to ensure docker is using daemon.json @@ -254,7 +257,7 @@ class Cluster(Target): dcflags += f' -f {Config.get("docker_v1_yaml")}' run_shell_command(f'{engine_compose()} {dcflags} up --scale hosts={hosts} -d') else: - 
run_shell_command(f'{engine_compose()} -f {Config.get("podman_yaml")} --podman-run-args "--group-add keep-groups --network=host --device /dev/fuse -it {loop_device_arg}" up --scale hosts={hosts} -d') + run_shell_command(f'{engine_compose()} -f {Config.get("podman_yaml")} --podman-run-args "--group-add keep-groups --device /dev/fuse -it {loop_device_arg}" up --scale hosts={hosts} -d') run_shell_command('sudo sysctl net.ipv4.conf.all.forwarding=1') run_shell_command('sudo iptables -P FORWARD ACCEPT') @@ -321,6 +324,11 @@ class Cluster(Target): print('Bootstrap finished successfully') + @ensure_outside_container + def doctor(self): + self._set_cephadm_path() + + @ensure_outside_container def down(self): if engine() == 'podman': diff --git a/src/cephadm/box/docker-compose-podman.yml b/src/cephadm/box/docker-compose-podman.yml index 4e941a3eb0e..b96a16a873d 100644 --- a/src/cephadm/box/docker-compose-podman.yml +++ b/src/cephadm/box/docker-compose-podman.yml @@ -51,10 +51,10 @@ services: hosts: extends: service: cephadm-host-base - scale: 1 + scale: 2 volumes: var-lib-docker: -network_mode: public +network_mode: bridge diff --git a/src/cephadm/box/docker/ceph/Dockerfile b/src/cephadm/box/docker/ceph/Dockerfile index b18aee4957f..c8a8d6fec6a 100644 --- a/src/cephadm/box/docker/ceph/Dockerfile +++ b/src/cephadm/box/docker/ceph/Dockerfile @@ -1,2 +1,2 @@ -FROM quay.ceph.io/ceph-ci/ceph:master +FROM quay.ceph.io/ceph-ci/ceph:main EXPOSE 8443 diff --git a/src/cephadm/box/util.py b/src/cephadm/box/util.py index b5a291d3985..a2284d475af 100644 --- a/src/cephadm/box/util.py +++ b/src/cephadm/box/util.py @@ -100,6 +100,7 @@ def run_shell_command(command: str, expect_error=False) -> str: ) out = '' + err = '' # let's read when output comes so it is in real time while True: # TODO: improve performance of this part, I think this part is a problem @@ -111,15 +112,14 @@ def run_shell_command(command: str, expect_error=False) -> str: sys.stdout.write(pout) sys.stdout.flush() out += 
pout + process.wait() - # no last break line - err = ( - process.stderr.read().decode().rstrip() - ) # remove trailing whitespaces and new lines + err += process.stderr.read().decode('latin1').strip() out = out.strip() if process.returncode != 0 and not expect_error: + err = colored(err, Colors.FAIL); raise RuntimeError(f'Failed command: {command}\n{err}') sys.exit(1) return out @@ -144,7 +144,7 @@ def run_cephadm_shell_command(command: str, expect_error=False) -> str: config = Config.get('config') keyring = Config.get('keyring') - with_cephadm_image = 'CEPHADM_IMAGE=quay.ceph.io/ceph-ci/ceph:master' + with_cephadm_image = 'CEPHADM_IMAGE=quay.ceph.io/ceph-ci/ceph:main' out = run_shell_command( f'{with_cephadm_image} cephadm --verbose shell --config {config} --keyring {keyring} -- {command}', expect_error, From eadca3a8936bc61c7109a251670fdd26bd4a20b2 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Tue, 3 Jan 2023 17:50:05 +0100 Subject: [PATCH 2/9] cephadm/box: remove podman-compose podman-compose was hard to maintain and it looks like it is no longer maintained (or stalled). Furthermore, the --scale flag didn't work with one pod alone, even though there was an option to disable creation of pods. Removing podman-compose is great to have more "low-level" flexibility. 
Signed-off-by: Pere Diaz Bou --- src/cephadm/box/DockerfilePodman | 1 + src/cephadm/box/box.py | 81 ++++++++++++++++------ src/cephadm/box/docker-compose-podman.yml | 60 ----------------- src/cephadm/box/host.py | 17 +++-- src/cephadm/box/osd.py | 3 +- src/cephadm/box/util.py | 82 +++++++++++++++++------ 6 files changed, 137 insertions(+), 107 deletions(-) delete mode 100644 src/cephadm/box/docker-compose-podman.yml diff --git a/src/cephadm/box/DockerfilePodman b/src/cephadm/box/DockerfilePodman index 04d428ae338..61db237f818 100644 --- a/src/cephadm/box/DockerfilePodman +++ b/src/cephadm/box/DockerfilePodman @@ -9,6 +9,7 @@ FROM fedora:34 ENV CEPHADM_PATH=/usr/local/sbin/cephadm +RUN ln -s /ceph/src/cephadm/cephadm.py $CEPHADM_PATH # NOTE: assume path of ceph volume # Don't include container-selinux and remove # directories used by yum that are just taking # up space. diff --git a/src/cephadm/box/box.py b/src/cephadm/box/box.py index 3549d9c9e69..1e42a888797 100755 --- a/src/cephadm/box/box.py +++ b/src/cephadm/box/box.py @@ -2,10 +2,12 @@ import argparse import os import stat +import json import sys import host import osd from util import ( + BoxType, Config, Target, ensure_inside_container, @@ -19,7 +21,8 @@ from util import ( colored, engine, engine_compose, - Colors + Colors, + get_seed_name ) CEPH_IMAGE = 'quay.ceph.io/ceph-ci/ceph:main' @@ -103,6 +106,38 @@ def check_selinux(): print(colored('selinux should be disabled, please disable it if you ' 'don\'t want unexpected behaviour.', Colors.WARNING)) +def setup_podman_env(hosts: int = 1): + network_name = 'box_network' + networks = run_shell_command('podman network ls') + if network_name not in networks: + run_shell_command(f'podman network create -d bridge {network_name}') + + run_default_options = """--group-add keep-groups --device /dev/fuse -it -d \\ + --cap-add SYS_ADMIN --cap-add NET_ADMIN --cap-add SYS_TIME --cap-add SYS_RAWIO --cap-add MKNOD \\ + --cap-add NET_RAW --cap-add SETUID --cap-add SETGID 
--cap-add CHOWN --cap-add SYS_PTRACE \\ + --cap-add SYS_TTY_CONFIG --cap-add CAP_AUDIT_WRITE --cap-add CAP_AUDIT_CONTROL \\ + -e CEPH_BRANCH=main -v /home/peristocles/redhat/cephbare/origin/main:/ceph:z \\ + -v /home/peristocles/redhat/cephbare/origin/main/src/cephadm:/cephadm:z \\ + -v /run/udev:/run/udev -v /sys/dev/block:/sys/dev/block -v /sys/fs/cgroup:/sys/fs/cgroup \\ + -v /dev/fuse:/dev/fuse -v /dev/disk:/dev/disk -v /dev/mapper:/dev/mapper \\ + -v /dev/mapper/control:/dev/mapper/control \\ + --stop-signal RTMIN+3 -m 20g cephadm-box \\ + """ + def add_option(dest, src): + dest = f'{src} {dest}' + return dest + + for host in range(hosts+1): # 0 will be the seed + options = run_default_options + options = add_option(options, f'--name box_hosts_{host}') + if host == 0: + options = add_option(options, f'-p 8443:8443') # dashboard + options = add_option(options, f'-p 3000:3000') # grafana + options = add_option(options, f'-p 9093:9093') # alertmanager + options = add_option(options, f'-p 9095:9095') # prometheus + options = add_option(options, f'--network {network_name}') + + run_shell_command(f'podman run {options}') class Cluster(Target): _help = 'Manage docker cephadm boxes' @@ -123,9 +158,6 @@ class Cluster(Target): @ensure_outside_container def setup(self): - if engine() == 'podman': - run_shell_command('pip3 install https://github.com/containers/podman-compose/archive/devel.tar.gz') - check_cgroups() check_selinux() @@ -144,7 +176,7 @@ class Cluster(Target): def bootstrap(self): print('Running bootstrap on seed') self._set_cephadm_path() - cephadm_path = os.environ.get('CEPHADM_PATH') + cephadm_path = str(os.environ.get('CEPHADM_PATH')) if engine() == 'docker': # restart to ensure docker is using daemon.json @@ -179,9 +211,9 @@ class Cluster(Target): skip_dashboard = '--skip-dashboard' if Config.get('skip-dashboard') else '' fsid = Config.get('fsid') - config_folder = Config.get('config_folder') - config = Config.get('config') - keyring = 
Config.get('keyring') + config_folder = str(Config.get('config_folder')) + config = str(Config.get('config')) + keyring = str(Config.get('keyring')) if not os.path.exists(config_folder): os.mkdir(config_folder) @@ -219,8 +251,8 @@ class Cluster(Target): def start(self): check_cgroups() check_selinux() - osds = Config.get('osds') - hosts = Config.get('hosts') + osds = int(Config.get('osds')) + hosts = int(Config.get('hosts')) # ensure boxes don't exist self.down() @@ -257,7 +289,7 @@ class Cluster(Target): dcflags += f' -f {Config.get("docker_v1_yaml")}' run_shell_command(f'{engine_compose()} {dcflags} up --scale hosts={hosts} -d') else: - run_shell_command(f'{engine_compose()} -f {Config.get("podman_yaml")} --podman-run-args "--group-add keep-groups --device /dev/fuse -it {loop_device_arg}" up --scale hosts={hosts} -d') + setup_podman_env(hosts=hosts) run_shell_command('sudo sysctl net.ipv4.conf.all.forwarding=1') run_shell_command('sudo iptables -P FORWARD ACCEPT') @@ -270,14 +302,14 @@ class Cluster(Target): systemctl status --no-pager chronyd """ for h in range(hosts): - run_dc_shell_commands(h + 1, 'hosts', chronyd_setup) - run_dc_shell_commands(1, 'seed', chronyd_setup) + run_dc_shell_commands(h + 1, BoxType.HOST, chronyd_setup) + run_dc_shell_commands(1, BoxType.SEED, chronyd_setup) print('Seting up host ssh servers') for h in range(hosts): - host._setup_ssh('hosts', h + 1) + host._setup_ssh(BoxType.HOST, h + 1) - host._setup_ssh('seed', 1) + host._setup_ssh(BoxType.SEED, 1) verbose = '-v' if Config.get('verbose') else '' skip_deploy = '--skip-deploy-osds' if Config.get('skip-deploy-osds') else '' @@ -293,7 +325,7 @@ class Cluster(Target): f'{skip_dashboard} ' f'{skip_monitoring_stack} ' ) - run_dc_shell_command(box_bootstrap_command, 1, 'seed') + run_dc_shell_command(box_bootstrap_command, 1, BoxType.SEED) info = get_boxes_container_info() ips = info['ips'] @@ -318,7 +350,7 @@ class Cluster(Target): info = get_boxes_container_info(with_seed=True) if engine() 
== 'docker': for i in range(info['size']): - if 'seed' in info['container_names'][i]: + if get_seed_name() in info['container_names'][i]: dashboard_ip = info["ips"][i] print(colored(f'dashboard available at https://{dashboard_ip}:8443', Colors.OKGREEN)) @@ -332,7 +364,18 @@ class Cluster(Target): @ensure_outside_container def down(self): if engine() == 'podman': - run_shell_command(f'{engine_compose()} -f {Config.get("podman_yaml")} down') + containers = json.loads(run_shell_command('podman container ls --format json')) + for container in containers: + for name in container['Names']: + if name.startswith('box_hosts_'): + run_shell_command(f'podman container kill {name}') + run_shell_command(f'podman container rm {name}') + pods = json.loads(run_shell_command('podman pod ls --format json')) + for pod in pods: + if 'Name' in pod and pod['Name'].startswith('box_pod_host'): + name = pod['Name'] + run_shell_command(f'podman pod kill {name}') + run_shell_command(f'podman pod rm {name}') else: run_shell_command(f'{engine_compose()} -f {Config.get("docker_yaml")} down') print('Successfully killed all boxes') @@ -351,7 +394,7 @@ class Cluster(Target): # we need verbose to see the prompt after running shell command Config.set('verbose', True) print('Seed bash') - run_shell_command(f'{engine_compose()} -f {Config.get("docker_yaml")} exec seed bash') + run_shell_command(f'{engine_compose()} -f {Config.get("docker_yaml")} exec {get_seed_name()} bash') targets = { diff --git a/src/cephadm/box/docker-compose-podman.yml b/src/cephadm/box/docker-compose-podman.yml deleted file mode 100644 index b96a16a873d..00000000000 --- a/src/cephadm/box/docker-compose-podman.yml +++ /dev/null @@ -1,60 +0,0 @@ -version: "2.4" -services: - cephadm-host-base: - build: - context: . 
- environment: - - CEPH_BRANCH=master - image: cephadm-box - # probably not needed with rootless Docker and cgroups v2 - # privileged: true - cap_add: - - SYS_ADMIN - - NET_ADMIN - - SYS_TIME - - SYS_RAWIO - - MKNOD - - NET_RAW - - SETUID - - SETGID - - CHOWN - - SYS_PTRACE - - SYS_TTY_CONFIG - - CAP_AUDIT_WRITE - - CAP_AUDIT_CONTROL - stop_signal: RTMIN+3 - volumes: - - ../../../:/ceph:z - - ..:/cephadm:z - # - ./daemon.json:/etc/docker/daemon.json - # dangerous, maybe just map the loopback - # https://stackoverflow.com/questions/36880565/why-dont-my-udev-rules-work-inside-of-a-running-docker-container - - /run/udev:/run/udev - - /sys/dev/block:/sys/dev/block - - /sys/fs/cgroup:/sys/fs/cgroup - - /dev/fuse:/dev/fuse - - /dev/disk:/dev/disk - - /dev/mapper:/dev/mapper - - /dev/mapper/control:/dev/mapper/control - mem_limit: "20g" - scale: -1 - seed: - extends: - service: cephadm-host-base - ports: - - "2222:22" - - "3000:3000" - - "8888:8888" - - "8443:8443" - - "9095:9095" - scale: 1 - hosts: - extends: - service: cephadm-host-base - scale: 2 - - -volumes: - var-lib-docker: - -network_mode: bridge diff --git a/src/cephadm/box/host.py b/src/cephadm/box/host.py index 6eb0aed6bda..cb663a61d95 100644 --- a/src/cephadm/box/host.py +++ b/src/cephadm/box/host.py @@ -10,10 +10,11 @@ from util import ( run_dc_shell_command, run_shell_command, engine, + BoxType ) -def _setup_ssh(container_type, container_index): +def _setup_ssh(container_type: BoxType, container_index): if inside_container(): if not os.path.exists('/root/.ssh/known_hosts'): run_shell_command('ssh-keygen -b 2048 -t rsa -f /root/.ssh/id_rsa -q -N ""') @@ -28,7 +29,7 @@ def _setup_ssh(container_type, container_index): print('Redirecting to _setup_ssh to container') verbose = '-v' if Config.get('verbose') else '' run_dc_shell_command( - f'/cephadm/box/box.py {verbose} --engine {engine()} host setup_ssh {container_type} {container_index}', + f'/cephadm/box/box.py {verbose} --engine {engine()} host setup_ssh 
{BoxType.to_string(container_type)} {container_index}', container_index, container_type, ) @@ -48,9 +49,9 @@ def _add_hosts(ips: Union[List[str], str], hostnames: Union[List[str], str]): hostnames = ' '.join(hostnames) hostnames = f'{hostnames}' run_dc_shell_command( - f'/cephadm/box/box.py {verbose} --engine {engine()} host add_hosts seed 1 --ips {ips} --hostnames {hostnames}', + f'/cephadm/box/box.py {verbose} --engine {engine()} host add_hosts seed {BoxType.to_string(BoxType.SEED)} --ips {ips} --hostnames {hostnames}', 1, - 'seed', + BoxType.SEED, ) @@ -74,9 +75,9 @@ def _copy_cluster_ssh_key(ips: Union[List[str], str]): ips = f'{ips}' # assume we only have one seed run_dc_shell_command( - f'/cephadm/box/box.py {verbose} --engine {engine()} host copy_cluster_ssh_key seed 1 --ips {ips}', + f'/cephadm/box/box.py {verbose} --engine {engine()} host copy_cluster_ssh_key {BoxType.to_string(BoxType.SEED)} 1 --ips {ips}', 1, - 'seed', + BoxType.SEED, ) @@ -98,7 +99,9 @@ class Host(Target): ) def setup_ssh(self): - _setup_ssh(Config.get('container_type') ,Config.get('container_index')) + type_ = Config.get('container_type') + index = Config.get('container_index') + _setup_ssh(type_, index) def add_hosts(self): ips = Config.get('ips') diff --git a/src/cephadm/box/osd.py b/src/cephadm/box/osd.py index c07a926e0e0..f9add66046c 100644 --- a/src/cephadm/box/osd.py +++ b/src/cephadm/box/osd.py @@ -3,6 +3,7 @@ import os from typing import Dict from util import ( + BoxType, Config, Target, ensure_inside_container, @@ -129,7 +130,7 @@ def deploy_osds_in_vg(vg: str): verbose = '-v' if Config.get('verbose') else '' print('Redirecting deploy osd in vg to inside container') run_dc_shell_command( - f'/cephadm/box/box.py {verbose} --engine {engine()} osd deploy --vg {vg}', 1, 'seed' + f'/cephadm/box/box.py {verbose} --engine {engine()} osd deploy --vg {vg}', 1, BoxType.SEED ) diff --git a/src/cephadm/box/util.py b/src/cephadm/box/util.py index a2284d475af..eb69c475917 100644 --- 
a/src/cephadm/box/util.py +++ b/src/cephadm/box/util.py @@ -2,6 +2,7 @@ import json import os import subprocess import sys +import enum from typing import Any, Callable, Dict class Colors: @@ -42,7 +43,6 @@ class Config: def add_args(args: Dict[str, str]) -> None: Config.args.update(args) - class Target: def __init__(self, argv, subparsers): self.argv = argv @@ -91,7 +91,7 @@ def ensure_inside_container(func) -> bool: def colored(msg, color: Colors): return color + msg + Colors.ENDC -def run_shell_command(command: str, expect_error=False) -> str: +def run_shell_command(command: str, expect_error=False, verbose=True) -> str: if Config.get('verbose'): print(f'{colored("Running command", Colors.HEADER)}: {colored(command, Colors.OKBLUE)}') @@ -108,7 +108,7 @@ def run_shell_command(command: str, expect_error=False) -> str: if pout == '' and process.poll() is not None: break if pout: - if Config.get('verbose'): + if Config.get('verbose') and verbose: sys.stdout.write(pout) sys.stdout.flush() out += pout @@ -125,7 +125,31 @@ def run_shell_command(command: str, expect_error=False) -> str: return out -def run_dc_shell_commands(index, box_type, commands: str, expect_error=False) -> str: +class BoxType(enum.IntEnum): + SEED = 0 # where we bootstrap cephadm + HOST = 1 + @staticmethod + def to_enum(value: str): + if value == 'seed': + return BoxType.SEED + elif value == 'host': + return BoxType.HOST + else: + print(f'Wrong container type {value}') + sys.exit(1) + + @staticmethod + def to_string(box_type): + if box_type == BoxType.SEED: + return 'seed' + elif box_type == BoxType.HOST: + return 'host' + else: + print(f'Wrong container type {type_}') + sys.exit(1) + + +def run_dc_shell_commands(index, box_type: BoxType, commands: str, expect_error=False) -> str: for command in commands.split('\n'): command = command.strip() if not command: @@ -153,9 +177,16 @@ def run_cephadm_shell_command(command: str, expect_error=False) -> str: def run_dc_shell_command( - command: str, index: 
int, box_type: str, expect_error=False + command: str, index: int, box_type: BoxType, expect_error=False ) -> str: - container_id = get_container_id(f'{box_type}_{index}') + box_type_str = 'box_hosts' + if box_type == BoxType.SEED: + index = 0 + if engine() == 'docker': + box_type_str = 'seed' + index = 1 + + container_id = get_container_id(f'{box_type_str}_{index}') print(container_id) out = run_shell_command( f'{engine()} exec -it {container_id} {command}', expect_error @@ -174,27 +205,38 @@ def engine(): def engine_compose(): return f'{engine()}-compose' +def get_seed_name(): + if engine() == 'docker': + return 'seed' + elif engine() == 'podman': + return 'box_hosts_0' + else: + print(f'unkown engine {engine()}') + sys.exit(1) + + @ensure_outside_container def get_boxes_container_info(with_seed: bool = False) -> Dict[str, Any]: # NOTE: this could be cached - IP = 0 - CONTAINER_NAME = 1 - HOSTNAME = 2 - # fstring extrapolation will mistakenly try to extrapolate inspect options - ips_query = engine() + " inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}} %tab% {{.Name}} %tab% {{.Config.Hostname}}' $("+ engine() + " ps -aq) | sed 's#%tab%#\t#g' | sed 's#/##g' | sort -t . -k 1,1n -k 2,2n -k 3,3n -k 4,4n" - out = run_shell_command(ips_query) + ips_query = engine() + " inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}} %tab% {{.Name}} %tab% {{.Config.Hostname}}' $("+ engine() + " ps -aq) --format json" + containers = json.loads(run_shell_command(ips_query, verbose=False)) # FIXME: if things get more complex a class representing a container info might be useful, # for now representing data this way is faster. 
info = {'size': 0, 'ips': [], 'container_names': [], 'hostnames': []} - for line in out.split('\n'): - container = line.split() + for container in containers: # Most commands use hosts only - name_filter = 'box_' if with_seed else 'box_hosts' - if container[1].strip()[: len(name_filter)] == name_filter: + name = container['Name'] + if name.startswith('box_hosts'): + if not with_seed and name == get_seed_name(): + continue info['size'] += 1 - info['ips'].append(container[IP]) - info['container_names'].append(container[CONTAINER_NAME]) - info['hostnames'].append(container[HOSTNAME]) + print(container['NetworkSettings']) + if 'Networks' in container['NetworkSettings']: + info['ips'].append(container['NetworkSettings']['Networks']['box_network']['IPAddress']) + else: + info['ips'].append('n/a') + info['container_names'].append(name) + info['hostnames'].append(container['Config']['Hostname']) return info @@ -202,7 +244,7 @@ def get_orch_hosts(): if inside_container(): orch_host_ls_out = run_cephadm_shell_command('ceph orch host ls --format json') else: - orch_host_ls_out = run_dc_shell_command('cephadm shell --keyring /etc/ceph/ceph.keyring --config /etc/ceph/ceph.conf -- ceph orch host ls --format json', 1, 'seed') + orch_host_ls_out = run_dc_shell_command('cephadm shell --keyring /etc/ceph/ceph.keyring --config /etc/ceph/ceph.conf -- ceph orch host ls --format json', 1, get_seed_name()) sp = orch_host_ls_out.split('\n') orch_host_ls_out = sp[len(sp) - 1] print('xd', orch_host_ls_out) From 23c737fd71384a4413f015378793f5d7a60cc637 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 4 Jan 2023 09:24:29 +0100 Subject: [PATCH 3/9] cephadm/box: podman osd creation with raw loopback Based on Zack's changes on https://github.com/zmc/ceph/tree/wip-box-rootless-podman these changes introduce the creation of OSDs on podman by creating loopback devices. For this to work, several things are necessary: * mount /sys/dev/block * --privileged to mount /sys/dev/block fs. 
CAP SYS_ADMIN is not enough. * mount /sys/block * mount /sys/devices/virtual/block. In conjunction with the mounts above, this enables lsblk to work, which is needed by ceph-volume. * created OSD loopback images are saved in loop-images by default * devices.json provides information about the loopback devices currently in use Signed-off-by: Pere Diaz Bou --- src/cephadm/box/box.py | 67 +++++++++--------- src/cephadm/box/osd.py | 150 +++++++++++++++++----------------------- src/cephadm/box/util.py | 7 +- 3 files changed, 100 insertions(+), 124 deletions(-) diff --git a/src/cephadm/box/box.py b/src/cephadm/box/box.py index 1e42a888797..d43d050c549 100755 --- a/src/cephadm/box/box.py +++ b/src/cephadm/box/box.py @@ -106,36 +106,47 @@ def check_selinux(): print(colored('selinux should be disabled, please disable it if you ' 'don\'t want unexpected behaviour.', Colors.WARNING)) -def setup_podman_env(hosts: int = 1): +def setup_podman_env(hosts: int = 1, osd_devs={}): network_name = 'box_network' networks = run_shell_command('podman network ls') if network_name not in networks: run_shell_command(f'podman network create -d bridge {network_name}') run_default_options = """--group-add keep-groups --device /dev/fuse -it -d \\ - --cap-add SYS_ADMIN --cap-add NET_ADMIN --cap-add SYS_TIME --cap-add SYS_RAWIO --cap-add MKNOD \\ - --cap-add NET_RAW --cap-add SETUID --cap-add SETGID --cap-add CHOWN --cap-add SYS_PTRACE \\ - --cap-add SYS_TTY_CONFIG --cap-add CAP_AUDIT_WRITE --cap-add CAP_AUDIT_CONTROL \\ - -e CEPH_BRANCH=main -v /home/peristocles/redhat/cephbare/origin/main:/ceph:z \\ + --privileged \\ + --cpus 12 \\ + -e CEPH_BRANCH=main \\ + -v /home/peristocles/redhat/cephbare/origin/main:/ceph:z \\ -v /home/peristocles/redhat/cephbare/origin/main/src/cephadm:/cephadm:z \\ - -v /run/udev:/run/udev -v /sys/dev/block:/sys/dev/block -v /sys/fs/cgroup:/sys/fs/cgroup \\ - -v /dev/fuse:/dev/fuse -v /dev/disk:/dev/disk -v /dev/mapper:/dev/mapper \\ + -v /run/udev:/run/udev \\ + -v 
/sys/dev/block:/sys/dev/block \\ + -v /sys/fs/cgroup:/sys/fs/cgroup \\ + -v /dev/fuse:/dev/fuse \\ + -v /dev/disk:/dev/disk \\ + -v /sys/devices/virtual/block:/sys/devices/virtual/block \\ + -v /sys/dev/block:/dev/dev/block:rshared \\ + -v /sys/block:/dev/block \\ + -v /dev/mapper:/dev/mapper \\ -v /dev/mapper/control:/dev/mapper/control \\ --stop-signal RTMIN+3 -m 20g cephadm-box \\ """ def add_option(dest, src): dest = f'{src} {dest}' return dest + for osd_dev in osd_devs.values(): + device = osd_dev["device"] + run_default_options = add_option(run_default_options, f'--device {device}:{device}') + for host in range(hosts+1): # 0 will be the seed options = run_default_options options = add_option(options, f'--name box_hosts_{host}') + options = add_option(options, f'--network {network_name}') if host == 0: options = add_option(options, f'-p 8443:8443') # dashboard options = add_option(options, f'-p 3000:3000') # grafana options = add_option(options, f'-p 9093:9093') # alertmanager options = add_option(options, f'-p 9095:9095') # prometheus - options = add_option(options, f'--network {network_name}') run_shell_command(f'podman run {options}') @@ -168,14 +179,9 @@ class Cluster(Target): def cleanup(self): cleanup_box() - def _set_cephadm_path(self): - cephadm_path = os.environ.get('CEPHADM_PATH') - os.symlink('/cephadm/cephadm.py', cephadm_path) - @ensure_inside_container def bootstrap(self): print('Running bootstrap on seed') - self._set_cephadm_path() cephadm_path = str(os.environ.get('CEPHADM_PATH')) if engine() == 'docker': @@ -272,14 +278,9 @@ class Cluster(Target): used_loop = "" if not Config.get('skip_create_loop'): - print('Adding logical volumes (block devices) in loopback device...') + print('Creating OSD devices...') used_loop = osd.create_loopback_devices(osds) print(f'Added {osds} logical volumes in a loopback device') - loop_device_arg = "" - if used_loop: - loop_device_arg = f'--device {used_loop} -v /dev/vg1:/dev/vg1:Z' - for o in range(osds): - 
loop_device_arg += f' --device /dev/dm-{o}' print('Starting containers') @@ -289,7 +290,7 @@ class Cluster(Target): dcflags += f' -f {Config.get("docker_v1_yaml")}' run_shell_command(f'{engine_compose()} {dcflags} up --scale hosts={hosts} -d') else: - setup_podman_env(hosts=hosts) + setup_podman_env(hosts=hosts, osd_devs=osd.load_osd_devices()) run_shell_command('sudo sysctl net.ipv4.conf.all.forwarding=1') run_shell_command('sudo iptables -P FORWARD ACCEPT') @@ -327,25 +328,22 @@ class Cluster(Target): ) run_dc_shell_command(box_bootstrap_command, 1, BoxType.SEED) - info = get_boxes_container_info() - ips = info['ips'] - hostnames = info['hostnames'] - print(ips) - host._copy_cluster_ssh_key(ips) expanded = Config.get('expanded') if expanded: - host._add_hosts(ips, hostnames) - - # TODO: add osds - if expanded and not Config.get('skip-deploy-osds'): - if engine() == 'podman': - print('osd deployment not supported in podman') - else: + info = get_boxes_container_info() + ips = info['ips'] + hostnames = info['hostnames'] + print(ips) + if hosts > 0: + host._copy_cluster_ssh_key(ips) + host._add_hosts(ips, hostnames) + if not Config.get('skip-deploy-osds'): print('Deploying osds... 
This could take up to minutes') - osd.deploy_osds_in_vg('vg1') + osd.deploy_osds(osds) print('Osds deployed') + dashboard_ip = 'localhost' info = get_boxes_container_info(with_seed=True) if engine() == 'docker': @@ -358,8 +356,7 @@ class Cluster(Target): @ensure_outside_container def doctor(self): - self._set_cephadm_path() - + pass @ensure_outside_container def down(self): diff --git a/src/cephadm/box/osd.py b/src/cephadm/box/osd.py index f9add66046c..bb716e13f46 100644 --- a/src/cephadm/box/osd.py +++ b/src/cephadm/box/osd.py @@ -1,5 +1,6 @@ import json import os +import time from typing import Dict from util import ( @@ -9,62 +10,47 @@ from util import ( ensure_inside_container, ensure_outside_container, get_orch_hosts, - inside_container, run_cephadm_shell_command, run_dc_shell_command, run_shell_command, - engine ) +DEVICES_FILE="./devices.json" def remove_loop_img() -> None: loop_image = Config.get('loop_img') if os.path.exists(loop_image): os.remove(loop_image) - - def create_loopback_devices(osds: int) -> None: assert osds - size = (5 * osds) + 1 - print(f'Using {size}GB of data to store osds') - # loop_dev = run_shell_command('sudo losetup -f') - loop_dev = '/dev/loop111' - run_shell_command(f'sudo rm -f {loop_dev}') - run_shell_command(f'sudo mknod -m 0777 {loop_dev} b 7 111') - - # cleanup last call cleanup() + osd_devs = dict() + for i in range(osds): + img_name = f'osd{i}' + loop_dev = create_loopback_device(img_name) + osd_devs[i] = dict(img_name=img_name, device=loop_dev) + with open(DEVICES_FILE, 'w') as dev_file: + dev_file.write(json.dumps(osd_devs)) + return osd_devs + +def create_loopback_device(img_name, size_gb=5): + loop_img_dir = Config.get('loop_img_dir') + run_shell_command(f'mkdir -p {loop_img_dir}') + loop_img = os.path.join(loop_img_dir, img_name) + run_shell_command(f'rm -f {loop_img}') + run_shell_command(f'dd if=/dev/zero of={loop_img} bs=1 count=0 seek={size_gb}G') + loop_dev = run_shell_command(f'sudo losetup -f') + if not 
os.path.exists(loop_dev): + dev_minor = re.match(r'\/dev\/[^\d]+(\d+)', loop_dev).groups()[0] + run_shell_command(f'sudo mknod -m777 {loop_dev} b 7 {dev_minor}') + run_shell_command(f'sudo chown {os.getuid()}:{os.getgid()} {loop_dev}') if os.path.ismount(loop_dev): os.umount(loop_dev) - - loop_devices = json.loads(run_shell_command('losetup -l -J', expect_error=True)) - for dev in loop_devices['loopdevices']: - if dev['name'] == loop_dev: - run_shell_command(f'sudo losetup -d {loop_dev}') - - if not os.path.exists('./loop-images'): - os.mkdir('loop-images') - - remove_loop_img() - - loop_image = Config.get('loop_img') - run_shell_command(f'sudo dd if=/dev/zero of={loop_image} bs=1 count=0 seek={size}G') - run_shell_command(f'sudo losetup {loop_dev} {loop_image}') - - - run_shell_command(f'sudo pvcreate {loop_dev} ') - run_shell_command(f'sudo vgcreate vg1 {loop_dev}') - - p = int(100 / osds) # FIXME: 100 osds is the maximum because of lvcreate pct (it doesn't seem to work with lots more decimals) - for i in range(osds): - run_shell_command('sudo vgchange --refresh') - run_shell_command(f'sudo lvcreate -l {p}%VG --name lv{i} vg1') - - # FIXME: use /dev/vg1/lv* links as it is less hacky (there could be unrelated dm devices) - run_shell_command(f'sudo chmod 777 /dev/dm-*') + run_shell_command(f'sudo losetup {loop_dev} {loop_img}') + run_shell_command(f'sudo chown {os.getuid()}:{os.getgid()} {loop_dev}') return loop_dev @@ -83,55 +69,51 @@ def get_lvm_osd_data(data: str) -> Dict[str, str]: osd_data[key] = line[-1] return osd_data +def load_osd_devices(): + if not os.path.exists(DEVICES_FILE): + return dict() + with open(DEVICES_FILE) as dev_file: + devs = json.loads(dev_file.read()) + return devs + @ensure_inside_container def deploy_osd(data: str, hostname: str) -> bool: - out = run_cephadm_shell_command(f'ceph orch daemon add osd "{hostname}:{data}"') + out = run_cephadm_shell_command(f'ceph orch daemon add osd {hostname}:{data} raw') return 'Created osd(s)' in out 
def cleanup() -> None: - vg = 'vg1' - pvs = json.loads(run_shell_command('sudo pvs --reportformat json')) - for pv in pvs['report'][0]['pv']: - if pv['vg_name'] == vg: - device = pv['pv_name'] - run_shell_command(f'sudo vgremove -f --yes {vg}') - run_shell_command(f'sudo losetup -d {device}') - run_shell_command(f'sudo wipefs -af {device}') - # FIX: this can fail with excluded filter - run_shell_command(f'sudo pvremove -f --yes {device}', expect_error=True) - break - - remove_loop_img() + loop_img_dir = Config.get('loop_img_dir') + osd_devs = load_osd_devices() + for osd in osd_devs.values(): + device = osd['device'] + if 'loop' in device: + loop_img = os.path.join(loop_img_dir, osd['img_name']) + run_shell_command(f'sudo losetup -d {device}', expect_error=True) + if os.path.exists(loop_img): + os.remove(loop_img) + run_shell_command(f'rm -rf {loop_img_dir}') -def deploy_osds_in_vg(vg: str): - """ - rotate host will deploy each osd in a different host - - deploying osds will not succeed with starting services so this - makes another process to run on the background - """ - if inside_container(): - lvs = json.loads(run_shell_command('lvs --reportformat json')) - # distribute osds per host - hosts = get_orch_hosts() - host_index = 0 - for lv in lvs['report'][0]['lv']: - if lv['vg_name'] == vg: - deployed = False - while not deployed: - deployed = deploy_osd( - f'{vg}/{lv["lv_name"]}', hosts[host_index]['hostname'] - ) - host_index = (host_index + 1) % len(hosts) - else: - verbose = '-v' if Config.get('verbose') else '' - print('Redirecting deploy osd in vg to inside container') - run_dc_shell_command( - f'/cephadm/box/box.py {verbose} --engine {engine()} osd deploy --vg {vg}', 1, BoxType.SEED - ) +def deploy_osds(count: int): + osd_devs = load_osd_devices() + hosts = get_orch_hosts() + host_index = 0 + v = '-v' if Config.get('verbose') else '' + for osd in osd_devs.values(): + deployed = False + while not deployed: + print(hosts) + hostname = 
hosts[host_index]['hostname'] + deployed = run_dc_shell_command( + f'/cephadm/box/box.py {v} osd deploy --data {osd["device"]} --hostname {hostname}', + 1, + BoxType.SEED + ) + deployed = 'created osd' in deployed.lower() + time.sleep(2) + host_index = (host_index + 1) % len(hosts) class Osd(Target): @@ -149,24 +131,20 @@ class Osd(Target): self.parser.add_argument('--data', type=str, help='path to a block device') self.parser.add_argument('--hostname', type=str, help='host to deploy osd') self.parser.add_argument('--osds', type=int, default=0, help='number of osds') - self.parser.add_argument( - '--vg', type=str, help='Deploy with all lv from virtual group' - ) def deploy(self): data = Config.get('data') hostname = Config.get('hostname') - vg = Config.get('vg') if not hostname: # assume this host hostname = run_shell_command('hostname') - if vg: - deploy_osds_in_vg(vg) + if not data: + deploy_osds(Config.get('osds')) else: deploy_osd(data, hostname) @ensure_outside_container def create_loop(self): osds = Config.get('osds') - create_loopback_devices(osds) - print('Successfully added logical volumes in loopback devices') + create_loopback_devices(int(osds)) + print('Successfully created loopback devices') diff --git a/src/cephadm/box/util.py b/src/cephadm/box/util.py index eb69c475917..e2d802b21ed 100644 --- a/src/cephadm/box/util.py +++ b/src/cephadm/box/util.py @@ -27,6 +27,7 @@ class Config: 'docker_yaml': 'docker-compose-docker.yml', 'docker_v1_yaml': 'docker-compose.cgroup1.yml', 'podman_yaml': 'docker-compose-podman.yml', + 'loop_img_dir': 'loop-images', } @staticmethod @@ -167,10 +168,11 @@ def run_shell_commands(commands: str, expect_error=False) -> str: def run_cephadm_shell_command(command: str, expect_error=False) -> str: config = Config.get('config') keyring = Config.get('keyring') + fsid = Config.get('fsid') with_cephadm_image = 'CEPHADM_IMAGE=quay.ceph.io/ceph-ci/ceph:main' out = run_shell_command( - f'{with_cephadm_image} cephadm --verbose shell 
--config {config} --keyring {keyring} -- {command}', + f'{with_cephadm_image} cephadm --verbose shell --fsid {fsid} --config {config} --keyring {keyring} -- {command}', expect_error, ) return out @@ -244,9 +246,8 @@ def get_orch_hosts(): if inside_container(): orch_host_ls_out = run_cephadm_shell_command('ceph orch host ls --format json') else: - orch_host_ls_out = run_dc_shell_command('cephadm shell --keyring /etc/ceph/ceph.keyring --config /etc/ceph/ceph.conf -- ceph orch host ls --format json', 1, get_seed_name()) + orch_host_ls_out = run_dc_shell_command(f'cephadm shell --keyring /etc/ceph/ceph.keyring --config /etc/ceph/ceph.conf -- ceph orch host ls --format json', 1, BoxType.SEED) sp = orch_host_ls_out.split('\n') orch_host_ls_out = sp[len(sp) - 1] - print('xd', orch_host_ls_out) hosts = json.loads(orch_host_ls_out) return hosts From e06e0ce92959a0bc60383c3850f4f9a0923df1c7 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 4 Jan 2023 10:25:28 +0100 Subject: [PATCH 4/9] cephadm/box: remove absolute paths Signed-off-by: Pere Diaz Bou --- src/cephadm/box/box.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cephadm/box/box.py b/src/cephadm/box/box.py index d43d050c549..8942e300cea 100755 --- a/src/cephadm/box/box.py +++ b/src/cephadm/box/box.py @@ -116,8 +116,8 @@ def setup_podman_env(hosts: int = 1, osd_devs={}): --privileged \\ --cpus 12 \\ -e CEPH_BRANCH=main \\ - -v /home/peristocles/redhat/cephbare/origin/main:/ceph:z \\ - -v /home/peristocles/redhat/cephbare/origin/main/src/cephadm:/cephadm:z \\ + -v ../../../:/ceph:z \\ + -v ../:/cephadm:z \\ -v /run/udev:/run/udev \\ -v /sys/dev/block:/sys/dev/block \\ -v /sys/fs/cgroup:/sys/fs/cgroup \\ From 6f40f6e6227f8407a6632b1b2e312401bdda8a75 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 4 Jan 2023 11:47:48 +0100 Subject: [PATCH 5/9] cephadm/box: add packaging python dep Signed-off-by: Pere Diaz Bou --- src/cephadm/box/docker/ceph/Dockerfile | 1 + 1 file changed, 1 
insertion(+) diff --git a/src/cephadm/box/docker/ceph/Dockerfile b/src/cephadm/box/docker/ceph/Dockerfile index c8a8d6fec6a..b950750e9af 100644 --- a/src/cephadm/box/docker/ceph/Dockerfile +++ b/src/cephadm/box/docker/ceph/Dockerfile @@ -1,2 +1,3 @@ FROM quay.ceph.io/ceph-ci/ceph:main +RUN pip3 install packaging EXPOSE 8443 From 28ec4ae0c17fce569c4f50dd37ebafd913a95ad5 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Mon, 9 Jan 2023 11:45:55 +0100 Subject: [PATCH 6/9] cephadm/box: remove --privileged flag Without --privileged flag mounting /sys/dev/block wasn't possible. Nevertheless, when I checked the permissions of the empty /sys/dev/block inside the container, I noticed that I had permissions, therefore umount was possible. With `umount /sys/dev/block` the real fs was exposed. Signed-off-by: Pere Diaz Bou --- src/cephadm/box/DockerfilePodman | 3 ++- src/cephadm/box/box.py | 25 +++++++++++++++++++++---- src/cephadm/box/osd.py | 5 +++-- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/src/cephadm/box/DockerfilePodman b/src/cephadm/box/DockerfilePodman index 61db237f818..440267bc6ec 100644 --- a/src/cephadm/box/DockerfilePodman +++ b/src/cephadm/box/DockerfilePodman @@ -14,7 +14,7 @@ RUN ln -s /ceph/src/cephadm/cephadm.py $CEPHADM_PATH # NOTE: assume path of ceph # directories used by yum that are just taking # up space. 
RUN dnf -y update; rpm --restore shadow-utils 2>/dev/null; \ -yum -y install strace podman fuse-overlayfs --exclude container-selinux; \ +yum -y install podman fuse-overlayfs --exclude container-selinux; \ rm -rf /var/cache /var/log/dnf* /var/log/yum.* RUN dnf install which firewalld chrony procps systemd openssh openssh-server openssh-clients sshpass lvm2 -y @@ -37,6 +37,7 @@ RUN echo 'root:root' | chpasswd RUN dnf install -y adjtimex # adjtimex syscall doesn't exist in fedora 35+ therefore we have to install it manually # so chronyd works +RUN dnf install -y strace sysstat # debugging tools RUN dnf -y install hostname iproute udev ENV _CONTAINERS_USERNS_CONFIGURED="" diff --git a/src/cephadm/box/box.py b/src/cephadm/box/box.py index 8942e300cea..ee6ad168d97 100755 --- a/src/cephadm/box/box.py +++ b/src/cephadm/box/box.py @@ -113,18 +113,30 @@ def setup_podman_env(hosts: int = 1, osd_devs={}): run_shell_command(f'podman network create -d bridge {network_name}') run_default_options = """--group-add keep-groups --device /dev/fuse -it -d \\ - --privileged \\ - --cpus 12 \\ + --cap-add SYS_ADMIN \\ + --cap-add NET_ADMIN \\ + --cap-add SYS_TIME \\ + --cap-add SYS_RAWIO \\ + --cap-add MKNOD \\ + --cap-add NET_RAW \\ + --cap-add SETUID \\ + --cap-add SETGID \\ + --cap-add CHOWN \\ + --cap-add SYS_PTRACE \\ + --cap-add SYS_TTY_CONFIG \\ + --cap-add CAP_AUDIT_WRITE \\ + --cap-add CAP_AUDIT_CONTROL \\ -e CEPH_BRANCH=main \\ -v ../../../:/ceph:z \\ -v ../:/cephadm:z \\ -v /run/udev:/run/udev \\ + --tmpfs /run \\ + --tmpfs /tmp \\ -v /sys/dev/block:/sys/dev/block \\ - -v /sys/fs/cgroup:/sys/fs/cgroup \\ + -v /sys/fs/cgroup:/sys/fs/cgroup:ro \\ -v /dev/fuse:/dev/fuse \\ -v /dev/disk:/dev/disk \\ -v /sys/devices/virtual/block:/sys/devices/virtual/block \\ - -v /sys/dev/block:/dev/dev/block:rshared \\ -v /sys/block:/dev/block \\ -v /dev/mapper:/dev/mapper \\ -v /dev/mapper/control:/dev/mapper/control \\ @@ -292,6 +304,11 @@ class Cluster(Target): else: 
setup_podman_env(hosts=hosts, osd_devs=osd.load_osd_devices()) + # Umounting somehow brings back the contents of the host /sys/dev/block. + # On startup /sys/dev/block is empty. After umount, we can see symlinks again + # so that lsblk is able to run as expected + run_dc_shell_command('umount /sys/dev/block', 1, BoxType.SEED) + run_shell_command('sudo sysctl net.ipv4.conf.all.forwarding=1') run_shell_command('sudo iptables -P FORWARD ACCEPT') diff --git a/src/cephadm/box/osd.py b/src/cephadm/box/osd.py index bb716e13f46..6ce3a1d6fea 100644 --- a/src/cephadm/box/osd.py +++ b/src/cephadm/box/osd.py @@ -111,8 +111,9 @@ def deploy_osds(count: int): 1, BoxType.SEED ) - deployed = 'created osd' in deployed.lower() - time.sleep(2) + deployed = 'created osd' in deployed.lower() or 'already created?' in deployed.lower() + print('Waiting 5 seconds to re-run deploy osd...') + time.sleep(5) host_index = (host_index + 1) % len(hosts) From e70de13cf87c99a066867438d6b18dac93563bf2 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Mon, 9 Jan 2023 18:02:12 +0100 Subject: [PATCH 7/9] cephadm/box: add container engine class ContainerEngine is the baseclass of PodmanEngine and DockerEngine. Furthermore, a HostContainer class was added as struct of info related to a container. 
Signed-off-by: Pere Diaz Bou --- src/cephadm/box/box.py | 147 +++++++---------------- src/cephadm/box/host.py | 39 +++--- src/cephadm/box/osd.py | 5 +- src/cephadm/box/util.py | 258 +++++++++++++++++++++++++++++++++------- 4 files changed, 279 insertions(+), 170 deletions(-) diff --git a/src/cephadm/box/box.py b/src/cephadm/box/box.py index ee6ad168d97..fca55403c6e 100755 --- a/src/cephadm/box/box.py +++ b/src/cephadm/box/box.py @@ -16,8 +16,12 @@ from util import ( run_cephadm_shell_command, run_dc_shell_command, run_dc_shell_commands, + get_container_engine, run_shell_command, run_shell_commands, + ContainerEngine, + DockerEngine, + PodmanEngine, colored, engine, engine_compose, @@ -55,7 +59,8 @@ def image_exists(image_name: str): # extract_tag assert image_name.find(':') image_name, tag = image_name.split(':') - images = run_shell_command(f'{engine()} image ls').split('\n') + engine = get_container_engine() + images = engine.run('image ls').split('\n') IMAGE_NAME = 0 TAG = 1 for image in images: @@ -69,25 +74,24 @@ def image_exists(image_name: str): def get_ceph_image(): print('Getting ceph image') - run_shell_command(f'{engine()} pull {CEPH_IMAGE}') + engine = get_container_engine() + engine.run('pull {CEPH_IMAGE}') # update - run_shell_command(f'{engine()} build -t {CEPH_IMAGE} docker/ceph') + engine.run('build -t {CEPH_IMAGE} docker/ceph') if not os.path.exists('docker/ceph/image'): os.mkdir('docker/ceph/image') remove_ceph_image_tar() - run_shell_command(f'{engine()} save {CEPH_IMAGE} -o {CEPH_IMAGE_TAR}') + engine.run('save {CEPH_IMAGE} -o {CEPH_IMAGE_TAR}') run_shell_command(f'chmod 777 {CEPH_IMAGE_TAR}') print('Ceph image added') def get_box_image(): print('Getting box image') - if engine() == 'docker': - run_shell_command(f'{engine()} build -t cephadm-box -f DockerfileDocker .') - else: - run_shell_command(f'{engine()} build -t cephadm-box -f DockerfilePodman .') + engine = get_container_engine() + engine.run(f'build -t cephadm-box -f {engine.dockerfile} 
.') print('Box image added') def check_dashboard(): @@ -106,65 +110,9 @@ def check_selinux(): print(colored('selinux should be disabled, please disable it if you ' 'don\'t want unexpected behaviour.', Colors.WARNING)) -def setup_podman_env(hosts: int = 1, osd_devs={}): - network_name = 'box_network' - networks = run_shell_command('podman network ls') - if network_name not in networks: - run_shell_command(f'podman network create -d bridge {network_name}') - - run_default_options = """--group-add keep-groups --device /dev/fuse -it -d \\ - --cap-add SYS_ADMIN \\ - --cap-add NET_ADMIN \\ - --cap-add SYS_TIME \\ - --cap-add SYS_RAWIO \\ - --cap-add MKNOD \\ - --cap-add NET_RAW \\ - --cap-add SETUID \\ - --cap-add SETGID \\ - --cap-add CHOWN \\ - --cap-add SYS_PTRACE \\ - --cap-add SYS_TTY_CONFIG \\ - --cap-add CAP_AUDIT_WRITE \\ - --cap-add CAP_AUDIT_CONTROL \\ - -e CEPH_BRANCH=main \\ - -v ../../../:/ceph:z \\ - -v ../:/cephadm:z \\ - -v /run/udev:/run/udev \\ - --tmpfs /run \\ - --tmpfs /tmp \\ - -v /sys/dev/block:/sys/dev/block \\ - -v /sys/fs/cgroup:/sys/fs/cgroup:ro \\ - -v /dev/fuse:/dev/fuse \\ - -v /dev/disk:/dev/disk \\ - -v /sys/devices/virtual/block:/sys/devices/virtual/block \\ - -v /sys/block:/dev/block \\ - -v /dev/mapper:/dev/mapper \\ - -v /dev/mapper/control:/dev/mapper/control \\ - --stop-signal RTMIN+3 -m 20g cephadm-box \\ - """ - def add_option(dest, src): - dest = f'{src} {dest}' - return dest - for osd_dev in osd_devs.values(): - device = osd_dev["device"] - run_default_options = add_option(run_default_options, f'--device {device}:{device}') - - - for host in range(hosts+1): # 0 will be the seed - options = run_default_options - options = add_option(options, f'--name box_hosts_{host}') - options = add_option(options, f'--network {network_name}') - if host == 0: - options = add_option(options, f'-p 8443:8443') # dashboard - options = add_option(options, f'-p 3000:3000') # grafana - options = add_option(options, f'-p 9093:9093') # alertmanager - 
options = add_option(options, f'-p 9095:9095') # prometheus - - run_shell_command(f'podman run {options}') - class Cluster(Target): _help = 'Manage docker cephadm boxes' - actions = ['bootstrap', 'start', 'down', 'list', 'sh', 'setup', 'cleanup', 'doctor'] + actions = ['bootstrap', 'start', 'down', 'list', 'sh', 'setup', 'cleanup'] def set_args(self): self.parser.add_argument( @@ -196,16 +144,13 @@ class Cluster(Target): print('Running bootstrap on seed') cephadm_path = str(os.environ.get('CEPHADM_PATH')) - if engine() == 'docker': - # restart to ensure docker is using daemon.json - run_shell_command( - 'systemctl restart docker' - ) - + engine = get_container_engine() + if isinstance(engine, DockerEngine): + engine.restart() st = os.stat(cephadm_path) os.chmod(cephadm_path, st.st_mode | stat.S_IEXEC) - run_shell_command(f'{engine()} load < /cephadm/box/docker/ceph/image/quay.ceph.image.tar') + engine.run('load < /cephadm/box/docker/ceph/image/quay.ceph.image.tar') # cephadm guid error because it sometimes tries to use quay.ceph.io/ceph-ci/ceph: # instead of main branch's tag run_shell_command('export CEPH_SOURCE_FOLDER=/ceph') @@ -254,7 +199,7 @@ class Cluster(Target): ) print('Running cephadm bootstrap...') - run_shell_command(cephadm_bootstrap_command) + run_shell_command(cephadm_bootstrap_command, expect_exit_code=120) print('Cephadm bootstrap complete') run_shell_command('sudo vgchange --refresh') @@ -271,12 +216,13 @@ class Cluster(Target): check_selinux() osds = int(Config.get('osds')) hosts = int(Config.get('hosts')) + engine = get_container_engine() # ensure boxes don't exist self.down() # podman is ran without sudo - if engine() == 'podman': + if isinstance(engine, PodmanEngine): I_am = run_shell_command('whoami') if 'root' in I_am: print(root_error_msg) @@ -296,18 +242,14 @@ class Cluster(Target): print('Starting containers') - if engine() == 'docker': - dcflags = f'-f {Config.get("docker_yaml")}' - if not 
os.path.exists('/sys/fs/cgroup/cgroup.controllers'): - dcflags += f' -f {Config.get("docker_v1_yaml")}' - run_shell_command(f'{engine_compose()} {dcflags} up --scale hosts={hosts} -d') - else: - setup_podman_env(hosts=hosts, osd_devs=osd.load_osd_devices()) + engine.up(hosts) + containers = engine.get_containers() + seed = engine.get_seed() # Umounting somehow brings back the contents of the host /sys/dev/block. # On startup /sys/dev/block is empty. After umount, we can see symlinks again # so that lsblk is able to run as expected - run_dc_shell_command('umount /sys/dev/block', 1, BoxType.SEED) + run_dc_shell_command('umount /sys/dev/block', seed) run_shell_command('sudo sysctl net.ipv4.conf.all.forwarding=1') run_shell_command('sudo iptables -P FORWARD ACCEPT') @@ -319,15 +261,15 @@ class Cluster(Target): systemctl start chronyd systemctl status --no-pager chronyd """ - for h in range(hosts): - run_dc_shell_commands(h + 1, BoxType.HOST, chronyd_setup) - run_dc_shell_commands(1, BoxType.SEED, chronyd_setup) + for container in containers: + print(colored('Got container:', Colors.OKCYAN), str(container)) + for container in containers: + run_dc_shell_commands(chronyd_setup, container) print('Seting up host ssh servers') - for h in range(hosts): - host._setup_ssh(BoxType.HOST, h + 1) - - host._setup_ssh(BoxType.SEED, 1) + for container in containers: + print(colored('Setting up ssh server for:', Colors.OKCYAN), str(container)) + host._setup_ssh(container) verbose = '-v' if Config.get('verbose') else '' skip_deploy = '--skip-deploy-osds' if Config.get('skip-deploy-osds') else '' @@ -336,15 +278,15 @@ class Cluster(Target): ) skip_dashboard = '--skip-dashboard' if Config.get('skip-dashboard') else '' box_bootstrap_command = ( - f'/cephadm/box/box.py {verbose} --engine {engine()} cluster bootstrap ' + f'/cephadm/box/box.py {verbose} --engine {engine.command} cluster bootstrap ' f'--osds {osds} ' f'--hosts {hosts} ' f'{skip_deploy} ' f'{skip_dashboard} ' 
f'{skip_monitoring_stack} ' ) - run_dc_shell_command(box_bootstrap_command, 1, BoxType.SEED) - + print(box_bootstrap_command) + run_dc_shell_command(box_bootstrap_command, seed) expanded = Config.get('expanded') if expanded: @@ -363,7 +305,7 @@ class Cluster(Target): dashboard_ip = 'localhost' info = get_boxes_container_info(with_seed=True) - if engine() == 'docker': + if isinstance(engine, DockerEngine): for i in range(info['size']): if get_seed_name() in info['container_names'][i]: dashboard_ip = info["ips"][i] @@ -371,25 +313,22 @@ class Cluster(Target): print('Bootstrap finished successfully') - @ensure_outside_container - def doctor(self): - pass - @ensure_outside_container def down(self): - if engine() == 'podman': - containers = json.loads(run_shell_command('podman container ls --format json')) + engine = get_container_engine() + if isinstance(engine, PodmanEngine): + containers = json.loads(engine.run('container ls --format json')) for container in containers: for name in container['Names']: if name.startswith('box_hosts_'): - run_shell_command(f'podman container kill {name}') - run_shell_command(f'podman container rm {name}') - pods = json.loads(run_shell_command('podman pod ls --format json')) + engine.run(f'container kill {name}') + engine.run(f'container rm {name}') + pods = json.loads(engine.run('pod ls --format json')) for pod in pods: if 'Name' in pod and pod['Name'].startswith('box_pod_host'): name = pod['Name'] - run_shell_command(f'podman pod kill {name}') - run_shell_command(f'podman pod rm {name}') + engine.run(f'pod kill {name}') + engine.run(f'pod rm {name}') else: run_shell_command(f'{engine_compose()} -f {Config.get("docker_yaml")} down') print('Successfully killed all boxes') diff --git a/src/cephadm/box/host.py b/src/cephadm/box/host.py index cb663a61d95..aae16d07f45 100644 --- a/src/cephadm/box/host.py +++ b/src/cephadm/box/host.py @@ -3,8 +3,10 @@ from typing import List, Union from util import ( Config, + HostContainer, Target, 
get_boxes_container_info, + get_container_engine, inside_container, run_cephadm_shell_command, run_dc_shell_command, @@ -14,10 +16,11 @@ from util import ( ) -def _setup_ssh(container_type: BoxType, container_index): +def _setup_ssh(container: HostContainer): if inside_container(): if not os.path.exists('/root/.ssh/known_hosts'): - run_shell_command('ssh-keygen -b 2048 -t rsa -f /root/.ssh/id_rsa -q -N ""') + run_shell_command('echo "y" | ssh-keygen -b 2048 -t rsa -f /root/.ssh/id_rsa -q -N ""', + expect_error=True) run_shell_command('echo "root:root" | chpasswd') with open('/etc/ssh/sshd_config', 'a+') as f: @@ -29,9 +32,8 @@ def _setup_ssh(container_type: BoxType, container_index): print('Redirecting to _setup_ssh to container') verbose = '-v' if Config.get('verbose') else '' run_dc_shell_command( - f'/cephadm/box/box.py {verbose} --engine {engine()} host setup_ssh {BoxType.to_string(container_type)} {container_index}', - container_index, - container_type, + f'/cephadm/box/box.py {verbose} --engine {engine()} host setup_ssh {container.name}', + container ) @@ -48,11 +50,11 @@ def _add_hosts(ips: Union[List[str], str], hostnames: Union[List[str], str]): ips = f'{ips}' hostnames = ' '.join(hostnames) hostnames = f'{hostnames}' + seed = get_container_engine().get_seed() run_dc_shell_command( - f'/cephadm/box/box.py {verbose} --engine {engine()} host add_hosts seed {BoxType.to_string(BoxType.SEED)} --ips {ips} --hostnames {hostnames}', - 1, - BoxType.SEED, - ) + f'/cephadm/box/box.py {verbose} --engine {engine()} host add_hosts {seed.name} --ips {ips} --hostnames {hostnames}', + seed + ) def _copy_cluster_ssh_key(ips: Union[List[str], str]): @@ -74,10 +76,10 @@ def _copy_cluster_ssh_key(ips: Union[List[str], str]): ips = ' '.join(ips) ips = f'{ips}' # assume we only have one seed + seed = get_container_engine().get_seed() run_dc_shell_command( - f'/cephadm/box/box.py {verbose} --engine {engine()} host copy_cluster_ssh_key {BoxType.to_string(BoxType.SEED)} 1 --ips 
{ips}', - 1, - BoxType.SEED, + f'/cephadm/box/box.py {verbose} --engine {engine()} host copy_cluster_ssh_key {seed.name} --ips {ips}', + seed ) @@ -88,10 +90,9 @@ class Host(Target): def set_args(self): self.parser.add_argument('action', choices=Host.actions) self.parser.add_argument( - 'container_type', type=str, help='box_{type}_{index}' - ) - self.parser.add_argument( - 'container_index', type=str, help='box_{type}_{index}' + 'container_name', + type=str, + help='box_{type}_{index}. In docker, type can be seed or hosts. In podman only hosts.' ) self.parser.add_argument('--ips', nargs='*', help='List of host ips') self.parser.add_argument( @@ -99,9 +100,9 @@ class Host(Target): ) def setup_ssh(self): - type_ = Config.get('container_type') - index = Config.get('container_index') - _setup_ssh(type_, index) + container_name = Config.get('container_name') + engine = get_container_engine() + _setup_ssh(engine.get_container(container_name)) def add_hosts(self): ips = Config.get('ips') diff --git a/src/cephadm/box/osd.py b/src/cephadm/box/osd.py index 6ce3a1d6fea..b57af42434a 100644 --- a/src/cephadm/box/osd.py +++ b/src/cephadm/box/osd.py @@ -12,6 +12,7 @@ from util import ( get_orch_hosts, run_cephadm_shell_command, run_dc_shell_command, + get_container_engine, run_shell_command, ) @@ -100,6 +101,7 @@ def deploy_osds(count: int): osd_devs = load_osd_devices() hosts = get_orch_hosts() host_index = 0 + seed = get_container_engine().get_seed() v = '-v' if Config.get('verbose') else '' for osd in osd_devs.values(): deployed = False @@ -108,8 +110,7 @@ def deploy_osds(count: int): hostname = hosts[host_index]['hostname'] deployed = run_dc_shell_command( f'/cephadm/box/box.py {v} osd deploy --data {osd["device"]} --hostname {hostname}', - 1, - BoxType.SEED + seed ) deployed = 'created osd' in deployed.lower() or 'already created?' 
in deployed.lower() print('Waiting 5 seconds to re-run deploy osd...') diff --git a/src/cephadm/box/util.py b/src/cephadm/box/util.py index e2d802b21ed..7dcf883f8a3 100644 --- a/src/cephadm/box/util.py +++ b/src/cephadm/box/util.py @@ -2,8 +2,10 @@ import json import os import subprocess import sys -import enum -from typing import Any, Callable, Dict +import copy +from abc import ABCMeta, abstractmethod +from enum import Enum +from typing import Any, Callable, Dict, List class Colors: HEADER = '\033[95m' @@ -92,7 +94,26 @@ def ensure_inside_container(func) -> bool: def colored(msg, color: Colors): return color + msg + Colors.ENDC -def run_shell_command(command: str, expect_error=False, verbose=True) -> str: +class BoxType(str, Enum): + SEED = 'seed' + HOST = 'host' + +class HostContainer: + def __init__(self, _name, _type) -> None: + self._name: str = _name + self._type: BoxType = _type + + @property + def name(self) -> str: + return self._name + + @property + def type(self) -> BoxType: + return self._type + def __str__(self) -> str: + return f'{self.name} {self.type}' + +def run_shell_command(command: str, expect_error=False, verbose=True, expect_exit_code=0) -> str: if Config.get('verbose'): print(f'{colored("Running command", Colors.HEADER)}: {colored(command, Colors.OKBLUE)}') @@ -119,43 +140,20 @@ def run_shell_command(command: str, expect_error=False, verbose=True) -> str: err += process.stderr.read().decode('latin1').strip() out = out.strip() - if process.returncode != 0 and not expect_error: + if process.returncode != 0 and not expect_error and process.returncode != expect_exit_code: err = colored(err, Colors.FAIL); - raise RuntimeError(f'Failed command: {command}\n{err}') + + raise RuntimeError(f'Failed command: {command}\n{err}\nexit code: {process.returncode}') sys.exit(1) return out -class BoxType(enum.IntEnum): - SEED = 0 # where we bootstrap cephadm - HOST = 1 - @staticmethod - def to_enum(value: str): - if value == 'seed': - return BoxType.SEED - 
elif value == 'host': - return BoxType.HOST - else: - print(f'Wrong container type {value}') - sys.exit(1) - - @staticmethod - def to_string(box_type): - if box_type == BoxType.SEED: - return 'seed' - elif box_type == BoxType.HOST: - return 'host' - else: - print(f'Wrong container type {type_}') - sys.exit(1) - - -def run_dc_shell_commands(index, box_type: BoxType, commands: str, expect_error=False) -> str: +def run_dc_shell_commands(commands: str, container: HostContainer, expect_error=False) -> str: for command in commands.split('\n'): command = command.strip() if not command: continue - run_dc_shell_command(command.strip(), index, box_type, expect_error=expect_error) + run_dc_shell_command(command.strip(), container, expect_error=expect_error) def run_shell_commands(commands: str, expect_error=False) -> str: for command in commands.split('\n'): @@ -179,20 +177,9 @@ def run_cephadm_shell_command(command: str, expect_error=False) -> str: def run_dc_shell_command( - command: str, index: int, box_type: BoxType, expect_error=False + command: str, container: HostContainer, expect_error=False ) -> str: - box_type_str = 'box_hosts' - if box_type == BoxType.SEED: - index = 0 - if engine() == 'docker': - box_type_str = 'seed' - index = 1 - - container_id = get_container_id(f'{box_type_str}_{index}') - print(container_id) - out = run_shell_command( - f'{engine()} exec -it {container_id} {command}', expect_error - ) + out = get_container_engine().run_exec(container, command, expect_error=expect_error) return out def inside_container() -> bool: @@ -246,8 +233,189 @@ def get_orch_hosts(): if inside_container(): orch_host_ls_out = run_cephadm_shell_command('ceph orch host ls --format json') else: - orch_host_ls_out = run_dc_shell_command(f'cephadm shell --keyring /etc/ceph/ceph.keyring --config /etc/ceph/ceph.conf -- ceph orch host ls --format json', 1, BoxType.SEED) + orch_host_ls_out = run_dc_shell_command(f'cephadm shell --keyring /etc/ceph/ceph.keyring --config 
/etc/ceph/ceph.conf -- ceph orch host ls --format json', + get_container_engine().get_seed()) sp = orch_host_ls_out.split('\n') orch_host_ls_out = sp[len(sp) - 1] hosts = json.loads(orch_host_ls_out) return hosts + + +class ContainerEngine(metaclass=ABCMeta): + @property + @abstractmethod + def command(self) -> str: pass + + @property + @abstractmethod + def seed_name(self) -> str: pass + + @property + @abstractmethod + def dockerfile(self) -> str: pass + + @property + def host_name_prefix(self) -> str: + return 'box_hosts_' + + @abstractmethod + def up(self, hosts: int): pass + + def run_exec(self, container: HostContainer, command: str, expect_error: bool = False): + return run_shell_command(' '.join([self.command, 'exec', container.name, command]), + expect_error=expect_error) + + def run(self, engine_command: str, expect_error: bool = False): + return run_shell_command(' '.join([self.command, engine_command]), expect_error=expect_error) + + def get_containers(self) -> List[HostContainer]: + ps_out = json.loads(run_shell_command('podman ps --format json')) + containers = [] + for container in ps_out: + if not container['Names']: + raise RuntimeError(f'Container {container} missing name') + name = container['Names'][0] + if name == self.seed_name: + containers.append(HostContainer(name, BoxType.SEED)) + elif name.startswith(self.host_name_prefix): + containers.append(HostContainer(name, BoxType.HOST)) + return containers + + def get_seed(self) -> HostContainer: + for container in self.get_containers(): + if container.type == BoxType.SEED: + return container + raise RuntimeError('Missing seed container') + + def get_container(self, container_name: str): + containers = self.get_containers() + for container in containers: + if container.name == container_name: + return container + return None + + + def restart(self): + pass + + +class DockerEngine(ContainerEngine): + command = 'docker' + seed_name = 'seed' + dockerfile = 'DockerfileDocker' + + def restart(self): + 
run_shell_command('systemctl restart docker') + + def up(self, hosts: int): + dcflags = f'-f {Config.get("docker_yaml")}' + if not os.path.exists('/sys/fs/cgroup/cgroup.controllers'): + dcflags += f' -f {Config.get("docker_v1_yaml")}' + run_shell_command(f'{engine_compose()} {dcflags} up --scale hosts={hosts} -d') + +class PodmanEngine(ContainerEngine): + command = 'podman' + seed_name = 'box_hosts_0' + dockerfile = 'DockerfilePodman' + + CAPS = [ + "SYS_ADMIN", + "NET_ADMIN", + "SYS_TIME", + "SYS_RAWIO", + "MKNOD", + "NET_RAW", + "SETUID", + "SETGID", + "CHOWN", + "SYS_PTRACE", + "SYS_TTY_CONFIG", + "CAP_AUDIT_WRITE", + "CAP_AUDIT_CONTROL", + ] + + VOLUMES = [ + '../../../:/ceph:z', + '../:/cephadm:z', + '/run/udev:/run/udev', + '/sys/dev/block:/sys/dev/block', + '/sys/fs/cgroup:/sys/fs/cgroup:ro', + '/dev/fuse:/dev/fuse', + '/dev/disk:/dev/disk', + '/sys/devices/virtual/block:/sys/devices/virtual/block', + '/sys/block:/dev/block', + '/dev/mapper:/dev/mapper', + '/dev/mapper/control:/dev/mapper/control', + ] + + TMPFS = ['/run', '/tmp'] + + # FIXME: right now we are assuming every service will be exposed through the seed, but this is far + # from the truth. Services can be deployed on different hosts so we need a system to manage this. 
+ SEED_PORTS = [ + 8443, # dashboard + 3000, # grafana + 9093, # alertmanager + 9095 # prometheus + ] + + + def setup_podman_env(self, hosts: int = 1, osd_devs={}): + network_name = 'box_network' + networks = run_shell_command('podman network ls') + if network_name not in networks: + run_shell_command(f'podman network create -d bridge {network_name}') + + args = [ + '--group-add', 'keep-groups', + '--device', '/dev/fuse' , + '-it' , + '-d', + '-e', 'CEPH_BRANCH=main', + '--stop-signal', 'RTMIN+3' + ] + + for cap in self.CAPS: + args.append('--cap-add') + args.append(cap) + + for volume in self.VOLUMES: + args.append('-v') + args.append(volume) + + for tmp in self.TMPFS: + args.append('--tmpfs') + args.append(tmp) + + + for osd_dev in osd_devs.values(): + device = osd_dev["device"] + args.append('--device') + args.append(f'{device}:{device}') + + + for host in range(hosts+1): # 0 will be the seed + options = copy.copy(args) + options.append('--name') + options.append(f'box_hosts_{host}') + options.append('--network') + options.append(f'{network_name}') + if host == 0: + for port in self.SEED_PORTS: + options.append('-p') + options.append(f'{port}:{port}') + + options.append('cephadm-box') + options = ' '.join(options) + + run_shell_command(f'podman run {options}') + + def up(self, hosts: int): + import osd + self.setup_podman_env(hosts=hosts, osd_devs=osd.load_osd_devices()) + +def get_container_engine() -> ContainerEngine: + if engine() == 'docker': + return DockerEngine() + else: + return PodmanEngine() From 63ae933a4bcec1caa81e615aa25d706bc2281dc6 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Mon, 9 Jan 2023 18:36:54 +0100 Subject: [PATCH 8/9] cephadm/box: setup dashboard and multiprocess setup Signed-off-by: Pere Diaz Bou --- src/cephadm/box/box.py | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/src/cephadm/box/box.py b/src/cephadm/box/box.py index fca55403c6e..b485bc16aab 100755 --- 
a/src/cephadm/box/box.py +++ b/src/cephadm/box/box.py @@ -6,6 +6,7 @@ import json import sys import host import osd +from multiprocessing import Process, Pool from util import ( BoxType, Config, @@ -75,15 +76,15 @@ def image_exists(image_name: str): def get_ceph_image(): print('Getting ceph image') engine = get_container_engine() - engine.run('pull {CEPH_IMAGE}') + engine.run(f'pull {CEPH_IMAGE}') # update - engine.run('build -t {CEPH_IMAGE} docker/ceph') + engine.run(f'build -t {CEPH_IMAGE} docker/ceph') if not os.path.exists('docker/ceph/image'): os.mkdir('docker/ceph/image') remove_ceph_image_tar() - engine.run('save {CEPH_IMAGE} -o {CEPH_IMAGE_TAR}') + engine.run(f'save {CEPH_IMAGE} -o {CEPH_IMAGE_TAR}') run_shell_command(f'chmod 777 {CEPH_IMAGE_TAR}') print('Ceph image added') @@ -109,6 +110,11 @@ def check_selinux(): if 'Disabled' not in selinux: print(colored('selinux should be disabled, please disable it if you ' 'don\'t want unexpected behaviour.', Colors.WARNING)) +def dashboard_setup(): + command = f'cd {DASHBOARD_PATH} && npm install' + run_shell_command(command) + command = f'cd {DASHBOARD_PATH} && npm run build' + run_shell_command(command) class Cluster(Target): _help = 'Manage docker cephadm boxes' @@ -126,14 +132,31 @@ class Cluster(Target): self.parser.add_argument('--skip-monitoring-stack', action='store_true', help='skip monitoring stack') self.parser.add_argument('--skip-dashboard', action='store_true', help='skip dashboard') self.parser.add_argument('--expanded', action='store_true', help='deploy 3 hosts and 3 osds') + self.parser.add_argument('--jobs', type=int, help='Number of jobs scheduled in parallel') @ensure_outside_container def setup(self): check_cgroups() check_selinux() - get_ceph_image() - get_box_image() + targets = [ + get_ceph_image, + get_box_image, + dashboard_setup + ] + results = [] + jobs = Config.get('jobs') + if jobs: + jobs = int(jobs) + else: + jobs = None + pool = Pool(jobs) + for target in targets: + 
results.append(pool.apply_async(target)) + + for result in results: + result.wait() + @ensure_outside_container def cleanup(self): From 0a07a82f7061a4c0f18deb1160668586e43cba53 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Thu, 19 Jan 2023 10:12:09 +0100 Subject: [PATCH 9/9] cephadm/box: minor fixes Signed-off-by: Pere Diaz Bou --- doc/dev/cephadm/developing-cephadm.rst | 3 ++- src/cephadm/box/DockerfilePodman | 1 + src/cephadm/box/box.py | 9 +++++---- src/cephadm/box/osd.py | 15 ++++++++++----- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/doc/dev/cephadm/developing-cephadm.rst b/doc/dev/cephadm/developing-cephadm.rst index d4da4066520..fe6abf4ee31 100644 --- a/doc/dev/cephadm/developing-cephadm.rst +++ b/doc/dev/cephadm/developing-cephadm.rst @@ -289,8 +289,9 @@ of the cluster. After bootstraping the cluster you can go inside the seed box in which you'll be able to run Cephadm commands:: - ./box.py -v cluster sh + ./box.py -v cluster bash [root@8d52a7860245] cephadm --help + [root@8d52a7860245] cephadm shell ... diff --git a/src/cephadm/box/DockerfilePodman b/src/cephadm/box/DockerfilePodman index 440267bc6ec..115c3c730fa 100644 --- a/src/cephadm/box/DockerfilePodman +++ b/src/cephadm/box/DockerfilePodman @@ -10,6 +10,7 @@ FROM fedora:34 ENV CEPHADM_PATH=/usr/local/sbin/cephadm RUN ln -s /ceph/src/cephadm/cephadm.py $CEPHADM_PATH # NOTE: assume path of ceph volume + # Don't include container-selinux and remove # directories used by yum that are just taking # up space. 
diff --git a/src/cephadm/box/box.py b/src/cephadm/box/box.py index b485bc16aab..db2f2423351 100755 --- a/src/cephadm/box/box.py +++ b/src/cephadm/box/box.py @@ -52,7 +52,7 @@ def remove_ceph_image_tar(): def cleanup_box() -> None: - osd.cleanup() + osd.cleanup_osds() remove_ceph_image_tar() @@ -118,7 +118,7 @@ def dashboard_setup(): class Cluster(Target): _help = 'Manage docker cephadm boxes' - actions = ['bootstrap', 'start', 'down', 'list', 'sh', 'setup', 'cleanup'] + actions = ['bootstrap', 'start', 'down', 'list', 'bash', 'setup', 'cleanup'] def set_args(self): self.parser.add_argument( @@ -366,11 +366,12 @@ class Cluster(Target): print(f'{name} \t{ip} \t{hostname}') @ensure_outside_container - def sh(self): + def bash(self): # we need verbose to see the prompt after running shell command Config.set('verbose', True) print('Seed bash') - run_shell_command(f'{engine_compose()} -f {Config.get("docker_yaml")} exec {get_seed_name()} bash') + engine = get_container_engine() + engine.run(f'exec -it {engine.seed_name} bash') targets = { diff --git a/src/cephadm/box/osd.py b/src/cephadm/box/osd.py index b57af42434a..827a4de36c0 100644 --- a/src/cephadm/box/osd.py +++ b/src/cephadm/box/osd.py @@ -1,6 +1,7 @@ import json import os import time +import re from typing import Dict from util import ( @@ -23,10 +24,9 @@ def remove_loop_img() -> None: if os.path.exists(loop_image): os.remove(loop_image) -def create_loopback_devices(osds: int) -> None: - +def create_loopback_devices(osds: int) -> Dict[int, Dict[str, str]]: assert osds - cleanup() + cleanup_osds() osd_devs = dict() for i in range(osds): @@ -84,7 +84,7 @@ def deploy_osd(data: str, hostname: str) -> bool: return 'Created osd(s)' in out -def cleanup() -> None: +def cleanup_osds() -> None: loop_img_dir = Config.get('loop_img_dir') osd_devs = load_osd_devices() for osd in osd_devs.values(): @@ -125,8 +125,9 @@ class Osd(Target): - deploy: Deploy an osd given a block device - create_loop: Create needed loopback devices 
and block devices in logical volumes for a number of osds. + - destroy: Remove all osds and the underlying loopback devices. """ - actions = ['deploy', 'create_loop'] + actions = ['deploy', 'create_loop', 'destroy'] def set_args(self): self.parser.add_argument('action', choices=Osd.actions) @@ -150,3 +151,7 @@ class Osd(Target): osds = Config.get('osds') create_loopback_devices(int(osds)) print('Successfully created loopback devices') + + @ensure_outside_container + def destroy(self): + cleanup_osds()