Merge pull request #49627 from rhcs-dashboard/box-podman-osd

cephadm/box: create osds with podman.

Reviewed-by: Ernesto Puerta <epuertat@redhat.com>
Reviewed-by: Juan Miguel Olmo <jolmomar@redhat.com>
Reviewed-by: Nizamudeen A <nia@redhat.com>
Commit 5cb32e8b15 by Pere Diaz Bou, 2023-02-08 11:10:51 +01:00 (committed by GitHub).
9 changed files with 456 additions and 282 deletions

---- File 1 of 9 ----

@@ -289,8 +289,9 @@ of the cluster.
After bootstrapping the cluster you can go inside the seed box in which you'll be
able to run Cephadm commands::
./box.py -v cluster sh
./box.py -v cluster bash
[root@8d52a7860245] cephadm --help
[root@8d52a7860245] cephadm shell
...

---- File 2 of 9 ----

@@ -30,4 +30,4 @@ EXPOSE 22
FROM centos-systemd-docker
WORKDIR /root
CMD [ "/usr/sbin/init" ]
CMD [ "/usr/sbin/init" ]

---- File 3 of 9 ----

@@ -9,11 +9,13 @@
FROM fedora:34
ENV CEPHADM_PATH=/usr/local/sbin/cephadm
RUN ln -s /ceph/src/cephadm/cephadm.py $CEPHADM_PATH # NOTE: assume path of ceph volume
# Don't include container-selinux and remove
# directories used by yum that are just taking
# up space.
RUN dnf -y update; rpm --restore shadow-utils 2>/dev/null; \
yum -y install strace podman fuse-overlayfs --exclude container-selinux; \
yum -y install podman fuse-overlayfs --exclude container-selinux; \
rm -rf /var/cache /var/log/dnf* /var/log/yum.*
RUN dnf install which firewalld chrony procps systemd openssh openssh-server openssh-clients sshpass lvm2 -y
@@ -36,6 +38,7 @@ RUN echo 'root:root' | chpasswd
RUN dnf install -y adjtimex # adjtimex syscall doesn't exist in fedora 35+ therefore we have to install it manually
# so chronyd works
RUN dnf install -y strace sysstat # debugging tools
RUN dnf -y install hostname iproute udev
ENV _CONTAINERS_USERNS_CONFIGURED=""

---- File 4 of 9 ----

@@ -2,10 +2,13 @@
import argparse
import os
import stat
import json
import sys
import host
import osd
from multiprocessing import Process, Pool
from util import (
BoxType,
Config,
Target,
ensure_inside_container,
@@ -14,12 +17,17 @@ from util import (
run_cephadm_shell_command,
run_dc_shell_command,
run_dc_shell_commands,
get_container_engine,
run_shell_command,
run_shell_commands,
ContainerEngine,
DockerEngine,
PodmanEngine,
colored,
engine,
engine_compose,
Colors
Colors,
get_seed_name
)
CEPH_IMAGE = 'quay.ceph.io/ceph-ci/ceph:main'
@@ -44,7 +52,7 @@ def remove_ceph_image_tar():
def cleanup_box() -> None:
osd.cleanup()
osd.cleanup_osds()
remove_ceph_image_tar()
@@ -52,7 +60,8 @@ def image_exists(image_name: str):
# extract_tag
assert image_name.find(':')
image_name, tag = image_name.split(':')
images = run_shell_command(f'{engine()} image ls').split('\n')
engine = get_container_engine()
images = engine.run('image ls').split('\n')
IMAGE_NAME = 0
TAG = 1
for image in images:
@@ -66,25 +75,24 @@ def image_exists(image_name: str):
def get_ceph_image():
print('Getting ceph image')
run_shell_command(f'{engine()} pull {CEPH_IMAGE}')
engine = get_container_engine()
engine.run(f'pull {CEPH_IMAGE}')
# update
run_shell_command(f'{engine()} build -t {CEPH_IMAGE} docker/ceph')
engine.run(f'build -t {CEPH_IMAGE} docker/ceph')
if not os.path.exists('docker/ceph/image'):
os.mkdir('docker/ceph/image')
remove_ceph_image_tar()
run_shell_command(f'{engine()} save {CEPH_IMAGE} -o {CEPH_IMAGE_TAR}')
engine.run(f'save {CEPH_IMAGE} -o {CEPH_IMAGE_TAR}')
run_shell_command(f'chmod 777 {CEPH_IMAGE_TAR}')
print('Ceph image added')
def get_box_image():
print('Getting box image')
if engine() == 'docker':
run_shell_command(f'{engine()} build -t cephadm-box -f DockerfileDocker .')
else:
run_shell_command(f'{engine()} build -t cephadm-box -f DockerfilePodman .')
engine = get_container_engine()
engine.run(f'build -t cephadm-box -f {engine.dockerfile} .')
print('Box image added')
def check_dashboard():
@@ -102,11 +110,15 @@ def check_selinux():
if 'Disabled' not in selinux:
print(colored('selinux should be disabled, please disable it if you '
'don\'t want unexpected behaviour.', Colors.WARNING))
def dashboard_setup():
command = f'cd {DASHBOARD_PATH} && npm install'
run_shell_command(command)
command = f'cd {DASHBOARD_PATH} && npm run build'
run_shell_command(command)
class Cluster(Target):
_help = 'Manage docker cephadm boxes'
actions = ['bootstrap', 'start', 'down', 'list', 'sh', 'setup', 'cleanup']
actions = ['bootstrap', 'start', 'down', 'list', 'bash', 'setup', 'cleanup']
def set_args(self):
self.parser.add_argument(
@@ -120,17 +132,31 @@ class Cluster(Target):
self.parser.add_argument('--skip-monitoring-stack', action='store_true', help='skip monitoring stack')
self.parser.add_argument('--skip-dashboard', action='store_true', help='skip dashboard')
self.parser.add_argument('--expanded', action='store_true', help='deploy 3 hosts and 3 osds')
self.parser.add_argument('--jobs', type=int, help='Number of jobs scheduled in parallel')
@ensure_outside_container
def setup(self):
if engine() == 'podman':
run_shell_command('pip3 install https://github.com/containers/podman-compose/archive/devel.tar.gz')
check_cgroups()
check_selinux()
get_ceph_image()
get_box_image()
targets = [
get_ceph_image,
get_box_image,
dashboard_setup
]
results = []
jobs = Config.get('jobs')
if jobs:
jobs = int(jobs)
else:
jobs = None
pool = Pool(jobs)
for target in targets:
results.append(pool.apply_async(target))
for result in results:
result.wait()
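Note: setup() now runs the image builds and the dashboard npm build in parallel
through a multiprocessing pool. A minimal sketch of the pattern, assuming
get_ceph_image, get_box_image and dashboard_setup are importable top-level
callables (run_targets_in_parallel is an illustrative name, not part of this
change):

    from multiprocessing import Pool

    def run_targets_in_parallel(targets, jobs=None):
        # Pool(None) sizes the pool to os.cpu_count()
        with Pool(jobs) as pool:
            results = [pool.apply_async(target) for target in targets]
            for result in results:
                result.wait()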
@ensure_outside_container
def cleanup(self):
@@ -139,20 +165,15 @@ class Cluster(Target):
@ensure_inside_container
def bootstrap(self):
print('Running bootstrap on seed')
cephadm_path = os.environ.get('CEPHADM_PATH')
os.symlink('/cephadm/cephadm', cephadm_path)
if engine() == 'docker':
# restart to ensure docker is using daemon.json
run_shell_command(
'systemctl restart docker'
)
cephadm_path = str(os.environ.get('CEPHADM_PATH'))
engine = get_container_engine()
if isinstance(engine, DockerEngine):
engine.restart()
st = os.stat(cephadm_path)
os.chmod(cephadm_path, st.st_mode | stat.S_IEXEC)
run_shell_command(f'{engine()} load < /cephadm/box/docker/ceph/image/quay.ceph.image.tar')
engine.run('load < /cephadm/box/docker/ceph/image/quay.ceph.image.tar')
# cephadm guid error because it sometimes tries to use quay.ceph.io/ceph-ci/ceph:<none>
# instead of main branch's tag
run_shell_command('export CEPH_SOURCE_FOLDER=/ceph')
@@ -176,9 +197,9 @@ class Cluster(Target):
skip_dashboard = '--skip-dashboard' if Config.get('skip-dashboard') else ''
fsid = Config.get('fsid')
config_folder = Config.get('config_folder')
config = Config.get('config')
keyring = Config.get('keyring')
config_folder = str(Config.get('config_folder'))
config = str(Config.get('config'))
keyring = str(Config.get('keyring'))
if not os.path.exists(config_folder):
os.mkdir(config_folder)
@@ -201,7 +222,7 @@ class Cluster(Target):
)
print('Running cephadm bootstrap...')
run_shell_command(cephadm_bootstrap_command)
run_shell_command(cephadm_bootstrap_command, expect_exit_code=120)
print('Cephadm bootstrap complete')
run_shell_command('sudo vgchange --refresh')
@@ -216,14 +237,15 @@ class Cluster(Target):
def start(self):
check_cgroups()
check_selinux()
osds = Config.get('osds')
hosts = Config.get('hosts')
osds = int(Config.get('osds'))
hosts = int(Config.get('hosts'))
engine = get_container_engine()
# ensure boxes don't exist
self.down()
# podman is run without sudo
if engine() == 'podman':
if isinstance(engine, PodmanEngine):
I_am = run_shell_command('whoami')
if 'root' in I_am:
print(root_error_msg)
@@ -237,24 +259,20 @@ class Cluster(Target):
used_loop = ""
if not Config.get('skip_create_loop'):
print('Adding logical volumes (block devices) in loopback device...')
print('Creating OSD devices...')
used_loop = osd.create_loopback_devices(osds)
print(f'Added {osds} logical volumes in a loopback device')
loop_device_arg = ""
if used_loop:
loop_device_arg = f'--device {used_loop} -v /dev/vg1:/dev/vg1:Z'
for o in range(osds):
loop_device_arg += f' --device /dev/dm-{o}'
print('Starting containers')
if engine() == 'docker':
dcflags = f'-f {Config.get("docker_yaml")}'
if not os.path.exists('/sys/fs/cgroup/cgroup.controllers'):
dcflags += f' -f {Config.get("docker_v1_yaml")}'
run_shell_command(f'{engine_compose()} {dcflags} up --scale hosts={hosts} -d')
else:
run_shell_command(f'{engine_compose()} -f {Config.get("podman_yaml")} --podman-run-args "--group-add keep-groups --network=host --device /dev/fuse -it {loop_device_arg}" up --scale hosts={hosts} -d')
engine.up(hosts)
containers = engine.get_containers()
seed = engine.get_seed()
# Umounting somehow brings back the contents of the host /sys/dev/block.
# On startup /sys/dev/block is empty. After umount, we can see symlinks again
# so that lsblk is able to run as expected
run_dc_shell_command('umount /sys/dev/block', seed)
run_shell_command('sudo sysctl net.ipv4.conf.all.forwarding=1')
run_shell_command('sudo iptables -P FORWARD ACCEPT')
@@ -266,15 +284,15 @@ class Cluster(Target):
systemctl start chronyd
systemctl status --no-pager chronyd
"""
for h in range(hosts):
run_dc_shell_commands(h + 1, 'hosts', chronyd_setup)
run_dc_shell_commands(1, 'seed', chronyd_setup)
for container in containers:
print(colored('Got container:', Colors.OKCYAN), str(container))
for container in containers:
run_dc_shell_commands(chronyd_setup, container)
print('Setting up host ssh servers')
for h in range(hosts):
host._setup_ssh('hosts', h + 1)
host._setup_ssh('seed', 1)
for container in containers:
print(colored('Setting up ssh server for:', Colors.OKCYAN), str(container))
host._setup_ssh(container)
verbose = '-v' if Config.get('verbose') else ''
skip_deploy = '--skip-deploy-osds' if Config.get('skip-deploy-osds') else ''
@@ -283,39 +301,36 @@ class Cluster(Target):
)
skip_dashboard = '--skip-dashboard' if Config.get('skip-dashboard') else ''
box_bootstrap_command = (
f'/cephadm/box/box.py {verbose} --engine {engine()} cluster bootstrap '
f'/cephadm/box/box.py {verbose} --engine {engine.command} cluster bootstrap '
f'--osds {osds} '
f'--hosts {hosts} '
f'{skip_deploy} '
f'{skip_dashboard} '
f'{skip_monitoring_stack} '
)
run_dc_shell_command(box_bootstrap_command, 1, 'seed')
info = get_boxes_container_info()
ips = info['ips']
hostnames = info['hostnames']
print(ips)
host._copy_cluster_ssh_key(ips)
print(box_bootstrap_command)
run_dc_shell_command(box_bootstrap_command, seed)
expanded = Config.get('expanded')
if expanded:
host._add_hosts(ips, hostnames)
# TODO: add osds
if expanded and not Config.get('skip-deploy-osds'):
if engine() == 'podman':
print('osd deployment not supported in podman')
else:
info = get_boxes_container_info()
ips = info['ips']
hostnames = info['hostnames']
print(ips)
if hosts > 0:
host._copy_cluster_ssh_key(ips)
host._add_hosts(ips, hostnames)
if not Config.get('skip-deploy-osds'):
print('Deploying osds... This could take a few minutes')
osd.deploy_osds_in_vg('vg1')
osd.deploy_osds(osds)
print('Osds deployed')
dashboard_ip = 'localhost'
info = get_boxes_container_info(with_seed=True)
if engine() == 'docker':
if isinstance(engine, DockerEngine):
for i in range(info['size']):
if 'seed' in info['container_names'][i]:
if get_seed_name() in info['container_names'][i]:
dashboard_ip = info["ips"][i]
print(colored(f'dashboard available at https://{dashboard_ip}:8443', Colors.OKGREEN))
@@ -323,8 +338,20 @@ class Cluster(Target):
@ensure_outside_container
def down(self):
if engine() == 'podman':
run_shell_command(f'{engine_compose()} -f {Config.get("podman_yaml")} down')
engine = get_container_engine()
if isinstance(engine, PodmanEngine):
containers = json.loads(engine.run('container ls --format json'))
for container in containers:
for name in container['Names']:
if name.startswith('box_hosts_'):
engine.run(f'container kill {name}')
engine.run(f'container rm {name}')
pods = json.loads(engine.run('pod ls --format json'))
for pod in pods:
if 'Name' in pod and pod['Name'].startswith('box_pod_host'):
name = pod['Name']
engine.run(f'pod kill {name}')
engine.run(f'pod rm {name}')
else:
run_shell_command(f'{engine_compose()} -f {Config.get("docker_yaml")} down')
print('Successfully killed all boxes')
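Note: with podman, down() now kills containers and pods by name instead of
delegating to podman-compose. For reference, invented samples in the JSON
shapes the code above parses (field names follow podman's --format json
output; the values are made up):

    # `podman container ls --format json` yields a list; `Names` is an array
    containers = [{"Names": ["box_hosts_1"]}]
    # `podman pod ls --format json` yields a list with a scalar `Name`
    pods = [{"Name": "box_pod_host_1"}]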
@@ -339,11 +366,12 @@ class Cluster(Target):
print(f'{name} \t{ip} \t{hostname}')
@ensure_outside_container
def sh(self):
def bash(self):
# we need verbose to see the prompt after running shell command
Config.set('verbose', True)
print('Seed bash')
run_shell_command(f'{engine_compose()} -f {Config.get("docker_yaml")} exec seed bash')
engine = get_container_engine()
engine.run(f'exec -it {engine.seed_name} bash')
targets = {

---- File 5 of 9 ----

@@ -1,60 +0,0 @@
version: "2.4"
services:
cephadm-host-base:
build:
context: .
environment:
- CEPH_BRANCH=master
image: cephadm-box
# probably not needed with rootless Docker and cgroups v2
# privileged: true
cap_add:
- SYS_ADMIN
- NET_ADMIN
- SYS_TIME
- SYS_RAWIO
- MKNOD
- NET_RAW
- SETUID
- SETGID
- CHOWN
- SYS_PTRACE
- SYS_TTY_CONFIG
- CAP_AUDIT_WRITE
- CAP_AUDIT_CONTROL
stop_signal: RTMIN+3
volumes:
- ../../../:/ceph:z
- ..:/cephadm:z
# - ./daemon.json:/etc/docker/daemon.json
# dangerous, maybe just map the loopback
# https://stackoverflow.com/questions/36880565/why-dont-my-udev-rules-work-inside-of-a-running-docker-container
- /run/udev:/run/udev
- /sys/dev/block:/sys/dev/block
- /sys/fs/cgroup:/sys/fs/cgroup
- /dev/fuse:/dev/fuse
- /dev/disk:/dev/disk
- /dev/mapper:/dev/mapper
- /dev/mapper/control:/dev/mapper/control
mem_limit: "20g"
scale: -1
seed:
extends:
service: cephadm-host-base
ports:
- "2222:22"
- "3000:3000"
- "8888:8888"
- "8443:8443"
- "9095:9095"
scale: 1
hosts:
extends:
service: cephadm-host-base
scale: 1
volumes:
var-lib-docker:
network_mode: public

---- File 6 of 9 ----

@@ -1,2 +1,3 @@
FROM quay.ceph.io/ceph-ci/ceph:master
FROM quay.ceph.io/ceph-ci/ceph:main
RUN pip3 install packaging
EXPOSE 8443

---- File 7 of 9 ----

@@ -3,20 +3,24 @@ from typing import List, Union
from util import (
Config,
HostContainer,
Target,
get_boxes_container_info,
get_container_engine,
inside_container,
run_cephadm_shell_command,
run_dc_shell_command,
run_shell_command,
engine,
BoxType
)
def _setup_ssh(container_type, container_index):
def _setup_ssh(container: HostContainer):
if inside_container():
if not os.path.exists('/root/.ssh/known_hosts'):
run_shell_command('ssh-keygen -b 2048 -t rsa -f /root/.ssh/id_rsa -q -N ""')
run_shell_command('echo "y" | ssh-keygen -b 2048 -t rsa -f /root/.ssh/id_rsa -q -N ""',
expect_error=True)
run_shell_command('echo "root:root" | chpasswd')
with open('/etc/ssh/sshd_config', 'a+') as f:
@@ -28,9 +32,8 @@ def _setup_ssh(container_type, container_index):
print('Redirecting _setup_ssh to container')
verbose = '-v' if Config.get('verbose') else ''
run_dc_shell_command(
f'/cephadm/box/box.py {verbose} --engine {engine()} host setup_ssh {container_type} {container_index}',
container_index,
container_type,
f'/cephadm/box/box.py {verbose} --engine {engine()} host setup_ssh {container.name}',
container
)
@@ -47,11 +50,11 @@ def _add_hosts(ips: Union[List[str], str], hostnames: Union[List[str], str]):
ips = f'{ips}'
hostnames = ' '.join(hostnames)
hostnames = f'{hostnames}'
seed = get_container_engine().get_seed()
run_dc_shell_command(
f'/cephadm/box/box.py {verbose} --engine {engine()} host add_hosts seed 1 --ips {ips} --hostnames {hostnames}',
1,
'seed',
)
f'/cephadm/box/box.py {verbose} --engine {engine()} host add_hosts {seed.name} --ips {ips} --hostnames {hostnames}',
seed
)
def _copy_cluster_ssh_key(ips: Union[List[str], str]):
@@ -73,10 +76,10 @@ def _copy_cluster_ssh_key(ips: Union[List[str], str]):
ips = ' '.join(ips)
ips = f'{ips}'
# assume we only have one seed
seed = get_container_engine().get_seed()
run_dc_shell_command(
f'/cephadm/box/box.py {verbose} --engine {engine()} host copy_cluster_ssh_key seed 1 --ips {ips}',
1,
'seed',
f'/cephadm/box/box.py {verbose} --engine {engine()} host copy_cluster_ssh_key {seed.name} --ips {ips}',
seed
)
@@ -87,10 +90,9 @@ class Host(Target):
def set_args(self):
self.parser.add_argument('action', choices=Host.actions)
self.parser.add_argument(
'container_type', type=str, help='box_{type}_{index}'
)
self.parser.add_argument(
'container_index', type=str, help='box_{type}_{index}'
'container_name',
type=str,
help='box_{type}_{index}. In docker, type can be seed or hosts. In podman only hosts.'
)
self.parser.add_argument('--ips', nargs='*', help='List of host ips')
self.parser.add_argument(
@@ -98,7 +100,9 @@
)
def setup_ssh(self):
_setup_ssh(Config.get('container_type') ,Config.get('container_index'))
container_name = Config.get('container_name')
engine = get_container_engine()
_setup_ssh(engine.get_container(container_name))
def add_hosts(self):
ips = Config.get('ips')

---- File 8 of 9 ----

@@ -1,69 +1,57 @@
import json
import os
import time
import re
from typing import Dict
from util import (
BoxType,
Config,
Target,
ensure_inside_container,
ensure_outside_container,
get_orch_hosts,
inside_container,
run_cephadm_shell_command,
run_dc_shell_command,
get_container_engine,
run_shell_command,
engine
)
DEVICES_FILE="./devices.json"
def remove_loop_img() -> None:
loop_image = Config.get('loop_img')
if os.path.exists(loop_image):
os.remove(loop_image)
def create_loopback_devices(osds: int) -> None:
def create_loopback_devices(osds: int) -> Dict[int, Dict[str, str]]:
assert osds
size = (5 * osds) + 1
print(f'Using {size}GB of data to store osds')
# loop_dev = run_shell_command('sudo losetup -f')
loop_dev = '/dev/loop111'
run_shell_command(f'sudo rm -f {loop_dev}')
run_shell_command(f'sudo mknod -m 0777 {loop_dev} b 7 111')
cleanup_osds()
osd_devs = dict()
# cleanup last call
cleanup()
for i in range(osds):
img_name = f'osd{i}'
loop_dev = create_loopback_device(img_name)
osd_devs[i] = dict(img_name=img_name, device=loop_dev)
with open(DEVICES_FILE, 'w') as dev_file:
dev_file.write(json.dumps(osd_devs))
return osd_devs
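Note: the loopback devices created here are persisted to devices.json so later
calls (deploy, destroy) can find them again. An invented sample of the file's
shape (json.dumps turns the integer keys into strings):

    {
        "0": {"img_name": "osd0", "device": "/dev/loop0"},
        "1": {"img_name": "osd1", "device": "/dev/loop1"}
    }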
def create_loopback_device(img_name, size_gb=5):
loop_img_dir = Config.get('loop_img_dir')
run_shell_command(f'mkdir -p {loop_img_dir}')
loop_img = os.path.join(loop_img_dir, img_name)
run_shell_command(f'rm -f {loop_img}')
run_shell_command(f'dd if=/dev/zero of={loop_img} bs=1 count=0 seek={size_gb}G')
loop_dev = run_shell_command(f'sudo losetup -f')
if not os.path.exists(loop_dev):
dev_minor = re.match(r'\/dev\/[^\d]+(\d+)', loop_dev).groups()[0]
run_shell_command(f'sudo mknod -m777 {loop_dev} b 7 {dev_minor}')
run_shell_command(f'sudo chown {os.getuid()}:{os.getgid()} {loop_dev}')
if os.path.ismount(loop_dev):
os.umount(loop_dev)
loop_devices = json.loads(run_shell_command('losetup -l -J', expect_error=True))
for dev in loop_devices['loopdevices']:
if dev['name'] == loop_dev:
run_shell_command(f'sudo losetup -d {loop_dev}')
if not os.path.exists('./loop-images'):
os.mkdir('loop-images')
remove_loop_img()
loop_image = Config.get('loop_img')
run_shell_command(f'sudo dd if=/dev/zero of={loop_image} bs=1 count=0 seek={size}G')
run_shell_command(f'sudo losetup {loop_dev} {loop_image}')
run_shell_command(f'sudo pvcreate {loop_dev} ')
run_shell_command(f'sudo vgcreate vg1 {loop_dev}')
p = int(100 / osds) # FIXME: 100 osds is the maximum because of lvcreate pct (it doesn't seem to work with lots more decimals)
for i in range(osds):
run_shell_command('sudo vgchange --refresh')
run_shell_command(f'sudo lvcreate -l {p}%VG --name lv{i} vg1')
# FIXME: use /dev/vg1/lv* links as it is less hacky (there could be unrelated dm devices)
run_shell_command(f'sudo chmod 777 /dev/dm-*')
run_shell_command(f'sudo losetup {loop_dev} {loop_img}')
run_shell_command(f'sudo chown {os.getuid()}:{os.getgid()} {loop_dev}')
return loop_dev
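Note: the stale-device check above parses `losetup -l -J`. An invented sample
of that JSON shape (the code only reads the `name` field; `back-file` is shown
for context and is an assumption about typical util-linux output):

    {"loopdevices": [{"name": "/dev/loop0", "back-file": "loop-images/osd0"}]}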
@@ -82,55 +70,52 @@ def get_lvm_osd_data(data: str) -> Dict[str, str]:
osd_data[key] = line[-1]
return osd_data
def load_osd_devices():
if not os.path.exists(DEVICES_FILE):
return dict()
with open(DEVICES_FILE) as dev_file:
devs = json.loads(dev_file.read())
return devs
@ensure_inside_container
def deploy_osd(data: str, hostname: str) -> bool:
out = run_cephadm_shell_command(f'ceph orch daemon add osd "{hostname}:{data}"')
out = run_cephadm_shell_command(f'ceph orch daemon add osd {hostname}:{data} raw')
return 'Created osd(s)' in out
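Note: deploy_osd() now adds the device in raw mode rather than via an LVM
volume group path. A hypothetical call from inside the seed container (the
device and hostname are illustrative values):

    # e.g. issues: ceph orch daemon add osd box_hosts_1:/dev/loop0 raw
    if deploy_osd('/dev/loop0', 'box_hosts_1'):
        print('Created osd on box_hosts_1')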
def cleanup() -> None:
vg = 'vg1'
pvs = json.loads(run_shell_command('sudo pvs --reportformat json'))
for pv in pvs['report'][0]['pv']:
if pv['vg_name'] == vg:
device = pv['pv_name']
run_shell_command(f'sudo vgremove -f --yes {vg}')
run_shell_command(f'sudo losetup -d {device}')
run_shell_command(f'sudo wipefs -af {device}')
# FIX: this can fail with excluded filter
run_shell_command(f'sudo pvremove -f --yes {device}', expect_error=True)
break
remove_loop_img()
def cleanup_osds() -> None:
loop_img_dir = Config.get('loop_img_dir')
osd_devs = load_osd_devices()
for osd in osd_devs.values():
device = osd['device']
if 'loop' in device:
loop_img = os.path.join(loop_img_dir, osd['img_name'])
run_shell_command(f'sudo losetup -d {device}', expect_error=True)
if os.path.exists(loop_img):
os.remove(loop_img)
run_shell_command(f'rm -rf {loop_img_dir}')
def deploy_osds_in_vg(vg: str):
"""
rotate host: deploy each osd on a different host
deploying osds will not succeed while services are still starting, so this
runs another process in the background
"""
if inside_container():
lvs = json.loads(run_shell_command('lvs --reportformat json'))
# distribute osds per host
hosts = get_orch_hosts()
host_index = 0
for lv in lvs['report'][0]['lv']:
if lv['vg_name'] == vg:
deployed = False
while not deployed:
deployed = deploy_osd(
f'{vg}/{lv["lv_name"]}', hosts[host_index]['hostname']
)
host_index = (host_index + 1) % len(hosts)
else:
verbose = '-v' if Config.get('verbose') else ''
print('Redirecting deploy osd in vg to inside container')
run_dc_shell_command(
f'/cephadm/box/box.py {verbose} --engine {engine()} osd deploy --vg {vg}', 1, 'seed'
)
def deploy_osds(count: int):
osd_devs = load_osd_devices()
hosts = get_orch_hosts()
host_index = 0
seed = get_container_engine().get_seed()
v = '-v' if Config.get('verbose') else ''
for osd in osd_devs.values():
deployed = False
while not deployed:
print(hosts)
hostname = hosts[host_index]['hostname']
deployed = run_dc_shell_command(
f'/cephadm/box/box.py {v} osd deploy --data {osd["device"]} --hostname {hostname}',
seed
)
deployed = 'created osd' in deployed.lower() or 'already created?' in deployed.lower()
print('Waiting 5 seconds to re-run deploy osd...')
time.sleep(5)
host_index = (host_index + 1) % len(hosts)
class Osd(Target):
@@ -140,32 +125,33 @@ class Osd(Target):
- deploy: Deploy an osd given a block device
- create_loop: Create needed loopback devices and block devices in logical volumes
for a number of osds.
- destroy: Remove all osds and the underlying loopback devices.
"""
actions = ['deploy', 'create_loop']
actions = ['deploy', 'create_loop', 'destroy']
def set_args(self):
self.parser.add_argument('action', choices=Osd.actions)
self.parser.add_argument('--data', type=str, help='path to a block device')
self.parser.add_argument('--hostname', type=str, help='host to deploy osd')
self.parser.add_argument('--osds', type=int, default=0, help='number of osds')
self.parser.add_argument(
'--vg', type=str, help='Deploy with all lv from virtual group'
)
def deploy(self):
data = Config.get('data')
hostname = Config.get('hostname')
vg = Config.get('vg')
if not hostname:
# assume this host
hostname = run_shell_command('hostname')
if vg:
deploy_osds_in_vg(vg)
if not data:
deploy_osds(Config.get('osds'))
else:
deploy_osd(data, hostname)
@ensure_outside_container
def create_loop(self):
osds = Config.get('osds')
create_loopback_devices(osds)
print('Successfully added logical volumes in loopback devices')
create_loopback_devices(int(osds))
print('Successfully created loopback devices')
@ensure_outside_container
def destroy(self):
cleanup_osds()

---- File 9 of 9 ----

@@ -2,7 +2,10 @@ import json
import os
import subprocess
import sys
from typing import Any, Callable, Dict
import copy
from abc import ABCMeta, abstractmethod
from enum import Enum
from typing import Any, Callable, Dict, List
class Colors:
HEADER = '\033[95m'
@@ -26,6 +29,7 @@ class Config:
'docker_yaml': 'docker-compose-docker.yml',
'docker_v1_yaml': 'docker-compose.cgroup1.yml',
'podman_yaml': 'docker-compose-podman.yml',
'loop_img_dir': 'loop-images',
}
@staticmethod
@@ -42,7 +46,6 @@
def add_args(args: Dict[str, str]) -> None:
Config.args.update(args)
class Target:
def __init__(self, argv, subparsers):
self.argv = argv
@@ -91,7 +94,26 @@ def ensure_inside_container(func) -> bool:
def colored(msg, color: Colors):
return color + msg + Colors.ENDC
def run_shell_command(command: str, expect_error=False) -> str:
class BoxType(str, Enum):
SEED = 'seed'
HOST = 'host'
class HostContainer:
def __init__(self, _name, _type) -> None:
self._name: str = _name
self._type: BoxType = _type
@property
def name(self) -> str:
return self._name
@property
def type(self) -> BoxType:
return self._type
def __str__(self) -> str:
return f'{self.name} {self.type}'
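Note: HostContainer is a small value object pairing a container name with its
BoxType; illustrative use:

    seed = HostContainer('box_hosts_0', BoxType.SEED)
    assert seed.name == 'box_hosts_0'
    assert seed.type == BoxType.SEED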
def run_shell_command(command: str, expect_error=False, verbose=True, expect_exit_code=0) -> str:
if Config.get('verbose'):
print(f'{colored("Running command", Colors.HEADER)}: {colored(command, Colors.OKBLUE)}')
@@ -100,6 +122,7 @@ def run_shell_command(command: str, expect_error=False) -> str:
)
out = ''
err = ''
# let's read when output comes so it is in real time
while True:
# TODO: improve performance of this part, I think this part is a problem
@@ -107,30 +130,30 @@ def run_shell_command(command: str, expect_error=False) -> str:
if pout == '' and process.poll() is not None:
break
if pout:
if Config.get('verbose'):
if Config.get('verbose') and verbose:
sys.stdout.write(pout)
sys.stdout.flush()
out += pout
process.wait()
# no last break line
err = (
process.stderr.read().decode().rstrip()
) # remove trailing whitespaces and new lines
err += process.stderr.read().decode('latin1').strip()
out = out.strip()
if process.returncode != 0 and not expect_error:
raise RuntimeError(f'Failed command: {command}\n{err}')
if process.returncode != 0 and not expect_error and process.returncode != expect_exit_code:
err = colored(err, Colors.FAIL);
raise RuntimeError(f'Failed command: {command}\n{err}\nexit code: {process.returncode}')
sys.exit(1)
return out
def run_dc_shell_commands(index, box_type, commands: str, expect_error=False) -> str:
def run_dc_shell_commands(commands: str, container: HostContainer, expect_error=False) -> str:
for command in commands.split('\n'):
command = command.strip()
if not command:
continue
run_dc_shell_command(command.strip(), index, box_type, expect_error=expect_error)
run_dc_shell_command(command.strip(), container, expect_error=expect_error)
def run_shell_commands(commands: str, expect_error=False) -> str:
for command in commands.split('\n'):
@@ -143,23 +166,20 @@ def run_shell_commands(commands: str, expect_error=False) -> str:
def run_cephadm_shell_command(command: str, expect_error=False) -> str:
config = Config.get('config')
keyring = Config.get('keyring')
fsid = Config.get('fsid')
with_cephadm_image = 'CEPHADM_IMAGE=quay.ceph.io/ceph-ci/ceph:master'
with_cephadm_image = 'CEPHADM_IMAGE=quay.ceph.io/ceph-ci/ceph:main'
out = run_shell_command(
f'{with_cephadm_image} cephadm --verbose shell --config {config} --keyring {keyring} -- {command}',
f'{with_cephadm_image} cephadm --verbose shell --fsid {fsid} --config {config} --keyring {keyring} -- {command}',
expect_error,
)
return out
def run_dc_shell_command(
command: str, index: int, box_type: str, expect_error=False
command: str, container: HostContainer, expect_error=False
) -> str:
container_id = get_container_id(f'{box_type}_{index}')
print(container_id)
out = run_shell_command(
f'{engine()} exec -it {container_id} {command}', expect_error
)
out = get_container_engine().run_exec(container, command, expect_error=expect_error)
return out
def inside_container() -> bool:
@@ -174,27 +194,38 @@ def engine():
def engine_compose():
return f'{engine()}-compose'
def get_seed_name():
if engine() == 'docker':
return 'seed'
elif engine() == 'podman':
return 'box_hosts_0'
else:
print(f'unknown engine {engine()}')
sys.exit(1)
@ensure_outside_container
def get_boxes_container_info(with_seed: bool = False) -> Dict[str, Any]:
# NOTE: this could be cached
IP = 0
CONTAINER_NAME = 1
HOSTNAME = 2
# fstring extrapolation will mistakenly try to extrapolate inspect options
ips_query = engine() + " inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}} %tab% {{.Name}} %tab% {{.Config.Hostname}}' $("+ engine() + " ps -aq) | sed 's#%tab%#\t#g' | sed 's#/##g' | sort -t . -k 1,1n -k 2,2n -k 3,3n -k 4,4n"
out = run_shell_command(ips_query)
ips_query = engine() + " inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}} %tab% {{.Name}} %tab% {{.Config.Hostname}}' $("+ engine() + " ps -aq) --format json"
containers = json.loads(run_shell_command(ips_query, verbose=False))
# FIXME: if things get more complex a class representing a container info might be useful,
# for now representing data this way is faster.
info = {'size': 0, 'ips': [], 'container_names': [], 'hostnames': []}
for line in out.split('\n'):
container = line.split()
for container in containers:
# Most commands use hosts only
name_filter = 'box_' if with_seed else 'box_hosts'
if container[1].strip()[: len(name_filter)] == name_filter:
name = container['Name']
if name.startswith('box_hosts'):
if not with_seed and name == get_seed_name():
continue
info['size'] += 1
info['ips'].append(container[IP])
info['container_names'].append(container[CONTAINER_NAME])
info['hostnames'].append(container[HOSTNAME])
print(container['NetworkSettings'])
if 'Networks' in container['NetworkSettings']:
info['ips'].append(container['NetworkSettings']['Networks']['box_network']['IPAddress'])
else:
info['ips'].append('n/a')
info['container_names'].append(name)
info['hostnames'].append(container['Config']['Hostname'])
return info
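Note: get_boxes_container_info() returns a flat dict rather than a class. An
invented sample of its shape (IPs and hostnames are made up):

    {
        'size': 2,
        'ips': ['10.88.0.2', '10.88.0.3'],
        'container_names': ['box_hosts_1', 'box_hosts_2'],
        'hostnames': ['8d52a7860245', '1f2e3d4c5b6a'],
    }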
@@ -202,9 +233,189 @@ def get_orch_hosts():
if inside_container():
orch_host_ls_out = run_cephadm_shell_command('ceph orch host ls --format json')
else:
orch_host_ls_out = run_dc_shell_command('cephadm shell --keyring /etc/ceph/ceph.keyring --config /etc/ceph/ceph.conf -- ceph orch host ls --format json', 1, 'seed')
orch_host_ls_out = run_dc_shell_command(f'cephadm shell --keyring /etc/ceph/ceph.keyring --config /etc/ceph/ceph.conf -- ceph orch host ls --format json',
get_container_engine().get_seed())
sp = orch_host_ls_out.split('\n')
orch_host_ls_out = sp[len(sp) - 1]
print('xd', orch_host_ls_out)
hosts = json.loads(orch_host_ls_out)
return hosts
class ContainerEngine(metaclass=ABCMeta):
@property
@abstractmethod
def command(self) -> str: pass
@property
@abstractmethod
def seed_name(self) -> str: pass
@property
@abstractmethod
def dockerfile(self) -> str: pass
@property
def host_name_prefix(self) -> str:
return 'box_hosts_'
@abstractmethod
def up(self, hosts: int): pass
def run_exec(self, container: HostContainer, command: str, expect_error: bool = False):
return run_shell_command(' '.join([self.command, 'exec', container.name, command]),
expect_error=expect_error)
def run(self, engine_command: str, expect_error: bool = False):
return run_shell_command(' '.join([self.command, engine_command]), expect_error=expect_error)
def get_containers(self) -> List[HostContainer]:
ps_out = json.loads(run_shell_command('podman ps --format json'))
containers = []
for container in ps_out:
if not container['Names']:
raise RuntimeError(f'Container {container} missing name')
name = container['Names'][0]
if name == self.seed_name:
containers.append(HostContainer(name, BoxType.SEED))
elif name.startswith(self.host_name_prefix):
containers.append(HostContainer(name, BoxType.HOST))
return containers
def get_seed(self) -> HostContainer:
for container in self.get_containers():
if container.type == BoxType.SEED:
return container
raise RuntimeError('Missing seed container')
def get_container(self, container_name: str):
containers = self.get_containers()
for container in containers:
if container.name == container_name:
return container
return None
def restart(self):
pass
class DockerEngine(ContainerEngine):
command = 'docker'
seed_name = 'seed'
dockerfile = 'DockerfileDocker'
def restart(self):
run_shell_command('systemctl restart docker')
def up(self, hosts: int):
dcflags = f'-f {Config.get("docker_yaml")}'
if not os.path.exists('/sys/fs/cgroup/cgroup.controllers'):
dcflags += f' -f {Config.get("docker_v1_yaml")}'
run_shell_command(f'{engine_compose()} {dcflags} up --scale hosts={hosts} -d')
class PodmanEngine(ContainerEngine):
command = 'podman'
seed_name = 'box_hosts_0'
dockerfile = 'DockerfilePodman'
CAPS = [
"SYS_ADMIN",
"NET_ADMIN",
"SYS_TIME",
"SYS_RAWIO",
"MKNOD",
"NET_RAW",
"SETUID",
"SETGID",
"CHOWN",
"SYS_PTRACE",
"SYS_TTY_CONFIG",
"CAP_AUDIT_WRITE",
"CAP_AUDIT_CONTROL",
]
VOLUMES = [
'../../../:/ceph:z',
'../:/cephadm:z',
'/run/udev:/run/udev',
'/sys/dev/block:/sys/dev/block',
'/sys/fs/cgroup:/sys/fs/cgroup:ro',
'/dev/fuse:/dev/fuse',
'/dev/disk:/dev/disk',
'/sys/devices/virtual/block:/sys/devices/virtual/block',
'/sys/block:/dev/block',
'/dev/mapper:/dev/mapper',
'/dev/mapper/control:/dev/mapper/control',
]
TMPFS = ['/run', '/tmp']
# FIXME: right now we are assuming every service will be exposed through the seed, but this is far
# from the truth. Services can be deployed on different hosts so we need a system to manage this.
SEED_PORTS = [
8443, # dashboard
3000, # grafana
9093, # alertmanager
9095 # prometheus
]
def setup_podman_env(self, hosts: int = 1, osd_devs={}):
network_name = 'box_network'
networks = run_shell_command('podman network ls')
if network_name not in networks:
run_shell_command(f'podman network create -d bridge {network_name}')
args = [
'--group-add', 'keep-groups',
'--device', '/dev/fuse' ,
'-it' ,
'-d',
'-e', 'CEPH_BRANCH=main',
'--stop-signal', 'RTMIN+3'
]
for cap in self.CAPS:
args.append('--cap-add')
args.append(cap)
for volume in self.VOLUMES:
args.append('-v')
args.append(volume)
for tmp in self.TMPFS:
args.append('--tmpfs')
args.append(tmp)
for osd_dev in osd_devs.values():
device = osd_dev["device"]
args.append('--device')
args.append(f'{device}:{device}')
for host in range(hosts+1): # 0 will be the seed
options = copy.copy(args)
options.append('--name')
options.append(f'box_hosts_{host}')
options.append('--network')
options.append(f'{network_name}')
if host == 0:
for port in self.SEED_PORTS:
options.append('-p')
options.append(f'{port}:{port}')
options.append('cephadm-box')
options = ' '.join(options)
run_shell_command(f'podman run {options}')
def up(self, hosts: int):
import osd
self.setup_podman_env(hosts=hosts, osd_devs=osd.load_osd_devices())
def get_container_engine() -> ContainerEngine:
if engine() == 'docker':
return DockerEngine()
else:
return PodmanEngine()
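Note: callers now go through get_container_engine() instead of string-comparing
engine(). A short usage sketch, assuming box.py has already parsed the
--engine flag into Config:

    engine = get_container_engine()           # DockerEngine() or PodmanEngine()
    engine.run('image ls')                    # `docker image ls` / `podman image ls`
    seed = engine.get_seed()                  # HostContainer with BoxType.SEED
    engine.run_exec(seed, 'cephadm --help')   # exec inside the seed container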