Merge PR #31094 into master

* refs/pull/31094/head:
	ceph-daemon: remove redundant --privileged
	test_ceph_daemon: test unit, enter, shell
	ceph-daemon: drop exec
	ceph-daemon: fix exit code for run, shell, enter, exec
	ceph-daemon: allow optional command for 'enter'
	ceph-daemon: fix LANG for 'enter' command
	ceph-daemon: allow shell to take optional command
	qa/suites/rados/singleton-nomsgr/ceph-daemon: run test_ceph_daemon.sh
	qa/standalone/test_ceph_daemon.sh: add new functional tests
	test_ceph_daemon.sh: use newer image
	ceph-daemon: unconditionally enable and start crash unit
	ceph-daemon: fix crash unit cleanup
	ceph-daemon: include 'crash' unit/item in 'ls' output
	ceph-daemon: fix 'ls'
	mgr/orchestrator: s/sdd/ssd/
	mgr/ssh: remove stdout/stderr kludges
	ceph-daemon: fix ceph-volume command to write stdout to stdout

Reviewed-by: Sebastian Wagner <swagner@suse.com>
This commit is contained in:
Sage Weil 2019-10-23 19:46:06 -05:00
commit bf09a04d22
6 changed files with 207 additions and 53 deletions

156
qa/standalone/test_ceph_daemon.sh Executable file
View File

@ -0,0 +1,156 @@
#!/bin/bash -ex
# Functional test for the ceph-daemon tool: locate the binary, pick the
# container image, and set up a scratch dir; the rest of the script drives
# bootstrap/deploy/teardown against a throwaway cluster FSID.

# Allow the caller to override how we escalate (e.g. SUDO='').
[ -z "$SUDO" ] && SUDO=sudo

# Prefer an in-tree build, then a CWD copy, then whatever is on PATH.
[ -x ../src/ceph-daemon ] && CEPH_DAEMON=../src/ceph-daemon
# BUGFIX: was '.ceph-daemon' (missing slash), so a binary in the CWD was
# never actually used.
[ -x ./ceph-daemon ] && CEPH_DAEMON=./ceph-daemon
which ceph-daemon && CEPH_DAEMON=$(which ceph-daemon)
# Fail fast with a clear message instead of expanding to an empty command
# later (the script runs with set -e).
[ -n "$CEPH_DAEMON" ] || { echo "ceph-daemon binary not found" >&2; exit 1; }

# Fixed test FSID so reruns can find and remove any leftover cluster.
FSID='00000000-0000-0000-0000-0000deadbeef'
IMAGE='ceph/daemon-base:latest-master'

# clean up previous run(s)?
$SUDO $CEPH_DAEMON rm-cluster --fsid $FSID --force

# Scratch dir for generated config/keyrings; removed on signals here and
# explicitly at the end of the script on success.
TMPDIR=`mktemp -d -p .`
trap "rm -rf $TMPDIR" TERM HUP INT
# expect_false CMD [ARGS...]: negative-test helper — succeed (status 0)
# only when CMD fails, fail (status 1) when CMD succeeds.
expect_false()
{
set -x
! "$@"
}
## version + --image
# 'version' should report the ceph version baked into the requested image.
$SUDO $CEPH_DAEMON --image ceph/daemon-base:latest-nautilus version \
| grep 'ceph version 14'
$SUDO $CEPH_DAEMON --image ceph/daemon-base:latest-mimic version \
| grep 'ceph version 13'
$SUDO $CEPH_DAEMON --image $IMAGE version | grep 'ceph version'
# try force docker; this won't work if docker isn't installed
which docker && ( $SUDO $CEPH_DAEMON --docker version | grep 'ceph version' )
## bootstrap
# Bootstrap a one-mon, one-mgr cluster from a minimal seed config and
# capture the generated config/keyring into $TMPDIR for later commands.
ORIG_CONFIG=`mktemp -p $TMPDIR`
CONFIG=`mktemp -p $TMPDIR`
KEYRING=`mktemp -p $TMPDIR`
IP=127.0.0.1
cat <<EOF > $ORIG_CONFIG
[global]
log to file = true
EOF
$SUDO $CEPH_DAEMON --image $IMAGE bootstrap \
--mon-id a \
--mgr-id x \
--mon-ip $IP \
--fsid $FSID \
--config $ORIG_CONFIG \
--output-config $CONFIG \
--output-keyring $KEYRING \
--skip-ssh
# bootstrap must have written out the admin config and keyring
test -e $CONFIG
test -e $KEYRING
rm -f $ORIG_CONFIG
# 'log to file = true' in the seed config should produce daemon logs on the host
$SUDO test -e /var/log/ceph/$FSID/ceph-mon.a.log
$SUDO test -e /var/log/ceph/$FSID/ceph-mgr.x.log
# all per-cluster systemd units should be enabled and running
for u in ceph.target \
ceph-$FSID.target \
ceph-$FSID@mon.a \
ceph-$FSID@mgr.x; do
systemctl is-enabled $u
systemctl is-active $u
done
systemctl | grep system-ceph | grep -q .slice # naming is escaped and annoying
# check ceph -s works (via shell w/ passed config/keyring)
$SUDO $CEPH_DAEMON shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
ceph -s | grep $FSID
## ls
# 'ls' output is JSON; both bootstrapped daemons must list our FSID
$SUDO $CEPH_DAEMON ls | jq '.[]' | jq 'select(.name == "mon.a").fsid' \
| grep $FSID
$SUDO $CEPH_DAEMON ls | jq '.[]' | jq 'select(.name == "mgr.x").fsid' \
| grep $FSID
## deploy
# add mon.b
# second mon on a distinct port, reusing mon.a's keyring from the host
$SUDO $CEPH_DAEMON --image $IMAGE deploy --name mon.b \
--fsid $FSID \
--mon-ip $IP:3301 \
--keyring /var/lib/ceph/$FSID/mon.a/keyring \
--config $CONFIG
for u in ceph-$FSID@mon.b; do
systemctl is-enabled $u
systemctl is-active $u
done
# add mgr.y
# mint a keyring for the new mgr via 'shell', then deploy it
$SUDO $CEPH_DAEMON shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
ceph auth get-or-create mgr.y \
mon 'allow profile mgr' \
osd 'allow *' \
mds 'allow *' > $TMPDIR/keyring.mgr.y
$SUDO $CEPH_DAEMON --image $IMAGE deploy --name mgr.y \
--fsid $FSID \
--keyring $TMPDIR/keyring.mgr.y \
--config $CONFIG
for u in ceph-$FSID@mgr.y; do
systemctl is-enabled $u
systemctl is-active $u
done
# poll up to 30s for mgr.y to register as a standby, then assert it
for f in `seq 1 30`; do
if $SUDO $CEPH_DAEMON shell --fsid $FSID \
--config $CONFIG --keyring $KEYRING -- \
ceph -s -f json-pretty \
| jq '.mgrmap.num_standbys' | grep -q 1 ; then break; fi
sleep 1
done
$SUDO $CEPH_DAEMON shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
ceph -s -f json-pretty \
| jq '.mgrmap.num_standbys' | grep -q 1
## run
# WRITE ME
## adopt
# WRITE ME
## unit
# 'unit' proxies systemctl verbs for a single daemon; exercise the round trip
$SUDO $CEPH_DAEMON unit --fsid $FSID --name mon.a -- is-enabled
$SUDO $CEPH_DAEMON unit --fsid $FSID --name mon.a -- is-active
expect_false $SUDO $CEPH_DAEMON unit --fsid $FSID --name mon.xyz -- is-active
$SUDO $CEPH_DAEMON unit --fsid $FSID --name mon.a -- disable
expect_false $SUDO $CEPH_DAEMON unit --fsid $FSID --name mon.a -- is-enabled
$SUDO $CEPH_DAEMON unit --fsid $FSID --name mon.a -- enable
$SUDO $CEPH_DAEMON unit --fsid $FSID --name mon.a -- is-enabled
## shell
# shell takes an optional command; with --fsid it mounts cluster paths
$SUDO $CEPH_DAEMON --image $IMAGE shell -- true
$SUDO $CEPH_DAEMON --image $IMAGE shell --fsid $FSID -- test -d /var/log/ceph
## enter
# enter requires --name; exit code must reflect the command run inside
expect_false $SUDO $CEPH_DAEMON enter
$SUDO $CEPH_DAEMON enter --fsid $FSID --name mon.a -- test -d /var/lib/ceph/mon/ceph-a
$SUDO $CEPH_DAEMON enter --fsid $FSID --name mgr.x -- test -d /var/lib/ceph/mgr/ceph-x
$SUDO $CEPH_DAEMON enter --fsid $FSID --name mon.a -- pidof ceph-mon
expect_false $SUDO $CEPH_DAEMON enter --fsid $FSID --name mgr.x -- pidof ceph-mon
$SUDO $CEPH_DAEMON enter --fsid $FSID --name mgr.x -- pidof ceph-mgr
## ceph-volume
# inventory output must land on stdout as parseable JSON
$SUDO $CEPH_DAEMON --image $IMAGE ceph-volume --fsid $FSID -- inventory --format=json \
| jq '.[]'
## rm-daemon
# mon and osd require --force
expect_false $SUDO $CEPH_DAEMON rm-daemon --fsid $FSID --name mon.a
# mgr does not
$SUDO $CEPH_DAEMON rm-daemon --fsid $FSID --name mgr.x
## rm-cluster
# removing a whole cluster always requires --force
expect_false $SUDO $CEPH_DAEMON rm-cluster --fsid $FSID
$SUDO $CEPH_DAEMON rm-cluster --fsid $FSID --force
rm -rf $TMPDIR
echo PASS

View File

@ -0,0 +1,9 @@
# Teuthology job: run the ceph-daemon functional test as a workunit on a
# single node carrying a mon, mgr, osd, and client role.
roles:
- [mon.a, mgr.x, osd.0, client.0]
tasks:
- install:
- workunit:
# run scripts out of qa/standalone rather than the default workunits dir
basedir: qa/standalone
clients:
client.0:
- test_ceph_daemon.sh

View File

@ -548,7 +548,6 @@ def deploy_crash(fsid, uid, gid, config, keyring):
f.write(config)
# ceph-crash unit
existed = os.path.exists(args.unit_dir + '/ceph-%s-crash.service' % fsid)
mounts = {
crash_dir: '/var/lib/ceph/crash:z',
os.path.join(crash_dir, 'config'): '/etc/ceph/ceph.conf:z',
@ -585,9 +584,8 @@ def deploy_crash(fsid, uid, gid, config, keyring):
)
os.rename(os.path.join(args.unit_dir, unit_name + '.new'),
os.path.join(args.unit_dir, unit_name))
if not existed:
subprocess.check_output(['systemctl', 'enable', unit_name])
subprocess.check_output(['systemctl', 'start', unit_name])
subprocess.check_output(['systemctl', 'enable', unit_name])
subprocess.check_output(['systemctl', 'start', unit_name])
def get_unit_file(fsid):
u = """[Unit]
@ -681,7 +679,7 @@ class CephContainer:
self.image
] + self.args
def shell_cmd(self):
def shell_cmd(self, cmd):
vols = sum(
[['-v', f'{host_dir}:{container_dir}']
for host_dir, container_dir in self.volume_mounts.items()], [])
@ -689,22 +687,25 @@ class CephContainer:
'-e', f'CONTAINER_IMAGE={self.image}',
'-e', f'NODE_NAME={get_hostname()}',
]
cmd_args = []
if cmd:
cmd_args = ['-c'] + cmd
return [
podman_path,
'run',
'-it',
'--net=host',
'--privileged',
'--env', 'LANG=C',
] + self.podman_args + envs + vols + [
'--entrypoint', '/bin/bash',
'--entrypoint', cmd[0],
self.image
]
] + cmd[1:]
def exec_cmd(self, cmd):
return [
podman_path,
'exec',
'--env', 'LANG=C',
'-it',
self.cname,
] + cmd
@ -922,6 +923,7 @@ def command_bootstrap():
deploy_daemon(fsid, 'mgr', mgr_id, mgr_c, uid, gid, config, mgr_keyring)
# crash unit
logger.info('Creating crash agent...')
deploy_crash(fsid, uid, gid, config,
'[client.crash.%s]\n\tkey = %s\n' % (hostname, crash_key))
@ -1093,7 +1095,7 @@ def command_deploy():
def command_run():
(daemon_type, daemon_id) = args.name.split('.')
c = get_container(args.fsid, daemon_type, daemon_id)
subprocess.call(c.run_cmd())
return subprocess.call(c.run_cmd())
##################################
@ -1120,22 +1122,14 @@ def command_shell():
args=[],
podman_args=['--privileged'],
volume_mounts=mounts)
subprocess.call(c.shell_cmd())
return subprocess.call(c.shell_cmd(args.command))
##################################
def command_enter():
(daemon_type, daemon_id) = args.name.split('.')
c = get_container(args.fsid, daemon_type, daemon_id)
subprocess.call(c.exec_cmd(['bash']))
##################################
def command_exec():
(daemon_type, daemon_id) = args.name.split('.')
c = get_container(args.fsid, daemon_type, daemon_id,
privileged=args.privileged)
subprocess.call(c.exec_cmd(args.command))
return subprocess.call(c.exec_cmd(args.command))
##################################
@ -1174,7 +1168,9 @@ def command_ceph_volume():
podman_args=['--privileged'],
volume_mounts=mounts,
)
call_throws(c.run_cmd(), verbose=True)
out, err, code = call_throws(c.run_cmd(), verbose=True)
if not code:
print(out)
##################################
@ -1214,13 +1210,22 @@ def command_ls():
elif is_fsid(i):
fsid = i
for j in os.listdir(os.path.join(args.data_dir, i)):
(daemon_type, daemon_id) = j.split('.', 1)
(enabled, active) = check_unit(get_unit_name(fsid,
daemon_type,
daemon_id))
if j == 'crash':
name = 'crash'
unit_name = 'ceph-%s-crash.service' % fsid
(enabled, active) = check_unit(unit_name)
else:
bits = j.split('.')
if len(bits) != 2:
continue
name = j
(daemon_type, daemon_id) = bits
(enabled, active) = check_unit(get_unit_name(fsid,
daemon_type,
daemon_id))
ls.append({
'style': 'ceph-daemon:v1',
'name': '%s.%s' % (daemon_type, daemon_id),
'name': name,
'fsid': fsid,
'enabled': enabled,
'active': active,
@ -1330,7 +1335,7 @@ def command_rm_cluster():
call_throws(['rm', '-f', args.unit_dir +
'/ceph-%s@.service' % args.fsid])
call_throws(['rm', '-f', args.unit_dir +
'/ceph-%s-crash@.service' % args.fsid])
'/ceph-%s-crash.service' % args.fsid])
call_throws(['rm', '-f', args.unit_dir +
'/ceph-%s.target' % args.fsid])
call_throws(['rm', '-rf',
@ -1459,6 +1464,10 @@ parser_shell.add_argument(
parser_shell.add_argument(
'--keyring', '-k',
help='ceph.keyring to pass through to the container')
parser_shell.add_argument(
'command', nargs='*',
default=['bash'],
help='command (optional)')
parser_enter = subparsers.add_parser(
'enter', help='run an interactive shell inside a running daemon container')
@ -1471,24 +1480,9 @@ parser_enter.add_argument(
'--name', '-n',
required=True,
help='daemon name (type.id)')
parser_exec = subparsers.add_parser(
'exec', help='run command inside a running daemon container')
parser_exec.set_defaults(func=command_exec)
parser_exec.add_argument(
'--fsid',
required=True,
help='cluster FSID')
parser_exec.add_argument(
'--name', '-n',
required=True,
help='daemon name (type.id)')
parser_exec.add_argument(
'--privileged',
action='store_true',
help='use a privileged container')
parser_exec.add_argument(
'command', nargs='+',
parser_enter.add_argument(
'command', nargs='*',
default=['bash'],
help='command')
parser_ceph_volume = subparsers.add_parser(

View File

@ -868,7 +868,7 @@ class InventoryDevice(object):
dev = InventoryDevice()
dev.id = data["path"]
dev.type = 'hdd' if data["sys_api"]["rotational"] == "1" else 'sdd/nvme'
dev.type = 'hdd' if data["sys_api"]["rotational"] == "1" else 'ssd/nvme'
dev.size = data["sys_api"]["size"]
dev.rotates = data["sys_api"]["rotational"] == "1"
dev.available = data["available"]

View File

@ -377,10 +377,7 @@ class SSHOrchestrator(MgrModule, orchestrator.Orchestrator):
conn,
['/usr/bin/python3', '-u'],
stdin=script.encode('utf-8'))
if code:
self.log.debug('code %s, err %s' % (code, err))
# ceph-daemon combines stdout and stderr, so ignore err.
self.log.debug('code %s out %s' % (code, out))
self.log.debug('exit code %s out %s err %s' % (code, out, err))
return out, code
except Exception as ex:
@ -421,9 +418,7 @@ class SSHOrchestrator(MgrModule, orchestrator.Orchestrator):
host, 'osd',
'ceph-volume',
['--', 'inventory', '--format=json'])
# stdout and stderr get combined; assume last line is the real
# output and everything preceding it is an error.
data = json.loads(out[-1])
data = json.loads(''.join(out))
host_info = orchestrator.OutdatableData(data)
self.inventory_cache[host] = host_info
else:

View File

@ -1,7 +1,7 @@
#!/bin/bash -ex
fsid=2a833e3f-53e4-49a7-a7a0-bd89d193ab62
image=ceph/daemon-base:latest-master-devel
image=ceph/daemon-base:latest-master
[ -z "$ip" ] && ip=127.0.0.1
#A="-d"