mirror of
https://github.com/ceph/ceph
synced 2025-03-11 02:39:05 +00:00
Verify that all OSDs have started before checking health
Just checking health isn't good enough, since the check races with OSD startup: the cluster can report itself healthy while 0 (or any number fewer than the total) OSDs have actually booted.
This commit is contained in:
parent
f4883ebf09
commit
50463ffddd
@ -9,6 +9,7 @@ import time
|
||||
import urllib2
|
||||
import urlparse
|
||||
import yaml
|
||||
import json
|
||||
|
||||
from .orchestra import run
|
||||
|
||||
@ -286,6 +287,31 @@ def wait_until_healthy(remote):
|
||||
break
|
||||
time.sleep(1)
|
||||
|
||||
def _osd_is_up(osd):
    """Return True if one entry of the ``osd dump`` JSON reports the OSD as up.

    NOTE(review): the field names below are assumptions about the
    ``ceph osd dump --format=json`` schema (an integer ``up`` flag and/or a
    ``state`` list containing ``'up'``) -- confirm against the Ceph version
    under test.  When an entry carries neither field we fall back to the
    previous behaviour and treat mere presence in the dump as "up", so this
    can never wait longer than the old code did on an unknown schema.
    """
    if 'up' in osd:
        return bool(osd['up'])
    if 'state' in osd:
        return 'up' in osd['state']
    return True


def wait_until_osds_up(cluster, remote):
    """Wait until all Ceph OSDs are booted.

    Repeatedly runs ``ceph osd dump --format=json`` on ``remote`` and blocks
    until the number of OSDs reported as up equals the number of ``osd``
    instances that ``num_instances_of_type`` finds in ``cluster``.

    :param cluster: cluster object handed to ``num_instances_of_type`` to
                    obtain the expected OSD count.
    :param remote:  remote on which the ``ceph`` command is run; must
                    provide ``run(args=..., stdout=..., logger=...)``.

    There is no timeout: the loop polls once per second until the counts
    match.
    """
    num_osds = num_instances_of_type(cluster, 'osd')
    while True:
        r = remote.run(
            args=[
                '/tmp/cephtest/enable-coredump',
                '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
                '/tmp/cephtest/archive/coverage',
                '/tmp/cephtest/binary/usr/local/bin/ceph',
                '-c', '/tmp/cephtest/ceph.conf',
                '--concise',
                'osd', 'dump', '--format=json'
                ],
            stdout=StringIO(),
            logger=log.getChild('health'),
            )
        out = r.stdout.getvalue()
        # The first output line is discarded before parsing; presumably it is
        # a non-JSON status line printed by the ceph tool -- TODO confirm.
        j = json.loads('\n'.join(out.split('\n')[1:]))
        # Count only OSDs that are actually up.  Counting every entry in the
        # dump would also include OSDs that exist in the map but have not
        # booted yet, ending the wait too early.
        up = sum(1 for osd in j['osds'] if _osd_is_up(osd))
        log.debug('%d of %d OSDs are up' % (up, num_osds))
        if up == num_osds:
            break
        time.sleep(1)
|
||||
|
||||
def wait_until_fuse_mounted(remote, fuse, mountpoint):
|
||||
while True:
|
||||
proc = remote.run(
|
||||
|
@ -904,6 +904,10 @@ def healthy(ctx, config):
|
||||
log.info('Waiting until ceph is healthy...')
|
||||
firstmon = teuthology.get_first_mon(ctx, config)
|
||||
(mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
|
||||
teuthology.wait_until_osds_up(
|
||||
cluster=ctx.cluster,
|
||||
remote=mon0_remote
|
||||
)
|
||||
teuthology.wait_until_healthy(
|
||||
remote=mon0_remote,
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user