qa: add test for snap format upgrade
Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
commit 91942df5a6 (parent aedd5301dc)

qa/suites/fs/upgrade/snaps/%  (new empty file)
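The empty '%' file is the teuthology suite-builder marker that turns this directory into a matrix: one job is generated per combination of the facet directories below it, while a '+' file (as in overrides/ further down) merges every fragment in its directory into each job. A rough Python illustration of the cross-product convention follows; the facet contents are made up for the example and this is not teuthology's actual implementation.

# Made-up facets; only illustrates the '%' cross-product convention.
from itertools import product

facets = {
    'clusters': ['3-mds.yaml'],
    'objectstore-ec': ['bluestore.yaml', 'bluestore-ec-root.yaml'],
    'tasks': ['snap-upgrade-sequence'],
}
for combo in product(*facets.values()):
    # each combination becomes one scheduled teuthology job
    print(' / '.join(combo))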
qa/suites/fs/upgrade/snaps/clusters/3-mds.yaml  (new symbolic link)
  -> ../../../../../cephfs/clusters/3-mds.yaml
qa/suites/fs/upgrade/snaps/objectstore-ec  (new symbolic link)
  -> ../../../../cephfs/objectstore-ec/
qa/suites/fs/upgrade/snaps/overrides/+  (new empty file)
qa/suites/fs/upgrade/snaps/overrides/debug.yaml  (new symbolic link)
  -> ../../../../../cephfs/overrides/debug.yaml
qa/suites/fs/upgrade/snaps/overrides/frag_enable.yaml  (new symbolic link)
  -> ../../../../../cephfs/overrides/frag_enable.yaml
qa/suites/fs/upgrade/snaps/overrides/no_multimds.yaml  (new file)

overrides:
  ceph:
    max_mds: 1
qa/suites/fs/upgrade/snaps/overrides/whitelist_health.yaml  (new symbolic link)
  -> ../../../../../cephfs/overrides/whitelist_health.yaml

qa/suites/fs/upgrade/snaps/overrides/whitelist_wrongly_marked_down.yaml  (new symbolic link)
  -> ../../../../../cephfs/overrides/whitelist_wrongly_marked_down.yaml
qa/suites/fs/upgrade/snaps/tasks/%  (new empty file)
qa/suites/fs/upgrade/snaps/tasks/0-luminous.yaml  (new file)

meta:
- desc: |
   install ceph/luminous latest
tasks:
- install:
    branch: luminous
- print: "**** done installing luminous"
- ceph:
    log-whitelist:
      - overall HEALTH_
      - \(FS_
      - \(MDS_
      - \(OSD_
      - \(MON_DOWN\)
      - \(CACHE_POOL_
      - \(POOL_
      - \(MGR_DOWN\)
      - \(PG_
      - \(SMALLER_PGP_NUM\)
      - Monitor daemon marked osd
      - Behind on trimming
      - Manager daemon
    conf:
      global:
        mon warn on pool no app: false
- exec:
    osd.0:
      - ceph osd require-osd-release luminous
      - ceph osd set-require-min-compat-client luminous
- print: "**** done ceph"
qa/suites/fs/upgrade/snaps/tasks/1-client.yaml  (new file)

tasks:
- ceph-fuse:
- print: "**** done luminous client"
- exec:
    mon.a:
      - ceph fs set cephfs allow_new_snaps true --yes-i-really-mean-it
- workunit:
    timeout: 5m
    cleanup: false
    clients:
      client.0:
        - fs/snaps/snap-hierarchy.sh
- print: "**** done snap hierarchy"
qa/suites/fs/upgrade/snaps/tasks/2-upgrade.yaml  (new file)

tasks:
- mds_pre_upgrade:
- print: "**** done mds pre-upgrade sequence"
- install.upgrade:
    mon.a:
    mon.b:
- print: "**** done install.upgrade both hosts"
- ceph.stop: [mds.*]
- ceph.restart:
    daemons: [mon.*, mgr.*, osd.*, mds.*]
    mon-health-to-clog: false
- print: "**** done ceph.restart"
qa/suites/fs/upgrade/snaps/tasks/3-sanity.yaml  (new file)

tasks:
- exec:
    mon.a:
      - ceph status
      - ceph fs dump --format=json-pretty
      - ceph fs set cephfs max_mds 2 && exit 1 || true
- print: "**** confirmed cannot set max_mds=2"
- exec:
    mon.a:
      - ceph fs set cephfs allow_new_snaps true
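The `ceph fs set cephfs max_mds 2 && exit 1 || true` line relies on the command failing while pre-upgrade snapshot metadata is still present: if the mon unexpectedly accepts the change, `exit 1` fails the exec step, and if it refuses, `|| true` keeps the step green. A minimal Python sketch of the same negative check, outside teuthology; `run_ceph` is a hypothetical wrapper and not part of this commit.

# Illustrative only; run_ceph is a hypothetical helper, not part of this commit.
import subprocess

def run_ceph(*args):
    """Run a ceph CLI command and return its exit status."""
    return subprocess.call(("ceph",) + args)

# The upgrade gate: raising max_mds must be rejected until snapshot
# metadata has been upgraded, so success here is the failure case.
if run_ceph("fs", "set", "cephfs", "max_mds", "2") == 0:
    raise AssertionError("max_mds=2 was accepted before the snap format upgrade")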
qa/suites/fs/upgrade/snaps/tasks/4-client-upgrade/yes.yaml  (new file)

tasks:
- install.upgrade:
    client.0:
- print: "**** done install.upgrade on client.0"
- ceph-fuse:
    client.0:
      mounted: false
- ceph-fuse:
    client.0:
- print: "**** done remount client"
qa/suites/fs/upgrade/snaps/tasks/5-client-sanity.yaml  (new file)

tasks:
- workunit:
    timeout: 5m
    cleanup: false
    env:
      VERIFY: verify
    clients:
      client.0:
        - fs/snaps/snap-hierarchy.sh
- print: "**** done verify snap hierarchy"
qa/suites/fs/upgrade/snaps/tasks/6-snap-upgrade.yaml  (new file)

overrides:
  ceph:
    log-whitelist:
      - bad backtrace on inode
tasks:
- cephfs_upgrade_snap:
- print: "**** upgraded snapshot metadata"
- exec:
    mon.a:
      - ceph fs set cephfs max_mds 2
- print: "**** increased max_mds=2"
- sleep:
    duration: 10
- exec:
    mon.a:
      - ceph fs dump | grep '^max_mds.*2'
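The final exec step greps the plain `ceph fs dump` output for a `max_mds` of 2. An equivalent check could parse the JSON dump instead, which is less sensitive to formatting changes. A hedged sketch; the JSON field names are assumed from the FSMap dump format and are not confirmed by this commit.

# Illustrative alternative to the grep check; field names are assumptions.
import json
import subprocess

dump = json.loads(subprocess.check_output(
    ["ceph", "fs", "dump", "--format=json"]))
for fs in dump.get("filesystems", []):
    if fs["mdsmap"]["fs_name"] == "cephfs":
        assert fs["mdsmap"]["max_mds"] == 2, "max_mds was not raised to 2"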
qa/suites/fs/upgrade/snaps/tasks/7-client-sanity.yaml  (new symbolic link)
  -> 5-client-sanity.yaml
qa/tasks/ceph.py

@@ -376,7 +376,7 @@ def cephfs_setup(ctx, config):
        all_roles = [item for remote_roles in mdss.remotes.values() for item in remote_roles]
        num_active = len([r for r in all_roles if is_active_mds(r)])

-        fs.set_max_mds(num_active)
+        fs.set_max_mds(config.get('max_mds', num_active))

    yield

qa/tasks/cephfs/filesystem.py

@@ -728,6 +728,16 @@ class Filesystem(MDSCluster):

        return result

+    def get_rank(self, rank=0, status=None):
+        if status is None:
+            status = self.getinfo()
+        return status.get_rank(self.id, rank)
+
+    def get_ranks(self, status=None):
+        if status is None:
+            status = self.getinfo()
+        return status.get_ranks(self.id)
+
    def get_rank_names(self, status=None):
        """
        Return MDS daemon names of those daemons holding a rank,
@@ -854,6 +864,10 @@ class Filesystem(MDSCluster):

        return self.json_asok(command, 'mds', mds_id)

+    def rank_asok(self, command, rank=0):
+        info = self.get_rank(rank=rank)
+        return self.json_asok(command, 'mds', info['name'])
+
    def read_cache(self, path, depth=None):
        cmd = ["dump", "tree", path]
        if depth is not None:
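The new helpers let QA tasks address an MDS by rank instead of by daemon name. A hedged usage sketch, assuming a teuthology context `ctx` with a configured CephFS file system as in the tasks below; the `ops` admin-socket query is just one example command.

# Illustrative usage of the new helpers; not part of this commit.
from tasks.cephfs.filesystem import Filesystem

def dump_rank_ops(ctx):
    fs = Filesystem(ctx)
    status = fs.getinfo()
    # Iterate every active rank and query its admin socket by rank number.
    for info in fs.get_ranks(status=status):
        ops = fs.rank_asok(["ops"], rank=info['rank'])
        # json_asok returns the decoded JSON reply from the daemon.
        print(info['name'], len(ops.get('ops', [])) if ops else 0)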
qa/tasks/cephfs_upgrade_snap.py  (new file)

"""
Upgrade cluster snap format.
"""

import logging
import time

from tasks.cephfs.filesystem import Filesystem

log = logging.getLogger(__name__)

def task(ctx, config):
    """
    Upgrade CephFS file system snap format.
    """

    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'snap-upgrade task only accepts a dict for configuration'

    fs = Filesystem(ctx)

    mds_map = fs.get_mds_map()
    assert(mds_map['max_mds'] == 1)

    json = fs.rank_asok(["scrub_path", "/", "force", "recursive", "repair"])
    if not json or json['return_code'] == 0:
        log.info("scrub / completed")
    else:
        log.info("scrub / failed: {}".format(json))

    json = fs.rank_asok(["scrub_path", "~mdsdir", "force", "recursive", "repair"])
    if not json or json['return_code'] == 0:
        log.info("scrub ~mdsdir completed")
    else:
        log.info("scrub ~mdsdir failed: {}".format(json))

    for i in range(0, 10):
        mds_map = fs.get_mds_map()
        if (mds_map['flags'] & (1<<1)) != 0 and (mds_map['flags'] & (1<<4)) != 0:
            break
        time.sleep(10)
    assert((mds_map['flags'] & (1<<1)) != 0)  # Test CEPH_MDSMAP_ALLOW_SNAPS
    assert((mds_map['flags'] & (1<<4)) != 0)  # Test CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS
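The two bit tests at the end correspond to the MDSMap flags named in the inline comments. A small sketch that gives them symbolic names; the constants mirror the bit positions used above and are not imported from Ceph itself.

# Illustrative decoding of the mds_map 'flags' field; values taken from the
# comments above, not from a Ceph header.
CEPH_MDSMAP_ALLOW_SNAPS = 1 << 1
CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS = 1 << 4

def snaps_fully_enabled(mds_map):
    """Return True once both snapshot-related flags are set on the MDSMap."""
    flags = mds_map['flags']
    return (flags & CEPH_MDSMAP_ALLOW_SNAPS) != 0 and \
           (flags & CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS) != 0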
qa/tasks/mds_pre_upgrade.py  (new file)

"""
Prepare MDS cluster for upgrade.
"""

import logging
import time

from tasks.cephfs.filesystem import Filesystem

log = logging.getLogger(__name__)

def task(ctx, config):
    """
    Prepare MDS cluster for upgrade.

    This task reduces ranks to 1 and stops all standbys.
    """

    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'mds_pre_upgrade task only accepts a dict for configuration'

    fs = Filesystem(ctx)
    status = fs.getinfo()

    fs.set_max_mds(1)
    status = fs.getinfo()
    targets = list(filter(lambda r: r['rank'] >= 1, fs.get_ranks(status=status)))
    if len(targets) > 0:
        # deactivate mds in descending order
        targets = sorted(targets, key=lambda r: r['rank'], reverse=True)
        for target in targets:
            log.info("deactivating rank %d" % target['rank'])
            fs.deactivate(target['rank'])
    # wait for the cluster to settle on a single active rank
    for i in range(0, 30):
        status = fs.getinfo()
        if len(list(fs.get_ranks(status=status))) == 1:
            break
        time.sleep(5)

    assert(fs.get_mds_map(status=status)['max_mds'] == 1)
    assert(fs.get_mds_map(status=status)['in'] == [0])

    # Stop standbys now to minimize time rank 0 is down in subsequent:
    # tasks:
    #   - ceph.stop: [mds.*]
    rank0 = fs.get_rank(rank=0, status=status)
    for daemon in ctx.daemons.iter_daemons_of_role('mds', fs.mon_manager.cluster):
        if rank0['name'] != daemon.id_:
            daemon.stop()

    for i in range(1, 10):
        time.sleep(5)  # time for FSMap to update
        status = fs.getinfo()
        if len(list(status.get_standbys())) == 0:
            break
    assert(len(list(status.get_standbys())) == 0)
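The task ends by polling the FSMap until no standbys remain, and the same poll-with-deadline pattern also appears when waiting for the rank count to drop. A generic helper could factor it out; the sketch below is illustrative only and not part of this commit.

# Sketch of a generic polling helper; not part of this commit.
import time

def wait_until(predicate, timeout=60, interval=5):
    """Poll predicate() until it returns True or timeout seconds elapse."""
    elapsed = 0
    while True:
        if predicate():
            return True
        if elapsed >= timeout:
            return False
        time.sleep(interval)
        elapsed += interval

# e.g. wait_until(lambda: len(list(fs.getinfo().get_standbys())) == 0)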
qa/tasks/workunit.py

@@ -160,6 +160,7 @@ def task(ctx, config):
        refspec = Head()

    timeout = config.get('timeout', '3h')
+    cleanup = config.get('cleanup', True)

    log.info('Pulling workunits from ref %s', refspec)

@@ -181,24 +182,28 @@ def task(ctx, config):
            created_mountpoint[role] = created_mnt_dir

    # Execute any non-all workunits
+    log.info("timeout={}".format(timeout))
+    log.info("cleanup={}".format(cleanup))
    with parallel() as p:
        for role, tests in clients.iteritems():
            if role != "all":
                p.spawn(_run_tests, ctx, refspec, role, tests,
                        config.get('env'),
                        basedir=config.get('basedir','qa/workunits'),
-                        timeout=timeout)
+                        timeout=timeout,cleanup=cleanup)

-    # Clean up dirs from any non-all workunits
-    for role, created in created_mountpoint.items():
-        _delete_dir(ctx, role, created)
+    if cleanup:
+        # Clean up dirs from any non-all workunits
+        for role, created in created_mountpoint.items():
+            _delete_dir(ctx, role, created)

    # Execute any 'all' workunits
    if 'all' in clients:
        all_tasks = clients["all"]
        _spawn_on_all_clients(ctx, refspec, all_tasks, config.get('env'),
                              config.get('basedir', 'qa/workunits'),
-                              config.get('subdir'), timeout=timeout)
+                              config.get('subdir'), timeout=timeout,
+                              cleanup=cleanup)


def _client_mountpoint(ctx, cluster, id_):

@@ -326,7 +331,7 @@ def _make_scratch_dir(ctx, role, subdir):
    return created_mountpoint


-def _spawn_on_all_clients(ctx, refspec, tests, env, basedir, subdir, timeout=None):
+def _spawn_on_all_clients(ctx, refspec, tests, env, basedir, subdir, timeout=None, cleanup=True):
    """
    Make a scratch directory for each client in the cluster, and then for each
    test spawn _run_tests() for each role.

@@ -351,12 +356,13 @@ def _spawn_on_all_clients(ctx, refspec, tests, env, basedir, subdir, timeout=Non
                    timeout=timeout)

    # cleanup the generated client directories
-    for role, _ in client_remotes.items():
-        _delete_dir(ctx, role, created_mountpoint[role])
+    if cleanup:
+        for role, _ in client_remotes.items():
+            _delete_dir(ctx, role, created_mountpoint[role])


def _run_tests(ctx, refspec, role, tests, env, basedir,
-               subdir=None, timeout=None):
+               subdir=None, timeout=None, cleanup=True):
    """
    Run the individual test. Create a scratch directory and then extract the
    workunits from git. Make the executables, and then run the tests.

@@ -472,10 +478,11 @@ def _run_tests(ctx, refspec, role, tests, env, basedir,
                    args=args,
                    label="workunit test {workunit}".format(workunit=workunit)
                )
-                remote.run(
-                    logger=log.getChild(role),
-                    args=['sudo', 'rm', '-rf', '--', scratch_tmp],
-                )
+                if cleanup:
+                    remote.run(
+                        logger=log.getChild(role),
+                        args=['sudo', 'rm', '-rf', '--', scratch_tmp],
+                    )
    finally:
        log.info('Stopping %s on %s...', tests, role)
        remote.run(
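The workunit task gains a `cleanup` option that defaults to true. With `cleanup: false`, the scratch directory created for fs/snaps/snap-hierarchy.sh in 1-client.yaml survives the run, so 5-client-sanity.yaml can re-run the script with VERIFY=verify against the same snapshot hierarchy after the upgrade. A hedged example of roughly the config dict that step would hand to the task; the exact teuthology plumbing is not part of this commit.

# Roughly the parsed form of the 1-client.yaml workunit stanza; illustrative only.
workunit_config = {
    'timeout': '5m',
    'cleanup': False,   # keep the scratch dir for the later verify pass
    'clients': {
        'client.0': ['fs/snaps/snap-hierarchy.sh'],
    },
}
# task(ctx, workunit_config) would then skip the final 'rm -rf' of scratch_tmp.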
qa/workunits/fs/snaps/snap-hierarchy.sh  (new executable file)

#!/bin/sh

set -ex

if [ -d "$1" ]; then
    mkdir -p -- "$1" && cd "$1"
fi

[ "$VERIFY" != verify ] && mkdir 1
[ "$VERIFY" != verify ] && mkdir 1/.snap/first
stat 1/.snap/first
[ "$VERIFY" != verify ] && mkdir 1/2
stat 1/.snap/first/2 && exit 1
[ "$VERIFY" != verify ] && mkdir 1/2/.snap/second
stat 1/2/.snap/second
[ "$VERIFY" != verify ] && touch 1/foo
stat 1/.snap/first/foo && exit 1
[ "$VERIFY" != verify ] && mkdir 1/.snap/third
stat 1/.snap/third/foo || exit 1
[ "$VERIFY" != verify ] && mkdir 1/2/3
[ "$VERIFY" != verify ] && mkdir 1/2/.snap/fourth
stat 1/2/.snap/fourth/3

exit 0
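The script runs in two modes: with VERIFY unset it builds the nested snapshot hierarchy (snapshots are taken by creating directories under .snap), and with VERIFY=verify, as in 5-client-sanity.yaml and 7-client-sanity.yaml, it only re-checks that the snapshots still resolve after the upgrade. A hedged Python restatement of the first few checks; the paths mirror the script and the function is hypothetical, not part of this commit.

# Illustrative restatement of the script's create-or-verify split; hypothetical helper.
import os

def snap_hierarchy(base, verify=False):
    os.chdir(base)
    if not verify:
        os.mkdir('1')
        os.mkdir('1/.snap/first')   # taking a snapshot of dir 1 on CephFS
    os.stat('1/.snap/first')        # the snapshot must exist in both phases
    if not verify:
        os.mkdir('1/2')
    # a directory created after the snapshot must not appear inside it
    try:
        os.stat('1/.snap/first/2')
    except OSError:
        pass
    else:
        raise AssertionError('1/2 leaked into snapshot "first"')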