qa/tasks/rbd: test qemu on top of rbd encryption

This commit adds new qemu xfstests workloads that run on top of librbd luks1/luks2 encryption.
The encrypted images are currently attached via rbd-nbd rather than through the qemu rbd driver.

Signed-off-by: Or Ozeri <oro@il.ibm.com>
Or Ozeri 2021-01-14 17:03:42 +02:00
parent 0e7a7be5c0
commit 3754c665a1
29 changed files with 331 additions and 37 deletions
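At a high level, the per-disk flow driven by the new task code can be sketched in Python as follows. This is illustrative only, not the teuthology code itself; the pool, user id, image name and passphrase path are placeholders:

import subprocess

POOL = 'rbd'                           # pool used throughout the qa fragments below
PASSPHRASE_FILE = '/tmp/passphrase'    # placeholder; the task writes it under the test dir

def attach_encrypted_image(image, fmt='luks1', user='0'):
    """Format an RBD image with LUKS and expose it as a raw device via rbd-nbd.

    Returns the local device path that qemu is later pointed at.
    """
    # 1. Apply LUKS metadata to the image (mirrors the new rbd.create_image step).
    subprocess.check_call([
        'rbd', 'encryption', 'format', image, fmt, PASSPHRASE_FILE, '-p', POOL,
    ])
    # 2. Map through rbd-nbd so the guest sees decrypted data (mirrors rbd.dev_create).
    out = subprocess.check_output([
        'rbd', '--id', user, '-p', POOL, 'map', image, '-t', 'nbd',
        '-o', 'encryption-format=%s,encryption-passphrase-file=%s' % (fmt, PASSPHRASE_FILE),
    ])
    return out.decode().strip()        # e.g. /dev/nbd0

qemu is then pointed at the returned device instead of an rbd: URI, which is why the workloads below install rbd-nbd.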

@@ -0,0 +1 @@
../.qa/

qa/suites/rbd/encryption/cache/.qa (symbolic link)
@@ -0,0 +1 @@
../.qa/

@@ -0,0 +1,6 @@
tasks:
- install:
- ceph:
    conf:
      client:
        rbd cache: false

@@ -0,0 +1,7 @@
tasks:
- install:
- ceph:
    conf:
      client:
        rbd cache: true
        rbd cache policy: writearound

@@ -0,0 +1,7 @@
tasks:
- install:
- ceph:
    conf:
      client:
        rbd cache: true
        rbd cache policy: writeback

@@ -0,0 +1,7 @@
tasks:
- install:
- ceph:
    conf:
      client:
        rbd cache: true
        rbd cache max dirty: 0

@@ -0,0 +1 @@
../.qa/

@@ -0,0 +1 @@
.qa/clusters/fixed-3.yaml

@@ -0,0 +1,8 @@
openstack:
  - machine:
      disk: 40 # GB
      ram: 30000 # MB
      cpus: 1
    volumes: # attached to each instance
      count: 4
      size: 30 # GB

@@ -0,0 +1 @@
../.qa/

@@ -0,0 +1,5 @@
overrides:
  ceph:
    conf:
      client:
        rbd default features: 61

@@ -0,0 +1,11 @@
overrides:
  ceph:
    conf:
      client:
        rbd read from replica policy: balance

tasks:
- exec:
    osd.0:
      - ceph osd require-osd-release pacific
      - ceph osd set-require-min-compat-client octopus

@@ -0,0 +1 @@
../.qa/

@@ -0,0 +1,9 @@
overrides:
  ceph:
    conf:
      global:
        ms inject socket failures: 5000
        mon client directed command retry: 5
    log-ignorelist:
      - but it is still running
      - \(OSD_SLOW_PING_TIME

@@ -0,0 +1 @@
.qa/objectstore

@@ -0,0 +1 @@
../.qa/

@@ -0,0 +1,21 @@
overrides:
  ceph:
    log-ignorelist:
      - overall HEALTH_
      - \(CACHE_POOL_NEAR_FULL\)
      - \(CACHE_POOL_NO_HIT_SET\)
tasks:
- exec:
    client.0:
      - sudo ceph osd erasure-code-profile set teuthologyprofile crush-failure-domain=osd m=1 k=2
      - sudo ceph osd pool delete rbd rbd --yes-i-really-really-mean-it
      - sudo ceph osd pool create rbd 4 4 erasure teuthologyprofile
      - sudo ceph osd pool create cache 4
      - sudo ceph osd tier add rbd cache
      - sudo ceph osd tier cache-mode cache writeback
      - sudo ceph osd tier set-overlay rbd cache
      - sudo ceph osd pool set cache hit_set_type bloom
      - sudo ceph osd pool set cache hit_set_count 8
      - sudo ceph osd pool set cache hit_set_period 60
      - sudo ceph osd pool set cache target_max_objects 250
      - rbd pool init rbd

@@ -0,0 +1,24 @@
tasks:
- exec:
    client.0:
      - sudo ceph osd erasure-code-profile set teuthologyprofile crush-failure-domain=osd m=1 k=2
      - sudo ceph osd pool create datapool 4 4 erasure teuthologyprofile
      - sudo ceph osd pool set datapool allow_ec_overwrites true
      - rbd pool init datapool

overrides:
  thrashosds:
    bdev_inject_crash: 2
    bdev_inject_crash_probability: .5
  ceph:
    fs: xfs
    conf:
      client:
        rbd default data pool: datapool
      osd: # force bluestore since it's required for ec overwrites
        osd objectstore: bluestore
        bluestore block size: 96636764160
        enable experimental unrecoverable data corrupting features: "*"
        osd debug randomize hobject sort order: false
# this doesn't work with failures bc the log writes are not atomic across the two backends
#        bluestore bluefs env mirror: true

@@ -0,0 +1,11 @@
tasks:
- exec:
    client.0:
      - sudo ceph osd pool create datapool 4
      - rbd pool init datapool

overrides:
  ceph:
    conf:
      client:
        rbd default data pool: datapool

@@ -0,0 +1,17 @@
overrides:
  ceph:
    log-ignorelist:
      - overall HEALTH_
      - \(CACHE_POOL_NEAR_FULL\)
      - \(CACHE_POOL_NO_HIT_SET\)
tasks:
- exec:
    client.0:
      - sudo ceph osd pool create cache 4
      - sudo ceph osd tier add rbd cache
      - sudo ceph osd tier cache-mode cache writeback
      - sudo ceph osd tier set-overlay rbd cache
      - sudo ceph osd pool set cache hit_set_type bloom
      - sudo ceph osd pool set cache hit_set_count 8
      - sudo ceph osd pool set cache hit_set_period 60
      - sudo ceph osd pool set cache target_max_objects 250

@@ -0,0 +1 @@
.qa/distros/supported-random-distro$

@@ -0,0 +1 @@
../.qa/

@@ -0,0 +1,13 @@
overrides:
  install:
    ceph:
      extra_packages: [rbd-nbd]
tasks:
- qemu:
    all:
      clone: true
      encryption_format: luks1
      type: block
      disks: 3
      test: qa/run_xfstests_qemu.sh
exclude_arch: armv7l

@@ -0,0 +1,13 @@
overrides:
  install:
    ceph:
      extra_packages: [rbd-nbd]
tasks:
- qemu:
    all:
      clone: true
      encryption_format: luks2
      type: block
      disks: 3
      test: qa/run_xfstests_qemu.sh
exclude_arch: armv7l

@@ -29,6 +29,7 @@ def normalize_disks(config):
        clone = client_config.get('clone', False)
        image_url = client_config.get('image_url', DEFAULT_IMAGE_URL)
        device_type = client_config.get('type', 'filesystem')
        encryption_format = client_config.get('encryption_format', 'none')

        disks = client_config.get('disks', DEFAULT_NUM_DISKS)
        if not isinstance(disks, list):
@@ -55,6 +56,10 @@ def normalize_disks(config):
                disk['device_letter'] = chr(ord('a') + i)

            if 'encryption_format' not in disk:
                disk['encryption_format'] = encryption_format
            assert disk['encryption_format'] in ['none', 'luks1', 'luks2'], 'invalid encryption format'

        assert disks, 'at least one rbd device must be used'

        if clone:
@@ -72,13 +77,16 @@ def create_images(ctx, config, managers):
    for client, client_config in config.items():
        disks = client_config['disks']
        for disk in disks:
            if disk.get('action') != 'create' or 'image_url' in disk:
            if disk.get('action') != 'create' or (
                    'image_url' in disk and
                    disk['encryption_format'] == 'none'):
                continue
            create_config = {
                client: {
                    'image_name': disk['image_name'],
                    'image_format': 2,
                    'image_size': disk['image_size'],
                    'encryption_format': disk['encryption_format'],
                    }
                }
            managers.append(
@@ -104,6 +112,20 @@ def create_clones(ctx, config, managers):
                rbd.clone_image(ctx=ctx, config=create_config)
            )

def create_encrypted_devices(ctx, config, managers):
    for client, client_config in config.items():
        disks = client_config['disks']
        for disk in disks:
            if disk['encryption_format'] == 'none' or \
                    'device_letter' not in disk:
                continue
            dev_config = {client: disk}
            managers.append(
                lambda dev_config=dev_config:
                rbd.dev_create(ctx=ctx, config=dev_config)
            )

@contextlib.contextmanager
def create_dirs(ctx, config):
    """
@@ -279,15 +301,38 @@ def download_image(ctx, config):
                    'wget', '-nv', '-O', base_file, disk['image_url'],
                ]
            )
            remote.run(
                args=[
                    'qemu-img', 'convert', '-f', 'qcow2', '-O', 'raw',
                    base_file, 'rbd:rbd/{image_name}'.format(image_name=disk['image_name'])
                ]
            )
            if disk['encryption_format'] == 'none':
                remote.run(
                    args=[
                        'qemu-img', 'convert', '-f', 'qcow2', '-O', 'raw',
                        base_file, 'rbd:rbd/{image_name}'.format(image_name=disk['image_name'])
                    ]
                )
            else:
                dev_config = {client: {'image_name': disk['image_name'],
                                       'encryption_format': disk['encryption_format']}}
                raw_file = '{tdir}/qemu/base.{name}.raw'.format(
                    tdir=testdir, name=disk['image_name'])
                client_base_files[client].append(raw_file)
                remote.run(
                    args=[
                        'qemu-img', 'convert', '-f', 'qcow2', '-O', 'raw',
                        base_file, raw_file
                    ]
                )

                with rbd.dev_create(ctx, dev_config):
                    remote.run(
                        args=[
                            'dd', 'if={name}'.format(name=raw_file),
                            'of={name}'.format(name=dev_config[client]['device_path']),
                            'bs=4M', 'conv=fdatasync'
                        ]
                    )

        for disk in disks:
            if disk['action'] == 'clone' or \
                    disk['encryption_format'] != 'none' or \
                    (disk['action'] == 'create' and 'image_url' not in disk):
                continue
@@ -446,11 +491,18 @@ def run_qemu(ctx, config):
            if 'device_letter' not in disk:
                continue

            if disk['encryption_format'] == 'none':
                disk_spec = 'rbd:rbd/{img}:id={id}'.format(
                    img=disk['image_name'],
                    id=client[len('client.'):]
                    )
            else:
                disk_spec = disk['device_path']

            args.extend([
                '-drive',
                'file=rbd:rbd/{img}:id={id},format=raw,if=virtio,cache={cachemode}'.format(
                    img=disk['image_name'],
                    id=client[len('client.'):],
                'file={disk_spec},format=raw,if=virtio,cache={cachemode}'.format(
                    disk_spec=disk_spec,
                    cachemode=cachemode,
                ),
            ])
@@ -552,6 +604,7 @@ def task(ctx, config):
              type: filesystem / block (optional, defaults to fileystem)
              image_url: <URL> (optional),
              image_size: <MiB> (optional)
              encryption_format: luks1 / luks2 / none (optional, defaults to none)
            }, ...
          ]
@@ -597,6 +650,7 @@ def task(ctx, config):
        lambda: download_image(ctx=ctx, config=config),
    ])
    create_clones(ctx=ctx, config=config, managers=managers)
    create_encrypted_devices(ctx=ctx, config=config, managers=managers)
    managers.append(
        lambda: run_qemu(ctx=ctx, config=config),
    )
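
The net effect of the run_qemu change above is that the disk source handed to qemu depends on the encryption mode: unencrypted images keep using the qemu rbd driver's rbd: URI, while encrypted images use the device path that rbd.dev_create mapped via nbd. A minimal sketch of that selection (the helper name is illustrative, not part of the commit):

def drive_spec(disk, client_id, cachemode):
    # Encrypted disks are handed to qemu as the raw nbd device mapped by
    # rbd.dev_create; everything else keeps going through the qemu rbd driver.
    if disk['encryption_format'] == 'none':
        source = 'rbd:rbd/{img}:id={id}'.format(img=disk['image_name'], id=client_id)
    else:
        source = disk['device_path']   # e.g. /dev/nbd0
    return 'file={src},format=raw,if=virtio,cache={cache}'.format(
        src=source, cache=cachemode)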

@@ -22,6 +22,8 @@ os.environ["RBD_FORCE_ALLOW_V1"] = "1"
log = logging.getLogger(__name__)

ENCRYPTION_PASSPHRASE = "password"

@contextlib.contextmanager
def create_image(ctx, config):
    """
@@ -36,6 +38,7 @@ def create_image(ctx, config):
                image_name: testimage
                image_size: 100
                image_format: 1
                encryption_format: luks2
            client.1:

    Image size is expressed as a number of megabytes; default value
@@ -59,6 +62,7 @@ def create_image(ctx, config):
        name = properties.get('image_name', default_image_name(role))
        size = properties.get('image_size', 10240)
        fmt = properties.get('image_format', 1)
        encryption_format = properties.get('encryption_format', 'none')
        (remote,) = ctx.cluster.only(role).remotes.keys()
        log.info('Creating image {name} with size {size}'.format(name=name,
                                                                 size=size))
@@ -77,6 +81,32 @@ def create_image(ctx, config):
        if int(fmt) != 1:
            args += ['--image-format', str(fmt)]
        remote.run(args=args)

        if encryption_format != 'none':
            passphrase_file = '{tdir}/passphrase'.format(tdir=testdir)
            remote.run(
                args=[
                    'echo',
                    ENCRYPTION_PASSPHRASE,
                    run.Raw('>'),
                    passphrase_file
                ]
            )
            remote.run(
                args=[
                    'adjust-ulimits',
                    'ceph-coverage',
                    '{tdir}/archive/coverage'.format(tdir=testdir),
                    'rbd',
                    'encryption',
                    'format',
                    name,
                    encryption_format,
                    passphrase_file,
                    '-p',
                    'rbd'
                ]
            )

    try:
        yield
    finally:
@@ -226,7 +256,9 @@ def dev_create(ctx, config):
        - rbd.create_image: [client.0]
        - rbd.modprobe: [client.0]
        - rbd.dev_create:
            client.0: testimage.client.0
            client.0:
                image_name: testimage.client.0
                encryption_format: luks2
    """
    assert isinstance(config, dict) or isinstance(config, list), \
        "task dev_create only supports a list or dictionary for configuration"
@@ -239,12 +271,40 @@ def dev_create(ctx, config):
    log.info('Creating rbd block devices...')
    testdir = teuthology.get_testdir(ctx)
    passphrase_file = '{tdir}/passphrase'.format(tdir=testdir)
    device_path = {}
    for role, image in role_images:
        if image is None:
            image = default_image_name(role)
    for role, properties in role_images:
        if properties is None:
            properties = {}
        name = properties.get('image_name', default_image_name(role))
        encryption_format = properties.get('encryption_format', 'none')
        (remote,) = ctx.cluster.only(role).remotes.keys()

        if encryption_format == 'none':
            device_path[role] = '/dev/rbd/rbd/{image}'.format(image=name)
            device_specific_args = [
                run.Raw('&&'),
                # wait for the symlink to be created by udev
                'while', 'test', '!', '-e', device_path, run.Raw(';'), 'do',
                'sleep', '1', run.Raw(';'),
                'done',
            ]
        else:
            remote.run(
                args=[
                    'echo',
                    ENCRYPTION_PASSPHRASE,
                    run.Raw('>'),
                    passphrase_file
                ]
            )
            device_specific_args = [
                '-t', 'nbd', '-o',
                'encryption-format=%s,encryption-passphrase-file=%s' % (
                    encryption_format, passphrase_file)]

        map_fp = StringIO()
        remote.run(
            args=[
                'sudo',
@@ -252,25 +312,43 @@ def dev_create(ctx, config):
                'ceph-coverage',
                '{tdir}/archive/coverage'.format(tdir=testdir),
                'rbd',
                '--user', role.rsplit('.')[-1],
                '--id', role.rsplit('.')[-1],
                '-p', 'rbd',
                'map',
                image,
                run.Raw('&&'),
                # wait for the symlink to be created by udev
                'while', 'test', '!', '-e', '/dev/rbd/rbd/{image}'.format(image=image), run.Raw(';'), 'do',
                'sleep', '1', run.Raw(';'),
                'done',
                ],
                name] + device_specific_args,
            stdout=map_fp,
            )

        if encryption_format != 'none':
            device_path[role] = map_fp.getvalue().rstrip()
            properties['device_path'] = device_path[role]
            remote.run(args=['sudo', 'chmod', '666', device_path[role]])

    try:
        yield
    finally:
        log.info('Unmapping rbd devices...')
        for role, image in role_images:
            if image is None:
                image = default_image_name(role)
        for role, properties in role_images:
            if not device_path.get(role):
                continue
            if properties is None:
                properties = {}
            encryption_format = properties.get('encryption_format', 'none')
            (remote,) = ctx.cluster.only(role).remotes.keys()

            if encryption_format == 'none':
                device_specific_args = [
                    run.Raw('&&'),
                    # wait for the symlink to be deleted by udev
                    'while', 'test', '-e', device_path[role],
                    run.Raw(';'),
                    'do',
                    'sleep', '1', run.Raw(';'),
                    'done',
                ]
            else:
                device_specific_args = ['-t', 'nbd']

            remote.run(
                args=[
                    'LD_LIBRARY_PATH={tdir}/binary/usr/local/lib'.format(tdir=testdir),
@@ -281,15 +359,8 @@ def dev_create(ctx, config):
                    'rbd',
                    '-p', 'rbd',
                    'unmap',
                    '/dev/rbd/rbd/{imgname}'.format(imgname=image),
                    run.Raw('&&'),
                    # wait for the symlink to be deleted by udev
                    'while', 'test', '-e', '/dev/rbd/rbd/{image}'.format(image=image),
                    run.Raw(';'),
                    'do',
                    'sleep', '1', run.Raw(';'),
                    'done',
                    ],
                    device_path[role],
                ] + device_specific_args,
            )
@@ -547,8 +618,8 @@ def xfstests(ctx, config):
        log.info(' scratch ({size} MB): {image}'.format(size=scratch_size,
                                                        image=scratch_image))
        modprobe_config[role] = None
        image_map_config[role] = test_image
        scratch_map_config[role] = scratch_image
        image_map_config[role] = {'image_name': test_image}
        scratch_map_config[role] = {'image_name': scratch_image}

    with contextutil.nested(
        lambda: create_image(ctx=ctx, config=images_config),
@@ -610,7 +681,7 @@ def task(ctx, config):
        for role, properties in norm_config.items():
            if properties is None:
                properties = {}
            role_images[role] = properties.get('image_name')
            role_images[role] = {'image_name': properties.get('image_name')}
    else:
        role_images = norm_config
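
With these changes rbd.dev_create takes a per-role properties dictionary instead of a bare image name. Sketched as a Python literal (the same shape shown in the docstring example above; the values are illustrative):

dev_create_config = {
    'client.0': {
        'image_name': 'testimage.client.0',   # defaults to the role-derived name when omitted
        'encryption_format': 'luks2',         # 'none' (default), 'luks1' or 'luks2'
    }
}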