ceph/qa/tasks/vip.py
Sage Weil 3c1e086be0 qa/tasks/vip: exec with bash -ex
Signed-off-by: Sage Weil <sage@newdream.net>
2021-08-05 17:45:56 -04:00

206 lines
6.8 KiB
Python

import contextlib
import ipaddress
import logging
import re
from teuthology import misc as teuthology
from teuthology.config import config as teuth_config
# Module-level logger used by every function in this file.
log = logging.getLogger(__name__)
def subst_vip(ctx, cmd):
    """
    Expand the VIP placeholders found in the string ``cmd``.

    - ``{{VIP<n>}}`` becomes entry ``n`` of ``ctx.vip["vips"]``.  If ``n`` is
      out of range the placeholder is left as-is and a warning is logged.
    - ``{{VIPPREFIXLEN}}`` becomes the prefix length of ``ctx.vip["vnet"]``.
    - ``{{VIPSUBNET}}`` becomes the network address of ``ctx.vip["vnet"]``.

    Returns the expanded string.
    """
    def expand(match):
        # group(1) is the whole placeholder, group(2) the numeric index
        n = int(match.group(2))
        if n < len(ctx.vip["vips"]):
            return str(ctx.vip["vips"][n])
        log.warning(f'no VIP{n} (we have {len(ctx.vip["vips"])})')
        return match.group(1)

    cmd = re.sub(r'({{VIP(\d+)}})', expand, cmd)

    # Only touch ctx.vip["vnet"] when the placeholder is actually present.
    if '{{VIPPREFIXLEN}}' in cmd:
        prefixlen = str(ctx.vip["vnet"].prefixlen)
        cmd = cmd.replace('{{VIPPREFIXLEN}}', prefixlen)
    if '{{VIPSUBNET}}' in cmd:
        subnet = str(ctx.vip["vnet"].network_address)
        cmd = cmd.replace('{{VIPSUBNET}}', subnet)
    return cmd
def echo(ctx, config):
    """
    Debugging helper: log ``config`` with the VIP substitutions applied.

    The message is emitted once for every remote in the cluster.
    """
    for _remote in ctx.cluster.remotes.keys():
        expanded = subst_vip(ctx, config)
        log.info(expanded)
def exec(ctx, config):
    """
    This is similar to the standard 'exec' task, but does the VIP substitutions.

    ``config`` must be a dict mapping a role to a list of command strings.
    Two shorthands are recognised when they are the only key:

    - ``all-roles``: run the commands on every role whose name does not
      start with ``host.``
    - ``all-hosts``: run the commands on every role whose name starts with
      ``host.``

    Each command is run on the role's remote as
    ``sudo TESTDIR=<testdir> bash -ex -c <command>`` after replacing
    ``$TESTDIR`` and the VIP placeholders in the command text.
    """
    assert isinstance(config, dict), "task exec got invalid config"

    testdir = teuthology.get_testdir(ctx)

    if 'all-roles' in config and len(config) == 1:
        a = config['all-roles']
        roles = teuthology.all_roles(ctx.cluster)
        config = {id_: a for id_ in roles if not id_.startswith('host.')}
    elif 'all-hosts' in config and len(config) == 1:
        a = config['all-hosts']
        roles = teuthology.all_roles(ctx.cluster)
        config = {id_: a for id_ in roles if id_.startswith('host.')}

    for role, ls in config.items():
        # each role is expected to resolve to exactly one remote
        (remote,) = ctx.cluster.only(role).remotes.keys()
        log.info('Running commands on role %s host %s', role, remote.name)
        for c in ls:
            # str.replace() returns a new string; the result has to be kept
            # for the $TESTDIR substitution to have any effect.
            c = c.replace('$TESTDIR', testdir)
            remote.run(
                args=[
                    'sudo',
                    'TESTDIR={tdir}'.format(tdir=testdir),
                    'bash',
                    '-ex',
                    '-c',
                    subst_vip(ctx, c)],
            )
def map_vips(mip, count):
    """
    Choose virtual addresses for the machine address ``mip``.

    The ``vip`` list of the teuthology config is scanned for the first entry
    whose ``machine_subnet`` contains ``mip``.  The position of ``mip`` among
    that subnet's hosts is then used to pick the host at the same position
    in successive machine-sized subnets of the entry's ``virtual_subnet``,
    stopping once ``count`` addresses have been collected.

    Returns a ``(virtual_network, [addresses])`` tuple, or None when no
    entry matches or when an entry's virtual_subnet prefix is not shorter
    than its machine_subnet prefix.
    """
    for entry in teuth_config.get('vip', []):
        mnet = ipaddress.ip_network(entry['machine_subnet'])
        vnet = ipaddress.ip_network(entry['virtual_subnet'])

        if vnet.prefixlen >= mnet.prefixlen:
            log.error(f"virtual_subnet {vnet} prefix >= machine_subnet {mnet} prefix")
            return None

        if mip not in mnet:
            continue

        pos = list(mnet.hosts()).index(mip)
        log.info(f"{mip} in {mnet}, pos {pos}")

        picked = []
        remaining = count
        for chunk in vnet.subnets(new_prefix=mnet.prefixlen):
            chunk_hosts = list(chunk.hosts())
            picked.append(chunk_hosts[pos])
            remaining -= 1
            if remaining == 0:
                break
        return vnet, picked

    return None
def _find_iface(remote, ip):
    """
    Return the interface on ``remote`` whose link-scope route lists ``ip``
    as its source address, or None if ``ip route ls`` shows no such route.
    """
    p = re.compile(r'^(\S+) dev (\S+) (.*)scope link (.*)src (\S+)')
    for line in remote.sh(['sudo', 'ip', 'route', 'ls']).splitlines():
        m = p.findall(line)
        if not m:
            continue
        route_iface = m[0][1]
        route_ip = m[0][4]
        if route_ip == ip:
            return route_iface
    return None


@contextlib.contextmanager
def task(ctx, config):
    """
    Set up a virtual network and allocate virtual IP(s) for each machine.

    The strategy here is to set up a private virtual subnet that is larger than
    the subnet the machine(s) exist in, and allocate virtual IPs from that pool.

    - The teuthology.yaml must include a section like::

        vip:
        - machine_subnet: 172.21.0.0/20
          virtual_subnet: 10.0.0.0/16

      At least one item's machine_subnet should map the subnet the test machine's
      primary IP lives in (the one DNS resolves to).  The virtual_subnet must have a
      shorter prefix (i.e., larger than the machine_subnet).  If there are multiple
      machine_subnets, they cannot map into the same virtual_subnet.

    - Each machine gets an IP in the virtual_subnet statically configured by the vip
      task.  This lets all test machines reach each other and (most importantly) any
      virtual IPs.

    - 1 or more virtual IPs are then mapped for the task.  These IPs are chosen based
      on one of the remotes.  This uses a lot of network space but it avoids any
      conflicts between tests.

    To use a virtual IP, the {{VIP0}}, {{VIP1}}, etc. substitutions can be used.

    {{VIPSUBNET}} is the virtual_subnet address (10.0.0.0 in the example).

    {{VIPPREFIXLEN}} is the virtual_subnet prefix (16 in the example).

    These substitutions work for vip.echo, and (at the time of writing) cephadm.apply
    and cephadm.shell.

    The optional ``count`` key of ``config`` is the number of virtual IPs to
    allocate (default 1).

    Raises RuntimeError if a remote's address is not covered by a usable
    ``vip`` mapping.
    """
    if config is None:
        config = {}
    count = config.get('count', 1)

    ctx.vip_static = {}
    ctx.vip = {}

    # Setup runs inside the try so that addresses already configured on
    # earlier remotes are removed again if a later remote fails.
    try:
        log.info("Allocating static IPs for each host...")
        for remote in ctx.cluster.remotes.keys():
            ip = remote.ssh.get_transport().getpeername()[0]
            log.info(f'peername {ip}')
            mip = ipaddress.ip_address(ip)

            # ask for one extra address: the first is this host's static IP
            mapped = map_vips(mip, count + 1)
            if mapped is None:
                raise RuntimeError(
                    f"no usable 'vip' mapping for {remote.hostname} ({ip})"
                )
            vnet, vips = mapped
            static = vips.pop(0)
            log.info(f"{remote.hostname} static {static}, vnet {vnet}")

            if not ctx.vip:
                # do this only once (use the first remote we see), since we only need 1
                # set of virtual IPs, regardless of how many remotes we have.
                log.info(f"VIPs are {[str(v) for v in vips]}")
                ctx.vip = {
                    'vnet': vnet,
                    'vips': vips,
                }
            else:
                # all remotes must be in the same virtual network...
                assert vnet == ctx.vip['vnet']

            # pick interface
            iface = _find_iface(remote, ip)
            if not iface:
                # best effort: this remote is left without a static address
                log.error(f"Unable to find {remote.hostname} interface for {ip}")
                continue

            # configure
            log.info(f"Configuring {static} on {remote.hostname} iface {iface}...")
            remote.sh(['sudo',
                       'ip', 'addr', 'add',
                       str(static) + '/' + str(vnet.prefixlen),
                       'dev', iface])

            # recorded only after a successful add, so cleanup touches just
            # the remotes that were actually configured
            ctx.vip_static[remote] = {
                "iface": iface,
                "static": static,
            }

        yield

    finally:
        for remote, m in ctx.vip_static.items():
            log.info(f"Removing {m['static']} (and any VIPs) on {remote.hostname} iface {m['iface']}...")
            remote.sh(['sudo',
                       'ip', 'addr', 'del',
                       str(m['static']) + '/' + str(ctx.vip['vnet'].prefixlen),
                       'dev', m['iface']])

            # a VIP may or may not be present on this host, so ignore failures
            for vip in ctx.vip['vips']:
                remote.sh(
                    [
                        'sudo',
                        'ip', 'addr', 'del',
                        str(vip) + '/' + str(ctx.vip['vnet'].prefixlen),
                        'dev', m['iface']
                    ],
                    check_status=False,
                )