ceph/qa/tasks/vip.py

import contextlib
import ipaddress
import logging
import re

from teuthology import misc as teuthology
from teuthology.config import config as teuth_config

log = logging.getLogger(__name__)


def subst_vip(ctx, cmd):
    """
    Expand the VIP placeholders found in a command string.

    ``{{VIPn}}`` is replaced by the n-th entry of ``ctx.vip["vips"]``.  When n
    is past the end of that list a warning is logged and the placeholder is
    left in place.  ``{{VIPPREFIXLEN}}`` and ``{{VIPSUBNET}}`` are replaced by
    the prefix length and the network address of ``ctx.vip["vnet"]``.

    ``ctx.vip`` is only consulted when a matching placeholder is present.

    Returns the expanded string.
    """
    def expand(match):
        # group(1) is the whole placeholder, group(2) the numeric index
        index = int(match.group(2))
        available = len(ctx.vip["vips"])
        if index < available:
            return str(ctx.vip["vips"][index])
        log.warning(f'no VIP{index} (we have {len(ctx.vip["vips"])})')
        return match.group(1)

    result = re.sub(r'({{VIP(\d+)}})', expand, cmd)

    # Looked up lazily so that ctx.vip["vnet"] is untouched unless needed.
    lazy_tokens = (
        ('{{VIPPREFIXLEN}}', lambda: ctx.vip["vnet"].prefixlen),
        ('{{VIPSUBNET}}', lambda: ctx.vip["vnet"].network_address),
    )
    for token, value in lazy_tokens:
        if token in result:
            result = result.replace(token, str(value()))

    return result


def echo(ctx, config):
    """
    Log ``config`` with the VIP substitutions applied, once per remote in
    the cluster.  This is mostly for debugging.
    """
    for _remote in ctx.cluster.remotes.keys():
        expanded = subst_vip(ctx, config)
        log.info(expanded)


def exec(ctx, config):
    """
    This is similar to the standard 'exec' task, but does the VIP substitutions.

    ``config`` must be a dict mapping a role to a list of command strings.
    Two shorthand keys are recognised when they are the only key present:

    - ``all-roles``: run the commands for every role that does not start
      with ``host.``
    - ``all-hosts``: run the commands for every role that starts with
      ``host.``

    For each command, the literal text ``$TESTDIR`` is replaced with the test
    directory, the VIP placeholders are expanded via subst_vip(), and the
    result is run on the role's remote as ``sudo TESTDIR=<dir> bash -ex -c``.
    """
    assert isinstance(config, dict), "task exec got invalid config"

    testdir = teuthology.get_testdir(ctx)

    if 'all-roles' in config and len(config) == 1:
        a = config['all-roles']
        roles = teuthology.all_roles(ctx.cluster)
        config = {id_: a for id_ in roles if not id_.startswith('host.')}
    elif 'all-hosts' in config and len(config) == 1:
        a = config['all-hosts']
        roles = teuthology.all_roles(ctx.cluster)
        config = {id_: a for id_ in roles if id_.startswith('host.')}

    for role, ls in config.items():
        # each role is expected to resolve to exactly one remote
        (remote,) = ctx.cluster.only(role).remotes.keys()
        log.info('Running commands on role %s host %s', role, remote.name)
        for c in ls:
            # str.replace returns a new string; the result must be kept or
            # the substitution is silently lost.
            c = c.replace('$TESTDIR', testdir)
            remote.run(
                args=[
                    'sudo',
                    'TESTDIR={tdir}'.format(tdir=testdir),
                    'bash',
                    '-ex',
                    '-c',
                    subst_vip(ctx, c)],
                )


def map_vips(mip, count):
    """
    Map a machine IP onto addresses inside a configured virtual subnet.

    Walks the ``vip`` entries of the teuthology config.  For the first entry
    whose ``machine_subnet`` contains ``mip``, the position of ``mip`` among
    that subnet's hosts is computed, and the host at the same position is
    taken from successive machine-sized slices of the ``virtual_subnet``,
    stopping once ``count`` addresses have been collected.

    Returns a ``(virtual_network, [addresses])`` tuple, or None when no entry
    contains ``mip`` or when an entry is reached whose virtual_subnet prefix
    is not shorter than its machine_subnet prefix.
    """
    for entry in teuth_config.get('vip', []):
        machine_net = ipaddress.ip_network(entry['machine_subnet'])
        virtual_net = ipaddress.ip_network(entry['virtual_subnet'])

        # the virtual network has to be strictly larger than the machine one
        if not virtual_net.prefixlen < machine_net.prefixlen:
            log.error(f"virtual_subnet {virtual_net} prefix >= machine_subnet {machine_net} prefix")
            return None

        if mip not in machine_net:
            continue

        offset = list(machine_net.hosts()).index(mip)
        log.info(f"{mip} in {machine_net}, pos {offset}")

        picked = []
        remaining = count
        for piece in virtual_net.subnets(new_prefix=machine_net.prefixlen):
            picked.append(list(piece.hosts())[offset])
            remaining -= 1
            if remaining == 0:
                break
        return virtual_net, picked

    return None


def _find_iface(remote, ip):
    """
    Return the interface on ``remote`` whose link-scope route lists ``ip`` as
    its source address, or None when ``ip route ls`` shows no such route.
    """
    p = re.compile(r'^(\S+) dev (\S+) (.*)scope link (.*)src (\S+)')
    for line in remote.sh(['sudo', 'ip', 'route', 'ls']).splitlines():
        m = p.match(line)
        if not m:
            continue
        # group 2 is the device name, group 5 the route's source address
        if m.group(5) == ip:
            return m.group(2)
    return None


@contextlib.contextmanager
def task(ctx, config):
    """
    Set up a virtual network and allocate virtual IP(s) for each machine.

    The strategy here is to set up a private virtual subnet that is larger than
    the subnet the machine(s) exist in, and allocate virtual IPs from that pool.

    - The teuthology.yaml must include a section like::

        vip:
          - machine_subnet: 172.21.0.0/20
            virtual_subnet: 10.0.0.0/16

      At least one item's machine_subnet should map the subnet the test machine's
      primary IP lives in (the one DNS resolves to).  The virtual_subnet must have a
      shorter prefix (i.e., larger than the machine_subnet).  If there are multiple
      machine_subnets, they cannot map into the same virtual_subnet.

    - Each machine gets an IP in the virtual_subnet statically configured by the vip
      task. This lets all test machines reach each other and (most importantly) any
      virtual IPs.

    - 1 or more virtual IPs are then mapped for the task.  These IPs are chosen based
      on one of the remotes.  This uses a lot of network space but it avoids any
      conflicts between tests.

    To use a virtual IP, the {{VIP0}}, {{VIP1}}, etc. substitutions can be used.

    {{VIPSUBNET}} is the virtual_subnet address (10.0.0.0 in the example).

    {{VIPPREFIXLEN}} is the virtual_subnet prefix (16 in the example).

    These substitutions work for vip.echo, vip.exec, and (at the time of writing)
    cephadm.apply and cephadm.shell.

    The optional ``count`` config key (default 1) is the number of virtual IPs
    to allocate.  The results are stored on the context: ``ctx.vip`` holds
    ``vnet`` and ``vips``, and ``ctx.vip_static`` maps each configured remote
    to its ``iface`` and ``static`` address.

    On exit (including when setup fails part-way) the static address and any
    VIPs are removed from every remote that was configured.

    Raises RuntimeError if no configured mapping covers a remote's IP.
    """
    if config is None:
        config = {}
    count = config.get('count', 1)

    ctx.vip_static = {}
    ctx.vip = {}

    try:
        # Setup runs inside the try so that a failure on a later remote still
        # removes the addresses already added to earlier remotes.
        log.info("Allocating static IPs for each host...")
        for remote in ctx.cluster.remotes.keys():
            ip = remote.ssh.get_transport().getpeername()[0]
            log.info(f'peername {ip}')
            mip = ipaddress.ip_address(ip)

            # one extra address: the first is the static IP, the rest are VIPs
            mapped = map_vips(mip, count + 1)
            if mapped is None:
                raise RuntimeError(
                    f"no usable 'vip' mapping in the teuthology config for "
                    f"{remote.hostname} ({ip})"
                )
            vnet, vips = mapped
            static = vips.pop(0)
            log.info(f"{remote.hostname} static {static}, vnet {vnet}")

            if not ctx.vip:
                # do this only once (use the first remote we see), since we only need 1
                # set of virtual IPs, regardless of how many remotes we have.
                log.info(f"VIPs are {list(map(str, vips))}")
                ctx.vip = {
                    'vnet': vnet,
                    'vips': vips,
                }
            else:
                # all remotes must be in the same virtual network...
                assert vnet == ctx.vip['vnet']

            # pick interface
            iface = _find_iface(remote, ip)
            if not iface:
                # best effort: skip this remote rather than failing the task
                log.error(f"Unable to find {remote.hostname} interface for {ip}")
                continue

            # configure
            log.info(f"Configuring {static} on {remote.hostname} iface {iface}...")
            remote.sh(['sudo',
                       'ip', 'addr', 'add',
                       str(static) + '/' + str(vnet.prefixlen),
                       'dev', iface])

            ctx.vip_static[remote] = {
                "iface": iface,
                "static": static,
            }

        yield

    finally:
        # ctx.vip is always populated before the first ctx.vip_static entry is
        # added, so ctx.vip['vnet'] is available whenever this loop has work.
        for remote, m in ctx.vip_static.items():
            log.info(f"Removing {m['static']} (and any VIPs) on {remote.hostname} iface {m['iface']}...")
            remote.sh(['sudo',
                       'ip', 'addr', 'del',
                       str(m['static']) + '/' + str(ctx.vip['vnet'].prefixlen),
                       'dev', m['iface']])

            for vip in ctx.vip['vips']:
                # a VIP may not be present on this remote; ignore failures
                remote.sh(
                    [
                        'sudo',
                        'ip', 'addr', 'del',
                        str(vip) + '/' + str(ctx.vip['vnet'].prefixlen),
                        'dev', m['iface']
                    ],
                    check_status=False,
                )
qa/tasks/vip: add vip task to allocate virtual IPs Signed-off-by: Sage Weil <sage@newdream.net> 2021-04-15 19:00:57 +00:00			`import contextlib`
			`import ipaddress`
			`import logging`
			`import re`

qa/tasks/vip: add 'vip.exec' task Signed-off-by: Sage Weil <sage@newdream.net> 2021-04-30 15:41:23 +00:00			`from teuthology import misc as teuthology`
qa/tasks/vip: add vip task to allocate virtual IPs Signed-off-by: Sage Weil <sage@newdream.net> 2021-04-15 19:00:57 +00:00			`from teuthology.config import config as teuth_config`

			`log = logging.getLogger(__name__)`


			`def subst_vip(ctx, cmd):`
			`p = re.compile(r'({{VIP(\d+)}})')`
			`for m in p.findall(cmd):`
			`n = int(m[1])`
			`if n >= len(ctx.vip["vips"]):`
			`log.warning(f'no VIP{n} (we have {len(ctx.vip["vips"])})')`
			`else:`
			`cmd = cmd.replace(m[0], str(ctx.vip["vips"][n]))`

			`if '{{VIPPREFIXLEN}}' in cmd:`
			`cmd = cmd.replace('{{VIPPREFIXLEN}}', str(ctx.vip["vnet"].prefixlen))`

			`if '{{VIPSUBNET}}' in cmd:`
			`cmd = cmd.replace('{{VIPSUBNET}}', str(ctx.vip["vnet"].network_address))`

			`return cmd`


			`def echo(ctx, config):`
			`"""`
			`This is mostly for debugging`
			`"""`
			`for remote in ctx.cluster.remotes.keys():`
			`log.info(subst_vip(ctx, config))`


qa/tasks/vip: add 'vip.exec' task Signed-off-by: Sage Weil <sage@newdream.net> 2021-04-30 15:41:23 +00:00			`def exec(ctx, config):`
			`"""`
			`This is similar to the standard 'exec' task, but does the VIP substitutions.`
			`"""`
			`assert isinstance(config, dict), "task exec got invalid config"`

			`testdir = teuthology.get_testdir(ctx)`

			`if 'all-roles' in config and len(config) == 1:`
			`a = config['all-roles']`
			`roles = teuthology.all_roles(ctx.cluster)`
			`config = dict((id_, a) for id_ in roles if not id_.startswith('host.'))`
			`elif 'all-hosts' in config and len(config) == 1:`
			`a = config['all-hosts']`
			`roles = teuthology.all_roles(ctx.cluster)`
			`config = dict((id_, a) for id_ in roles if id_.startswith('host.'))`

			`for role, ls in config.items():`
			`(remote,) = ctx.cluster.only(role).remotes.keys()`
			`log.info('Running commands on role %s host %s', role, remote.name)`
			`for c in ls:`
			`c.replace('$TESTDIR', testdir)`
			`remote.run(`
			`args=[`
			`'sudo',`
			`'TESTDIR={tdir}'.format(tdir=testdir),`
			`'bash',`
qa/tasks/vip: exec with bash -ex Signed-off-by: Sage Weil <sage@newdream.net> 2021-08-05 20:17:42 +00:00			`'-ex',`
qa/tasks/vip: add 'vip.exec' task Signed-off-by: Sage Weil <sage@newdream.net> 2021-04-30 15:41:23 +00:00			`'-c',`
			`subst_vip(ctx, c)],`
			`)`


qa/tasks/vip: add vip task to allocate virtual IPs Signed-off-by: Sage Weil <sage@newdream.net> 2021-04-15 19:00:57 +00:00			`def map_vips(mip, count):`
			`for mapping in teuth_config.get('vip', []):`
			`mnet = ipaddress.ip_network(mapping['machine_subnet'])`
			`vnet = ipaddress.ip_network(mapping['virtual_subnet'])`
			`if vnet.prefixlen >= mnet.prefixlen:`
			`log.error(f"virtual_subnet {vnet} prefix >= machine_subnet {mnet} prefix")`
			`return None`
			`if mip in mnet:`
			`pos = list(mnet.hosts()).index(mip)`
			`log.info(f"{mip} in {mnet}, pos {pos}")`
			`r = []`
			`for sub in vnet.subnets(new_prefix=mnet.prefixlen):`
			`r += [list(sub.hosts())[pos]]`
			`count -= 1`
			`if count == 0:`
			`break`
			`return vnet, r`
			`return None`


			`@contextlib.contextmanager`
			`def task(ctx, config):`
			`"""`
			`Set up a virtual network and allocate virtual IP(s) for each machine.`

			`The strategy here is to set up a private virtual subnet that is larger than`
			`the subnet the machine(s) exist in, and allocate virtual IPs from that pool.`

			`- The teuthology.yaml must include a section like::`

			`vip:`
			`- machine_subnet: 172.21.0.0/20`
			`virtual_subnet: 10.0.0.0/16`

			`At least one item's machine_subnet should map the subnet the test machine's`
			`primary IP lives in (the one DNS resolves to). The virtual_subnet must have a`
			`shorter prefix (i.e., larger than the machine_subnet). If there are multiple`
			`machine_subnets, they cannot map into the same virtual_subnet.`

			`- Each machine gets an IP in the virtual_subset statically configured by the vip`
			`task. This lets all test machines reach each other and (most importantly) any`
			`virtual IPs.`

			`- 1 or more virtual IPs are then mapped for the task. These IPs are chosen based`
			`on one of the remotes. This uses a lot of network space but it avoids any`
			`conflicts between tests.`

			`To use a virtual IP, the {{VIP0}}, {{VIP1}}, etc. substitutions can be used.`

			`{{VIPSUBNET}} is the virtual_subnet address (10.0.0.0 in the example).`

			`{{VIPPREFIXLEN}} is the virtual_subnet prefix (16 in the example.`

			`These substitutions work for vip.echo, and (at the time of writing) cephadm.apply`
			`and cephadm.shell.`
			`"""`
			`if config is None:`
			`config = {}`
			`count = config.get('count', 1)`

			`ctx.vip_static = {}`
			`ctx.vip = {}`

			`log.info("Allocating static IPs for each host...")`
			`for remote in ctx.cluster.remotes.keys():`
			`ip = remote.ssh.get_transport().getpeername()[0]`
			`log.info(f'peername {ip}')`
			`mip = ipaddress.ip_address(ip)`
			`vnet, vips = map_vips(mip, count + 1)`
			`static = vips.pop(0)`
			`log.info(f"{remote.hostname} static {static}, vnet {vnet}")`

			`if not ctx.vip:`
			`# do this only once (use the first remote we see), since we only need 1`
			`# set of virtual IPs, regardless of how many remotes we have.`
			`log.info("VIPs are {map(str, vips)}")`
			`ctx.vip = {`
			`'vnet': vnet,`
			`'vips': vips,`
			`}`
			`else:`
			`# all remotes must be in the same virtual network...`
			`assert vnet == ctx.vip['vnet']`

			`# pick interface`
			`p = re.compile(r'^(\S+) dev (\S+) (.)scope link (.)src (\S+)')`
			`iface = None`
			`for line in remote.sh(['sudo', 'ip','route','ls']).splitlines():`
			`m = p.findall(line)`
			`if not m:`
			`continue`
			`route_iface = m[0][1]`
			`route_ip = m[0][4]`
			`if route_ip == ip:`
			`iface = route_iface`
			`break`

			`if not iface:`
			`log.error(f"Unable to find {remote.hostname} interface for {ip}")`
			`continue`

			`# configure`
			`log.info(f"Configuring {static} on {remote.hostname} iface {iface}...")`
			`remote.sh(['sudo',`
			`'ip', 'addr', 'add',`
			`str(static) + '/' + str(vnet.prefixlen),`
			`'dev', iface])`

			`ctx.vip_static[remote] = {`
			`"iface": iface,`
			`"static": static,`
			`}`

			`try:`
			`yield`

			`finally:`
			`for remote, m in ctx.vip_static.items():`
			`log.info(f"Removing {m['static']} (and any VIPs) on {remote.hostname} iface {m['iface']}...")`
			`remote.sh(['sudo',`
			`'ip', 'addr', 'del',`
			`str(m['static']) + '/' + str(ctx.vip['vnet'].prefixlen),`
			`'dev', m['iface']])`

			`for vip in ctx.vip['vips']:`
			`remote.sh(`
			`[`
			`'sudo',`
			`'ip', 'addr', 'del',`
			`str(vip) + '/' + str(ctx.vip['vnet'].prefixlen),`
			`'dev', m['iface']`
			`],`
			`check_status=False,`
			`)`