ceph/teuthology/task/lost_unfound.py

import logging
import ceph_manager
from teuthology import misc as teuthology


log = logging.getLogger(__name__)


def rados(ctx, remote, cmd):
    """
    Run the ``rados`` command on ``remote`` and hand back its exit status.

    The invocation is prefixed with ``<testdir>/enable-coredump``,
    ``ceph-coverage`` and ``<testdir>/archive/coverage``, where
    ``<testdir>`` comes from ``teuthology.get_testdir(ctx)``.  The command
    is run with ``check_status=False`` and the exit status is returned, so
    the caller decides what a failure means.

    :param ctx: run context, used to look up the test directory
    :param remote: remote on which the command is run
    :param cmd: list of string arguments placed after ``rados``
    :returns: ``exitstatus`` of the finished process
    """
    testdir = teuthology.get_testdir(ctx)
    log.info("rados %s", ' '.join(cmd))
    wrapper = [
        '{tdir}/enable-coredump'.format(tdir=testdir),
        'ceph-coverage',
        '{tdir}/archive/coverage'.format(tdir=testdir),
        'rados',
    ]
    result = remote.run(args=wrapper + list(cmd), check_status=False)
    return result.exitstatus

def _flush_pg_stats(manager, osds):
    """
    Tell each listed osd, in the order given, to flush its pg stats.

    :param manager: CephManager used to issue the cluster commands
    :param osds: iterable of integer osd ids
    """
    for osd in osds:
        manager.raw_cluster_cmd('tell', 'osd.%d' % osd, 'flush_pg_stats')


def task(ctx, config):
    """
    Test handling of lost objects.

    The scenario driven below:

    1. Wait until three osds are up and the cluster is clean.
    2. Take osd.2 down and out, then create ``existing_N`` objects and
       create-then-remove ``existed_N`` objects.
    3. Delay recovery on osd.1, kill osd.0, and write ``new_N``,
       ``existed_N`` and ``existing_N`` again.
    4. Revive osd.0 with recovery delayed, then kill osd.1 and mark it
       lost.
    5. Revive osd.2, check that unfound objects are reported, cross-check
       the per-pg counts against ``list_pg_missing`` and revert them with
       ``pg <pgid> mark_unfound_lost revert``.
    6. Check that reading ``new_N`` and ``existed_N`` fails while reading
       ``existing_N`` succeeds, then revive osd.1 and wait for clean.

    :param ctx: run context (cluster, daemons, test directory)
    :param config: task configuration; ``None`` is treated as ``{}`` and
                   anything that is not a dict fails an assertion
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'lost_unfound task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    while len(manager.get_osd_status()['up']) < 3:
        manager.sleep(10)
    _flush_pg_stats(manager, (0, 1, 2))
    manager.wait_for_clean()

    # something that is always there
    dummyfile = '/etc/fstab'

    # take an osd out until the very end
    manager.kill_osd(2)
    manager.mark_down_osd(2)
    manager.mark_out_osd(2)

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile])

    _flush_pg_stats(manager, (0, 1))
    manager.wait_for_recovery()

    # create old objects
    for f in range(1, 10):
        rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', 'data', 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', 'data', 'rm', 'existed_%d' % f])

    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
            'tell', 'osd.1',
            'injectargs',
            '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000'
            )

    manager.kill_osd(0)
    manager.mark_down_osd(0)

    # these writes happen while osd.0 is down and osd.2 is out
    for f in range(1, 10):
        rados(ctx, mon, ['-p', 'data', 'put', 'new_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', 'data', 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])

    # bring osd.0 back up, let it peer, but don't replicate the new
    # objects...
    osd0 = ctx.daemons.get_daemon('osd', 0)
    log.info('osd.0 command_args is %s', osd0.command_args)
    osd0.command_kwargs['args'].extend([
            '--osd-recovery-delay-start', '1000'
            ])
    manager.revive_osd(0)
    manager.mark_in_osd(0)
    manager.wait_till_osd_is_up(0)

    _flush_pg_stats(manager, (1, 0))
    manager.wait_till_active()

    # take out osd.1 and the only copy of those objects.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.mark_out_osd(1)
    manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')

    # bring up osd.2 so that things would otherwise, in theory, recover fully
    manager.revive_osd(2)
    manager.mark_in_osd(2)
    manager.wait_till_osd_is_up(2)

    _flush_pg_stats(manager, (0, 2))
    manager.wait_till_active()
    _flush_pg_stats(manager, (0, 2))

    # verify that there are unfound objects
    unfound = manager.get_num_unfound_objects()
    log.info("there are %d unfound objects" % unfound)
    assert unfound, 'expected unfound objects after losing osd.1, found none'

    # mark stuff lost
    for pg in manager.get_pg_stats():
        pg_unfound = pg['stat_sum']['num_objects_unfound']
        if pg_unfound > 0:
            primary = 'osd.%d' % pg['acting'][0]

            # verify that i can list them direct from the osd
            log.info('listing missing/lost in %s state %s', pg['pgid'],
                     pg['state'])
            m = manager.list_pg_missing(pg['pgid'])
            assert m['num_unfound'] == pg_unfound, \
                'pg %s: list_missing reports %s unfound, pg stats report %s' % (
                    pg['pgid'], m['num_unfound'], pg_unfound)
            # an object with no known location is counted as unfound
            num_unfound = sum(1 for o in m['objects'] if not o['locations'])
            assert m['num_unfound'] == num_unfound, \
                'pg %s: num_unfound is %s but %d objects have no location' % (
                    pg['pgid'], m['num_unfound'], num_unfound)

            log.info("reverting unfound in %s on %s", pg['pgid'], primary)
            manager.raw_cluster_cmd('pg', pg['pgid'],
                                    'mark_unfound_lost', 'revert')
        else:
            log.info("no unfound in %s", pg['pgid'])

    for osd in (0, 2):
        manager.raw_cluster_cmd('tell', 'osd.%d' % osd,
                                'debug', 'kick_recovery_wq', '5')
    _flush_pg_stats(manager, (0, 2))
    manager.wait_for_recovery()

    # verify result: a non-zero rados exit status means the read failed
    for f in range(1, 10):
        err = rados(ctx, mon, ['-p', 'data', 'get', 'new_%d' % f, '-'])
        assert err, 'new_%d should not be readable after revert' % f
        err = rados(ctx, mon, ['-p', 'data', 'get', 'existed_%d' % f, '-'])
        assert err, 'existed_%d should not be readable after revert' % f
        err = rados(ctx, mon, ['-p', 'data', 'get', 'existing_%d' % f, '-'])
        assert not err, 'existing_%d should be readable after revert' % f

    # see if osd.1 can cope
    manager.revive_osd(1)
    manager.mark_in_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.wait_for_clean()
add lost_unfound task Also some misc useful bits to ceph_manager. 2011-10-17 22:32:22 +00:00			`import logging`
			`import ceph_manager`
			`from teuthology import misc as teuthology`


			`log = logging.getLogger(__name__)`


Replace /tmp/cephtest/ with configurable path Teuthology uses /tmp/cephtest/ as the scratch test directory for a run. This patch replaces /tmp/cephtest/ everywhere with a per-run directory: {basedir}/{rundir} where {basedir} is a directory configured in .teuthology.yaml (/tmp/cephtest if not specified), and {rundir} is the name of the run, as given in --name. If no name is specified, {user}-{timestamp} is used. To get the old behavior (/tmp/cephtest), set test_path: /tmp/cephtest in .teuthology.yaml. This change was motivated by #3782, which requires a test dir that survives across reboots, but also resolves #3767. Signed-off-by: Sam Lang <sam.lang@inktank.com> Reviewed-by: Josh Durgin <josh.durgin@inktank.com> 2013-01-23 20:37:39 +00:00			`def rados(ctx, remote, cmd):`
			`testdir = teuthology.get_testdir(ctx)`
add lost_unfound task Also some misc useful bits to ceph_manager. 2011-10-17 22:32:22 +00:00			`log.info("rados %s" % ' '.join(cmd))`
			`pre = [`
Replace /tmp/cephtest/ with configurable path Teuthology uses /tmp/cephtest/ as the scratch test directory for a run. This patch replaces /tmp/cephtest/ everywhere with a per-run directory: {basedir}/{rundir} where {basedir} is a directory configured in .teuthology.yaml (/tmp/cephtest if not specified), and {rundir} is the name of the run, as given in --name. If no name is specified, {user}-{timestamp} is used. To get the old behavior (/tmp/cephtest), set test_path: /tmp/cephtest in .teuthology.yaml. This change was motivated by #3782, which requires a test dir that survives across reboots, but also resolves #3767. Signed-off-by: Sam Lang <sam.lang@inktank.com> Reviewed-by: Josh Durgin <josh.durgin@inktank.com> 2013-01-23 20:37:39 +00:00			`'{tdir}/enable-coredump'.format(tdir=testdir),`
Install ceph debs and use installed debs The ceph task installs ceph using the debian packages now, and all invocations of binaries installed in {tmpdir}/binary/usr/local/bin/ are replaced with the use of the binaries installed in standard locations by the debs. Author: Sander Pool <sander.pool@inktank.com> Signed-off-by: Sam Lang <sam.lang@inktank.com> 2013-02-06 19:16:52 +00:00			`'ceph-coverage',`
Replace /tmp/cephtest/ with configurable path Teuthology uses /tmp/cephtest/ as the scratch test directory for a run. This patch replaces /tmp/cephtest/ everywhere with a per-run directory: {basedir}/{rundir} where {basedir} is a directory configured in .teuthology.yaml (/tmp/cephtest if not specified), and {rundir} is the name of the run, as given in --name. If no name is specified, {user}-{timestamp} is used. To get the old behavior (/tmp/cephtest), set test_path: /tmp/cephtest in .teuthology.yaml. This change was motivated by #3782, which requires a test dir that survives across reboots, but also resolves #3767. Signed-off-by: Sam Lang <sam.lang@inktank.com> Reviewed-by: Josh Durgin <josh.durgin@inktank.com> 2013-01-23 20:37:39 +00:00			`'{tdir}/archive/coverage'.format(tdir=testdir),`
Install ceph debs and use installed debs The ceph task installs ceph using the debian packages now, and all invocations of binaries installed in {tmpdir}/binary/usr/local/bin/ are replaced with the use of the binaries installed in standard locations by the debs. Author: Sander Pool <sander.pool@inktank.com> Signed-off-by: Sam Lang <sam.lang@inktank.com> 2013-02-06 19:16:52 +00:00			`'rados',`
add lost_unfound task Also some misc useful bits to ceph_manager. 2011-10-17 22:32:22 +00:00			`];`
			`pre.extend(cmd)`
			`proc = remote.run(`
			`args=pre,`
			`check_status=False`
			`)`
			`return proc.exitstatus`

			`def task(ctx, config):`
			`"""`
			`Test handling of lost objects.`
			`"""`
			`if config is None:`
			`config = {}`
			`assert isinstance(config, dict), \`
lost_unfound: typo 2012-01-11 00:21:00 +00:00			`'lost_unfound task only accepts a dict for configuration'`
add lost_unfound task Also some misc useful bits to ceph_manager. 2011-10-17 22:32:22 +00:00			`first_mon = teuthology.get_first_mon(ctx, config)`
			`(mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()`

			`manager = ceph_manager.CephManager(`
			`mon,`
			`ctx=ctx,`
			`logger=log.getChild('ceph_manager'),`
			`)`

fix misc checks that wait for N osds to be up These all cut&pasted broken code, blah! 2012-04-19 19:43:54 +00:00			`while len(manager.get_osd_status()['up']) < 3:`
add lost_unfound task Also some misc useful bits to ceph_manager. 2011-10-17 22:32:22 +00:00			`manager.sleep(10)`
			`manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')`
			`manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')`
			`manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')`
wait_till_clean -> wait_for_clean and wait_for_recovery Clean now also means the correct number of replicas, whereas recovered means we have done all the work we can do given the replicas/osds we have. For example, degraded and clean are now mutually exclusive. Also move away from 'till'. 2012-02-18 05:53:25 +00:00			`manager.wait_for_clean()`
add lost_unfound task Also some misc useful bits to ceph_manager. 2011-10-17 22:32:22 +00:00
			`# something that is always there`
			`dummyfile = '/etc/fstab'`

			`# take an osd out until the very end`
			`manager.kill_osd(2)`
			`manager.mark_down_osd(2)`
			`manager.mark_out_osd(2)`

			`# kludge to make sure they get a map`
Replace /tmp/cephtest/ with configurable path Teuthology uses /tmp/cephtest/ as the scratch test directory for a run. This patch replaces /tmp/cephtest/ everywhere with a per-run directory: {basedir}/{rundir} where {basedir} is a directory configured in .teuthology.yaml (/tmp/cephtest if not specified), and {rundir} is the name of the run, as given in --name. If no name is specified, {user}-{timestamp} is used. To get the old behavior (/tmp/cephtest), set test_path: /tmp/cephtest in .teuthology.yaml. This change was motivated by #3782, which requires a test dir that survives across reboots, but also resolves #3767. Signed-off-by: Sam Lang <sam.lang@inktank.com> Reviewed-by: Josh Durgin <josh.durgin@inktank.com> 2013-01-23 20:37:39 +00:00			`rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile])`
add lost_unfound task Also some misc useful bits to ceph_manager. 2011-10-17 22:32:22 +00:00
			`manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')`
			`manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')`
wait_till_clean -> wait_for_clean and wait_for_recovery Clean now also means the correct number of replicas, whereas recovered means we have done all the work we can do given the replicas/osds we have. For example, degraded and clean are now mutually exclusive. Also move away from 'till'. 2012-02-18 05:53:25 +00:00			`manager.wait_for_recovery()`
add lost_unfound task Also some misc useful bits to ceph_manager. 2011-10-17 22:32:22 +00:00
			`# create old objects`
			`for f in range(1, 10):`
Replace /tmp/cephtest/ with configurable path Teuthology uses /tmp/cephtest/ as the scratch test directory for a run. This patch replaces /tmp/cephtest/ everywhere with a per-run directory: {basedir}/{rundir} where {basedir} is a directory configured in .teuthology.yaml (/tmp/cephtest if not specified), and {rundir} is the name of the run, as given in --name. If no name is specified, {user}-{timestamp} is used. To get the old behavior (/tmp/cephtest), set test_path: /tmp/cephtest in .teuthology.yaml. This change was motivated by #3782, which requires a test dir that survives across reboots, but also resolves #3767. Signed-off-by: Sam Lang <sam.lang@inktank.com> Reviewed-by: Josh Durgin <josh.durgin@inktank.com> 2013-01-23 20:37:39 +00:00			`rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])`
			`rados(ctx, mon, ['-p', 'data', 'put', 'existed_%d' % f, dummyfile])`
			`rados(ctx, mon, ['-p', 'data', 'rm', 'existed_%d' % f])`
add lost_unfound task Also some misc useful bits to ceph_manager. 2011-10-17 22:32:22 +00:00
lost_unfound: make test work with backfill If we backfill, we fail to peer instead of having every object show up as 'unfound'. Avoid that by preventing log trimming, so that we always do log recovery for this test. 2012-01-12 23:08:11 +00:00			`# delay recovery, and make the pg log very long (to prevent backfill)`
add lost_unfound task Also some misc useful bits to ceph_manager. 2011-10-17 22:32:22 +00:00			`manager.raw_cluster_cmd(`
			`'tell', 'osd.1',`
lost_unfound: make test work with backfill If we backfill, we fail to peer instead of having every object show up as 'unfound'. Avoid that by preventing log trimming, so that we always do log recovery for this test. 2012-01-12 23:08:11 +00:00			`'injectargs',`
			`'--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000'`
add lost_unfound task Also some misc useful bits to ceph_manager. 2011-10-17 22:32:22 +00:00			`)`

			`manager.kill_osd(0)`
			`manager.mark_down_osd(0)`

			`for f in range(1, 10):`
Replace /tmp/cephtest/ with configurable path Teuthology uses /tmp/cephtest/ as the scratch test directory for a run. This patch replaces /tmp/cephtest/ everywhere with a per-run directory: {basedir}/{rundir} where {basedir} is a directory configured in .teuthology.yaml (/tmp/cephtest if not specified), and {rundir} is the name of the run, as given in --name. If no name is specified, {user}-{timestamp} is used. To get the old behavior (/tmp/cephtest), set test_path: /tmp/cephtest in .teuthology.yaml. This change was motivated by #3782, which requires a test dir that survives across reboots, but also resolves #3767. Signed-off-by: Sam Lang <sam.lang@inktank.com> Reviewed-by: Josh Durgin <josh.durgin@inktank.com> 2013-01-23 20:37:39 +00:00			`rados(ctx, mon, ['-p', 'data', 'put', 'new_%d' % f, dummyfile])`
			`rados(ctx, mon, ['-p', 'data', 'put', 'existed_%d' % f, dummyfile])`
			`rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])`
add lost_unfound task Also some misc useful bits to ceph_manager. 2011-10-17 22:32:22 +00:00
			`# bring osd.0 back up, let it peer, but don't replicate the new`
			`# objects...`
			`log.info('osd.0 command_args is %s' % 'foo')`
			`log.info(ctx.daemons.get_daemon('osd', 0).command_args)`
			`ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend([`
			`'--osd-recovery-delay-start', '1000'`
			`])`
			`manager.revive_osd(0)`
lost_unfound: mark osds in when we revive them so that we test what we meant to. It also lets us actually go clean at the very end. 2012-02-20 03:40:45 +00:00			`manager.mark_in_osd(0)`
add lost_unfound task Also some misc useful bits to ceph_manager. 2011-10-17 22:32:22 +00:00			`manager.wait_till_osd_is_up(0)`

			`manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')`
			`manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')`
			`manager.wait_till_active()`

			`# take out osd.1 and the only copy of those objects.`
			`manager.kill_osd(1)`
			`manager.mark_down_osd(1)`
			`manager.mark_out_osd(1)`
			`manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')`

			`# bring up osd.2 so that things would otherwise, in theory, recover fully`
			`manager.revive_osd(2)`
lost_unfound: mark osds in when we revive them so that we test what we meant to. It also lets us actually go clean at the very end. 2012-02-20 03:40:45 +00:00			`manager.mark_in_osd(2)`
add lost_unfound task Also some misc useful bits to ceph_manager. 2011-10-17 22:32:22 +00:00			`manager.wait_till_osd_is_up(2)`

			`manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')`
			`manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')`
			`manager.wait_till_active()`
			`manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')`
			`manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')`

			`# verify that there are unfound objects`
			`unfound = manager.get_num_unfound_objects()`
			`log.info("there are %d unfound objects" % unfound)`
			`assert unfound`

			`# mark stuff lost`
			`pgs = manager.get_pg_stats()`
			`for pg in pgs:`
			`if pg['stat_sum']['num_objects_unfound'] > 0:`
			`primary = 'osd.%d' % pg['acting'][0]`
lost_unfound: list missing/unfound for each pg and verify the unfound counts This also tests the pg list_missing functionality. 2012-02-24 19:11:59 +00:00
			`# verify that i can list them direct from the osd`
github.com/NewDreamNetwork -> github.com/ceph 2012-03-02 18:55:19 +00:00			`log.info('listing missing/lost in %s state %s', pg['pgid'],`
			`pg['state']);`
lost_unfound: list missing/unfound for each pg and verify the unfound counts This also tests the pg list_missing functionality. 2012-02-24 19:11:59 +00:00			`m = manager.list_pg_missing(pg['pgid'])`
			`#log.info('%s' % m)`
			`assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']`
			`num_unfound=0`
			`for o in m['objects']:`
			`if len(o['locations']) == 0:`
			`num_unfound += 1`
			`assert m['num_unfound'] == num_unfound`

add lost_unfound task Also some misc useful bits to ceph_manager. 2011-10-17 22:32:22 +00:00			`log.info("reverting unfound in %s on %s", pg['pgid'], primary)`
lost_unfound: new mark_unfound_lost syntax 2012-02-24 04:07:24 +00:00			`manager.raw_cluster_cmd('pg', pg['pgid'],`
			`'mark_unfound_lost', 'revert')`
add lost_unfound task Also some misc useful bits to ceph_manager. 2011-10-17 22:32:22 +00:00			`else:`
			`log.info("no unfound in %s", pg['pgid'])`

			`manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')`
			`manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')`
			`manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')`
			`manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')`
wait_till_clean -> wait_for_clean and wait_for_recovery Clean now also means the correct number of replicas, whereas recovered means we have done all the work we can do given the replicas/osds we have. For example, degraded and clean are now mutually exclusive. Also move away from 'till'. 2012-02-18 05:53:25 +00:00			`manager.wait_for_recovery()`
add lost_unfound task Also some misc useful bits to ceph_manager. 2011-10-17 22:32:22 +00:00
			`# verify result`
			`for f in range(1, 10):`
Replace /tmp/cephtest/ with configurable path Teuthology uses /tmp/cephtest/ as the scratch test directory for a run. This patch replaces /tmp/cephtest/ everywhere with a per-run directory: {basedir}/{rundir} where {basedir} is a directory configured in .teuthology.yaml (/tmp/cephtest if not specified), and {rundir} is the name of the run, as given in --name. If no name is specified, {user}-{timestamp} is used. To get the old behavior (/tmp/cephtest), set test_path: /tmp/cephtest in .teuthology.yaml. This change was motivated by #3782, which requires a test dir that survives across reboots, but also resolves #3767. Signed-off-by: Sam Lang <sam.lang@inktank.com> Reviewed-by: Josh Durgin <josh.durgin@inktank.com> 2013-01-23 20:37:39 +00:00			`err = rados(ctx, mon, ['-p', 'data', 'get', 'new_%d' % f, '-'])`
add lost_unfound task Also some misc useful bits to ceph_manager. 2011-10-17 22:32:22 +00:00			`assert err`
Replace /tmp/cephtest/ with configurable path Teuthology uses /tmp/cephtest/ as the scratch test directory for a run. This patch replaces /tmp/cephtest/ everywhere with a per-run directory: {basedir}/{rundir} where {basedir} is a directory configured in .teuthology.yaml (/tmp/cephtest if not specified), and {rundir} is the name of the run, as given in --name. If no name is specified, {user}-{timestamp} is used. To get the old behavior (/tmp/cephtest), set test_path: /tmp/cephtest in .teuthology.yaml. This change was motivated by #3782, which requires a test dir that survives across reboots, but also resolves #3767. Signed-off-by: Sam Lang <sam.lang@inktank.com> Reviewed-by: Josh Durgin <josh.durgin@inktank.com> 2013-01-23 20:37:39 +00:00			`err = rados(ctx, mon, ['-p', 'data', 'get', 'existed_%d' % f, '-'])`
add lost_unfound task Also some misc useful bits to ceph_manager. 2011-10-17 22:32:22 +00:00			`assert err`
Replace /tmp/cephtest/ with configurable path Teuthology uses /tmp/cephtest/ as the scratch test directory for a run. This patch replaces /tmp/cephtest/ everywhere with a per-run directory: {basedir}/{rundir} where {basedir} is a directory configured in .teuthology.yaml (/tmp/cephtest if not specified), and {rundir} is the name of the run, as given in --name. If no name is specified, {user}-{timestamp} is used. To get the old behavior (/tmp/cephtest), set test_path: /tmp/cephtest in .teuthology.yaml. This change was motivated by #3782, which requires a test dir that survives across reboots, but also resolves #3767. Signed-off-by: Sam Lang <sam.lang@inktank.com> Reviewed-by: Josh Durgin <josh.durgin@inktank.com> 2013-01-23 20:37:39 +00:00			`err = rados(ctx, mon, ['-p', 'data', 'get', 'existing_%d' % f, '-'])`
add lost_unfound task Also some misc useful bits to ceph_manager. 2011-10-17 22:32:22 +00:00			`assert not err`

			`# see if osd.1 can cope`
			`manager.revive_osd(1)`
lost_unfound: mark osds in when we revive them so that we test what we meant to. It also lets us actually go clean at the very end. 2012-02-20 03:40:45 +00:00			`manager.mark_in_osd(1)`
add lost_unfound task Also some misc useful bits to ceph_manager. 2011-10-17 22:32:22 +00:00			`manager.wait_till_osd_is_up(1)`
wait_till_clean -> wait_for_clean and wait_for_recovery Clean now also means the correct number of replicas, whereas recovered means we have done all the work we can do given the replicas/osds we have. For example, degraded and clean are now mutually exclusive. Also move away from 'till'. 2012-02-18 05:53:25 +00:00			`manager.wait_for_clean()`