From 89beed1b0a46d1e55eb9d65f8144092d753a1fb2 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Tue, 2 Dec 2014 13:30:06 +0000
Subject: [PATCH 1/2] tasks/ceph: conservative PG count in FS pools

This was tripping over the recent commit 42c85e80 in Ceph master, which
tightens the limits on acceptable PG counts per OSD, and was making
teuthology runs fail due to never going clean.

Rather than put in a new hardcoded count, infer it from config.  Move
some code around so that the ceph task can get at a Filesystem object
to use in FS setup (this already has conf-getting methods).

Signed-off-by: John Spray
---
 tasks/ceph.py              | 36 ++++++++++--------------------------
 tasks/ceph_manager.py      | 26 ++++++++++++++++++++++++++
 tasks/cephfs/filesystem.py | 23 ++++++++++++++++-------
 3 files changed, 52 insertions(+), 33 deletions(-)

diff --git a/tasks/ceph.py b/tasks/ceph.py
index bcbf817861a..477c1efad58 100644
--- a/tasks/ceph.py
+++ b/tasks/ceph.py
@@ -12,7 +12,8 @@
 import os
 import json
 import time
-from ceph_manager import CephManager
+from ceph_manager import CephManager, write_conf, DEFAULT_CONF_PATH
+from tasks.cephfs.filesystem import Filesystem
 from teuthology import misc as teuthology
 from teuthology import contextutil
 from teuthology.orchestra import run
@@ -20,7 +21,7 @@
 import ceph_client as cclient
 from teuthology.orchestra.run import CommandFailedError
 from teuthology.orchestra.daemon import DaemonGroup
-DEFAULT_CONF_PATH = '/etc/ceph/ceph.conf'
+
 CEPH_ROLE_TYPES = ['mon', 'osd', 'mds', 'rgw']
 
 log = logging.getLogger(__name__)
@@ -145,28 +146,6 @@ def valgrind_post(ctx, config):
         raise valgrind_exception
 
 
-def write_conf(ctx, conf_path=DEFAULT_CONF_PATH):
-    conf_fp = StringIO()
-    ctx.ceph.conf.write(conf_fp)
-    conf_fp.seek(0)
-    writes = ctx.cluster.run(
-        args=[
-            'sudo', 'mkdir', '-p', '/etc/ceph', run.Raw('&&'),
-            'sudo', 'chmod', '0755', '/etc/ceph', run.Raw('&&'),
-            'sudo', 'python',
-            '-c',
-            'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))',
-            conf_path,
-            run.Raw('&&'),
-            'sudo', 'chmod', '0644', conf_path,
-        ],
-        stdin=run.PIPE,
-        wait=False)
-    log.warn("writes: ")
-    teuthology.feed_many_stdins_and_close(conf_fp, writes)
-    run.wait(writes)
-
-
 @contextlib.contextmanager
 def cephfs_setup(ctx, config):
     testdir = teuthology.get_testdir(ctx)
@@ -197,8 +176,13 @@ def cephfs_setup(ctx, config):
         if metadata_pool_exists:
             log.info("Metadata pool already exists, skipping")
         else:
-            mon_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create', 'metadata', '256'])
-            mon_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create', 'data', '256'])
+            ceph_fs = Filesystem(ctx, config)
+            pg_warn_min_per_osd = int(ceph_fs.get_config('mon_pg_warn_min_per_osd'))
+            osd_count = len(list(teuthology.all_roles_of_type(ctx.cluster, 'osd')))
+            pgs_per_fs_pool = pg_warn_min_per_osd * osd_count
+
+            mon_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create', 'metadata', pgs_per_fs_pool.__str__()])
+            mon_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create', 'data', pgs_per_fs_pool.__str__()])
 
         # Use 'newfs' to work with either old or new Ceph, until the 'fs new'
        # stuff is all landed.
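A minimal standalone sketch of the PG sizing introduced in the cephfs_setup
hunk above; the numbers are illustrative only (in the task the threshold
comes from get_config() and the OSD count from the job's role list):

    # Derive a per-pool PG count from the mon's warning threshold and the
    # number of OSDs in the job, instead of the old hardcoded '256'.
    def pgs_per_fs_pool(pg_warn_min_per_osd, osd_count):
        return pg_warn_min_per_osd * osd_count

    # e.g. a 3-OSD job with mon_pg_warn_min_per_osd = 30 (assumed value)
    # gets 90 PGs for each of the metadata and data pools:
    print pgs_per_fs_pool(30, 3)  # -> 90
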
diff --git a/tasks/ceph_manager.py b/tasks/ceph_manager.py
index 1de92796085..327dd08ba77 100644
--- a/tasks/ceph_manager.py
+++ b/tasks/ceph_manager.py
@@ -12,8 +12,34 @@
 from teuthology import misc as teuthology
 from tasks.scrub import Scrubber
 from util.rados import cmd_erasure_code_profile
 from teuthology.orchestra.remote import Remote
+from teuthology.orchestra import run
 import subprocess
+
+DEFAULT_CONF_PATH = '/etc/ceph/ceph.conf'
+
+
+def write_conf(ctx, conf_path=DEFAULT_CONF_PATH):
+    conf_fp = StringIO()
+    ctx.ceph.conf.write(conf_fp)
+    conf_fp.seek(0)
+    writes = ctx.cluster.run(
+        args=[
+            'sudo', 'mkdir', '-p', '/etc/ceph', run.Raw('&&'),
+            'sudo', 'chmod', '0755', '/etc/ceph', run.Raw('&&'),
+            'sudo', 'python',
+            '-c',
+            'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))',
+            conf_path,
+            run.Raw('&&'),
+            'sudo', 'chmod', '0644', conf_path,
+        ],
+        stdin=run.PIPE,
+        wait=False)
+    teuthology.feed_many_stdins_and_close(conf_fp, writes)
+    run.wait(writes)
+
+
 def make_admin_daemon_dir(ctx, remote):
     """
     Create /var/run/ceph directory on remote site.
diff --git a/tasks/cephfs/filesystem.py b/tasks/cephfs/filesystem.py
index 2db056d2bbd..f96c69bcee2 100644
--- a/tasks/cephfs/filesystem.py
+++ b/tasks/cephfs/filesystem.py
@@ -3,11 +3,11 @@
 from StringIO import StringIO
 import json
 import logging
 import time
 
-from tasks.ceph import write_conf
 from teuthology import misc
 from teuthology.nuke import clear_firewall
 from teuthology.parallel import parallel
+from tasks.ceph_manager import write_conf
 
 from tasks import ceph_manager
@@ -51,7 +51,12 @@ class Filesystem(object):
         return list(result)
 
     def get_config(self, key):
-        return self.mds_asok(['config', 'get', key])[key]
+        """
+        Use the mon instead of the MDS asok, so that MDS doesn't have to be running
+        for us to query config.
+        """
+        service_name, service_id = misc.get_first_mon(self._ctx, self._config).split(".")
+        return self.json_asok(['config', 'get', key], service_name, service_id)[key]
 
     def set_ceph_conf(self, subsys, key, value):
         if subsys not in self._ctx.ceph.conf:
@@ -200,17 +205,21 @@ class Filesystem(object):
         return version
 
-    def mds_asok(self, command, mds_id=None):
-        if mds_id is None:
-            mds_id = self.get_lone_mds_id()
-        proc = self.mon_manager.admin_socket('mds', mds_id, command)
+    def json_asok(self, command, service_type, service_id):
+        proc = self.mon_manager.admin_socket(service_type, service_id, command)
         response_data = proc.stdout.getvalue()
-        log.info("mds_asok output: {0}".format(response_data))
+        log.info("_json_asok output: {0}".format(response_data))
         if response_data.strip():
             return json.loads(response_data)
         else:
             return None
 
 
+    def mds_asok(self, command, mds_id=None):
+        if mds_id is None:
+            mds_id = self.get_lone_mds_id()
+
+        return self.json_asok(command, 'mds', mds_id)
+
     def set_clients_block(self, blocked, mds_id=None):
         """
         Block (using iptables) client communications to this MDS.  Be careful: if
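The get_config()/json_asok() change above routes config queries through
CephManager.admin_socket(); roughly the same query can be made by hand
against a mon admin socket with the 'ceph daemon' CLI.  A sketch, assuming a
monitor named mon.a and default socket paths (not part of the patch):

    import json
    import subprocess

    def mon_config_get(key, mon_id='a'):
        # 'ceph daemon <name> config get <key>' prints JSON such as
        # {"mon_pg_warn_min_per_osd": "30"}; values come back as strings,
        # which is why cephfs_setup casts the result with int().
        out = subprocess.check_output(
            ['sudo', 'ceph', 'daemon', 'mon.{0}'.format(mon_id),
             'config', 'get', key])
        return json.loads(out)[key]
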
From 8fc3550002c464716236c08d32799e71452785c4 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Tue, 2 Dec 2014 13:28:08 +0000
Subject: [PATCH 2/2] tasks/ceph: move FS creation inside Filesystem

Leave the legacy handling out in cephfs_setup, move the filesystem
creation stuff into Filesystem.  I anticipate this being the right
place for it if/when we have tests that want to do 'fs rm' 'fs new'
type cycles within themselves.

Signed-off-by: John Spray
---
 tasks/ceph.py              | 12 +-----------
 tasks/cephfs/filesystem.py | 13 +++++++++++--
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/tasks/ceph.py b/tasks/ceph.py
index 477c1efad58..0a10822676b 100644
--- a/tasks/ceph.py
+++ b/tasks/ceph.py
@@ -177,17 +177,7 @@ def cephfs_setup(ctx, config):
             log.info("Metadata pool already exists, skipping")
         else:
             ceph_fs = Filesystem(ctx, config)
-            pg_warn_min_per_osd = int(ceph_fs.get_config('mon_pg_warn_min_per_osd'))
-            osd_count = len(list(teuthology.all_roles_of_type(ctx.cluster, 'osd')))
-            pgs_per_fs_pool = pg_warn_min_per_osd * osd_count
-
-            mon_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create', 'metadata', pgs_per_fs_pool.__str__()])
-            mon_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create', 'data', pgs_per_fs_pool.__str__()])
-
-        # Use 'newfs' to work with either old or new Ceph, until the 'fs new'
-        # stuff is all landed.
-        mon_remote.run(args=['sudo', 'ceph', 'mds', 'newfs', '1', '2'])
-        # mon_remote.run(args=['sudo', 'ceph', 'fs', 'new', 'default', 'metadata', 'data'])
+            ceph_fs.create()
 
         is_active_mds = lambda role: role.startswith('mds.') and not role.endswith('-s') and role.find('-s-') == -1
         all_roles = [item for remote_roles in mdss.remotes.values() for item in remote_roles]
diff --git a/tasks/cephfs/filesystem.py b/tasks/cephfs/filesystem.py
index f96c69bcee2..837772e64a0 100644
--- a/tasks/cephfs/filesystem.py
+++ b/tasks/cephfs/filesystem.py
@@ -34,14 +34,23 @@ class Filesystem(object):
             raise RuntimeError("This task requires at least one MDS")
 
         first_mon = misc.get_first_mon(ctx, config)
-        (mon_remote,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-        self.mon_manager = ceph_manager.CephManager(mon_remote, ctx=ctx, logger=log.getChild('ceph_manager'))
+        (self.mon_remote,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+        self.mon_manager = ceph_manager.CephManager(self.mon_remote, ctx=ctx, logger=log.getChild('ceph_manager'))
         self.mds_daemons = dict([(mds_id, self._ctx.daemons.get_daemon('mds', mds_id)) for mds_id in self.mds_ids])
 
         client_list = list(misc.all_roles_of_type(self._ctx.cluster, 'client'))
         self.client_id = client_list[0]
         self.client_remote = list(misc.get_clients(ctx=ctx, roles=["client.{0}".format(self.client_id)]))[0][1]
 
+    def create(self):
+        pg_warn_min_per_osd = int(self.get_config('mon_pg_warn_min_per_osd'))
+        osd_count = len(list(misc.all_roles_of_type(self._ctx.cluster, 'osd')))
+        pgs_per_fs_pool = pg_warn_min_per_osd * osd_count
+
+        self.mon_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create', 'metadata', pgs_per_fs_pool.__str__()])
+        self.mon_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create', 'data', pgs_per_fs_pool.__str__()])
+        self.mon_remote.run(args=['sudo', 'ceph', 'fs', 'new', 'default', 'metadata', 'data'])
+
     def get_mds_hostnames(self):
         result = set()
         for mds_id in self.mds_ids:
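The new Filesystem.create() bundles the pool sizing and the 'fs new' call, so
the 'fs rm'/'fs new' cycles mentioned in the commit message could eventually
look something like the sketch below.  This is hypothetical usage, not part
of these patches: 'fs rm' is assumed to be available, and in practice the MDS
daemons would have to be stopped before it will succeed.

    # Hypothetical teardown/recreate cycle inside a test, reusing the new
    # Filesystem.create() helper.  The 'fs rm' step is not implemented by
    # these patches and assumes the MDSs have already been stopped.
    fs = Filesystem(ctx, config)
    fs.create()
    # ... exercise the filesystem ...
    fs.mon_remote.run(args=['sudo', 'ceph', 'fs', 'rm', 'default',
                            '--yes-i-really-mean-it'])
    fs.create()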