From 89beed1b0a46d1e55eb9d65f8144092d753a1fb2 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Tue, 2 Dec 2014 13:30:06 +0000
Subject: [PATCH 1/2] tasks/ceph: conservative PG count in FS pools

This was tripping over the recent commit 42c85e80 in Ceph master, which
tightens the limits on acceptable PG counts per OSD, and was making
teuthology runs fail due to never going clean.

Rather than put in a new hardcoded count, infer it from config.  Move
some code around so that the ceph task can get at a Filesystem object
to use in FS setup (this already has conf-getting methods).

Signed-off-by: John Spray
---
 tasks/ceph.py              | 36 ++++++++++--------------------------
 tasks/ceph_manager.py      | 26 ++++++++++++++++++++++++++
 tasks/cephfs/filesystem.py | 23 ++++++++++++++++-------
 3 files changed, 52 insertions(+), 33 deletions(-)

diff --git a/tasks/ceph.py b/tasks/ceph.py
index bcbf817861a..477c1efad58 100644
--- a/tasks/ceph.py
+++ b/tasks/ceph.py
@@ -12,7 +12,8 @@
 import os
 import json
 import time
-from ceph_manager import CephManager
+from ceph_manager import CephManager, write_conf, DEFAULT_CONF_PATH
+from tasks.cephfs.filesystem import Filesystem
 from teuthology import misc as teuthology
 from teuthology import contextutil
 from teuthology.orchestra import run
@@ -20,7 +21,7 @@
 import ceph_client as cclient
 from teuthology.orchestra.run import CommandFailedError
 from teuthology.orchestra.daemon import DaemonGroup
-DEFAULT_CONF_PATH = '/etc/ceph/ceph.conf'
+
 CEPH_ROLE_TYPES = ['mon', 'osd', 'mds', 'rgw']
 
 log = logging.getLogger(__name__)
@@ -145,28 +146,6 @@ def valgrind_post(ctx, config):
         raise valgrind_exception
 
 
-def write_conf(ctx, conf_path=DEFAULT_CONF_PATH):
-    conf_fp = StringIO()
-    ctx.ceph.conf.write(conf_fp)
-    conf_fp.seek(0)
-    writes = ctx.cluster.run(
-        args=[
-            'sudo', 'mkdir', '-p', '/etc/ceph', run.Raw('&&'),
-            'sudo', 'chmod', '0755', '/etc/ceph', run.Raw('&&'),
-            'sudo', 'python',
-            '-c',
-            'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))',
-            conf_path,
-            run.Raw('&&'),
-            'sudo', 'chmod', '0644', conf_path,
-        ],
-        stdin=run.PIPE,
-        wait=False)
-    log.warn("writes: ")
-    teuthology.feed_many_stdins_and_close(conf_fp, writes)
-    run.wait(writes)
-
-
 @contextlib.contextmanager
 def cephfs_setup(ctx, config):
     testdir = teuthology.get_testdir(ctx)
@@ -197,8 +176,13 @@ def cephfs_setup(ctx, config):
         if metadata_pool_exists:
             log.info("Metadata pool already exists, skipping")
         else:
-            mon_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create', 'metadata', '256'])
-            mon_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create', 'data', '256'])
+            ceph_fs = Filesystem(ctx, config)
+            pg_warn_min_per_osd = int(ceph_fs.get_config('mon_pg_warn_min_per_osd'))
+            osd_count = len(list(teuthology.all_roles_of_type(ctx.cluster, 'osd')))
+            pgs_per_fs_pool = pg_warn_min_per_osd * osd_count
+
+            mon_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create', 'metadata', pgs_per_fs_pool.__str__()])
+            mon_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create', 'data', pgs_per_fs_pool.__str__()])
 
         # Use 'newfs' to work with either old or new Ceph, until the 'fs new'
        # stuff is all landed.
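A minimal standalone sketch of the PG sizing introduced in the cephfs_setup
hunk above; the numbers are illustrative only (in the task the threshold
comes from get_config() and the OSD count from the job's role list):

    # Derive a per-pool PG count from the mon's warning threshold and the
    # number of OSDs in the job, instead of the old hardcoded '256'.
    def pgs_per_fs_pool(pg_warn_min_per_osd, osd_count):
        return pg_warn_min_per_osd * osd_count

    # e.g. a 3-OSD job with mon_pg_warn_min_per_osd = 30 (assumed value)
    # gets 90 PGs for each of the metadata and data pools:
    print pgs_per_fs_pool(30, 3)  # -> 90
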
diff --git a/tasks/ceph_manager.py b/tasks/ceph_manager.py
index 1de92796085..327dd08ba77 100644
--- a/tasks/ceph_manager.py
+++ b/tasks/ceph_manager.py
@@ -12,8 +12,34 @@
 from teuthology import misc as teuthology
 from tasks.scrub import Scrubber
 from util.rados import cmd_erasure_code_profile
 from teuthology.orchestra.remote import Remote
+from teuthology.orchestra import run
 import subprocess
+
+DEFAULT_CONF_PATH = '/etc/ceph/ceph.conf'
+
+
+def write_conf(ctx, conf_path=DEFAULT_CONF_PATH):
+    conf_fp = StringIO()
+    ctx.ceph.conf.write(conf_fp)
+    conf_fp.seek(0)
+    writes = ctx.cluster.run(
+        args=[
+            'sudo', 'mkdir', '-p', '/etc/ceph', run.Raw('&&'),
+            'sudo', 'chmod', '0755', '/etc/ceph', run.Raw('&&'),
+            'sudo', 'python',
+            '-c',
+            'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))',
+            conf_path,
+            run.Raw('&&'),
+            'sudo', 'chmod', '0644', conf_path,
+        ],
+        stdin=run.PIPE,
+        wait=False)
+    teuthology.feed_many_stdins_and_close(conf_fp, writes)
+    run.wait(writes)
+
+
 def make_admin_daemon_dir(ctx, remote):
     """
     Create /var/run/ceph directory on remote site.
diff --git a/tasks/cephfs/filesystem.py b/tasks/cephfs/filesystem.py
index 2db056d2bbd..f96c69bcee2 100644
--- a/tasks/cephfs/filesystem.py
+++ b/tasks/cephfs/filesystem.py
@@ -3,11 +3,11 @@
 from StringIO import StringIO
 import json
 import logging
 import time
 
-from tasks.ceph import write_conf
 from teuthology import misc
 from teuthology.nuke import clear_firewall
 from teuthology.parallel import parallel
+from tasks.ceph_manager import write_conf
 
 from tasks import ceph_manager
@@ -51,7 +51,12 @@ class Filesystem(object):
         return list(result)
 
     def get_config(self, key):
-        return self.mds_asok(['config', 'get', key])[key]
+        """
+        Use the mon instead of the MDS asok, so that MDS doesn't have to be running
+        for us to query config.
+        """
+        service_name, service_id = misc.get_first_mon(self._ctx, self._config).split(".")
+        return self.json_asok(['config', 'get', key], service_name, service_id)[key]
 
     def set_ceph_conf(self, subsys, key, value):
         if subsys not in self._ctx.ceph.conf:
@@ -200,17 +205,21 @@ class Filesystem(object):
         return version
 
-    def mds_asok(self, command, mds_id=None):
-        if mds_id is None:
-            mds_id = self.get_lone_mds_id()
-        proc = self.mon_manager.admin_socket('mds', mds_id, command)
+    def json_asok(self, command, service_type, service_id):
+        proc = self.mon_manager.admin_socket(service_type, service_id, command)
         response_data = proc.stdout.getvalue()
-        log.info("mds_asok output: {0}".format(response_data))
+        log.info("_json_asok output: {0}".format(response_data))
         if response_data.strip():
             return json.loads(response_data)
         else:
             return None
 
 
+    def mds_asok(self, command, mds_id=None):
+        if mds_id is None:
+            mds_id = self.get_lone_mds_id()
+
+        return self.json_asok(command, 'mds', mds_id)
+
     def set_clients_block(self, blocked, mds_id=None):
         """
         Block (using iptables) client communications to this MDS.  Be careful: if
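The get_config()/json_asok() change above routes config queries through
CephManager.admin_socket(); roughly the same query can be made by hand
against a mon admin socket with the 'ceph daemon' CLI.  A sketch, assuming a
monitor named mon.a and default socket paths (not part of the patch):

    import json
    import subprocess

    def mon_config_get(key, mon_id='a'):
        # 'ceph daemon <name> config get <key>' prints JSON such as
        # {"mon_pg_warn_min_per_osd": "30"}; values come back as strings,
        # which is why cephfs_setup casts the result with int().
        out = subprocess.check_output(
            ['sudo', 'ceph', 'daemon', 'mon.{0}'.format(mon_id),
             'config', 'get', key])
        return json.loads(out)[key]
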
From 8fc3550002c464716236c08d32799e71452785c4 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Tue, 2 Dec 2014 13:28:08 +0000
Subject: [PATCH 2/2] tasks/ceph: move FS creation inside Filesystem

Leave the legacy handling out in cephfs_setup, move the filesystem
creation stuff into Filesystem.  I anticipate this being the right
place for it if/when we have tests that want to do 'fs rm' 'fs new'
type cycles within themselves.

Signed-off-by: John Spray
---
 tasks/ceph.py              | 12 +-----------
 tasks/cephfs/filesystem.py | 13 +++++++++++--
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/tasks/ceph.py b/tasks/ceph.py
index 477c1efad58..0a10822676b 100644
--- a/tasks/ceph.py
+++ b/tasks/ceph.py
@@ -177,17 +177,7 @@ def cephfs_setup(ctx, config):
             log.info("Metadata pool already exists, skipping")
         else:
             ceph_fs = Filesystem(ctx, config)
-            pg_warn_min_per_osd = int(ceph_fs.get_config('mon_pg_warn_min_per_osd'))
-            osd_count = len(list(teuthology.all_roles_of_type(ctx.cluster, 'osd')))
-            pgs_per_fs_pool = pg_warn_min_per_osd * osd_count
-
-            mon_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create', 'metadata', pgs_per_fs_pool.__str__()])
-            mon_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create', 'data', pgs_per_fs_pool.__str__()])
-
-        # Use 'newfs' to work with either old or new Ceph, until the 'fs new'
-        # stuff is all landed.
-        mon_remote.run(args=['sudo', 'ceph', 'mds', 'newfs', '1', '2'])
-        # mon_remote.run(args=['sudo', 'ceph', 'fs', 'new', 'default', 'metadata', 'data'])
+            ceph_fs.create()
 
         is_active_mds = lambda role: role.startswith('mds.') and not role.endswith('-s') and role.find('-s-') == -1
         all_roles = [item for remote_roles in mdss.remotes.values() for item in remote_roles]
diff --git a/tasks/cephfs/filesystem.py b/tasks/cephfs/filesystem.py
index f96c69bcee2..837772e64a0 100644
--- a/tasks/cephfs/filesystem.py
+++ b/tasks/cephfs/filesystem.py
@@ -34,14 +34,23 @@ class Filesystem(object):
             raise RuntimeError("This task requires at least one MDS")
 
         first_mon = misc.get_first_mon(ctx, config)
-        (mon_remote,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-        self.mon_manager = ceph_manager.CephManager(mon_remote, ctx=ctx, logger=log.getChild('ceph_manager'))
+        (self.mon_remote,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+        self.mon_manager = ceph_manager.CephManager(self.mon_remote, ctx=ctx, logger=log.getChild('ceph_manager'))
         self.mds_daemons = dict([(mds_id, self._ctx.daemons.get_daemon('mds', mds_id)) for mds_id in self.mds_ids])
 
         client_list = list(misc.all_roles_of_type(self._ctx.cluster, 'client'))
         self.client_id = client_list[0]
         self.client_remote = list(misc.get_clients(ctx=ctx, roles=["client.{0}".format(self.client_id)]))[0][1]
 
+    def create(self):
+        pg_warn_min_per_osd = int(self.get_config('mon_pg_warn_min_per_osd'))
+        osd_count = len(list(misc.all_roles_of_type(self._ctx.cluster, 'osd')))
+        pgs_per_fs_pool = pg_warn_min_per_osd * osd_count
+
+        self.mon_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create', 'metadata', pgs_per_fs_pool.__str__()])
+        self.mon_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create', 'data', pgs_per_fs_pool.__str__()])
+        self.mon_remote.run(args=['sudo', 'ceph', 'fs', 'new', 'default', 'metadata', 'data'])
+
     def get_mds_hostnames(self):
         result = set()
         for mds_id in self.mds_ids:
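The new Filesystem.create() bundles the pool sizing and the 'fs new' call, so
the 'fs rm'/'fs new' cycles mentioned in the commit message could eventually
look something like the sketch below.  This is hypothetical usage, not part
of these patches: 'fs rm' is assumed to be available, and in practice the MDS
daemons would have to be stopped before it will succeed.

    # Hypothetical teardown/recreate cycle inside a test, reusing the new
    # Filesystem.create() helper.  The 'fs rm' step is not implemented by
    # these patches and assumes the MDSs have already been stopped.
    fs = Filesystem(ctx, config)
    fs.create()
    # ... exercise the filesystem ...
    fs.mon_remote.run(args=['sudo', 'ceph', 'fs', 'rm', 'default',
                            '--yes-i-really-mean-it'])
    fs.create()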