mirror of
https://github.com/ceph/ceph
synced 2024-12-26 21:43:10 +00:00
5cfe4cfa13
Signed-off-by: Sage Weil <sage@redhat.com> Signed-off-by: Loic Dachary <loic@dachary.org>
4609 lines
132 KiB
Python
Executable File
4609 lines
132 KiB
Python
Executable File
#!/usr/bin/env python
|
|
#
|
|
# Copyright (C) 2015 Red Hat <contact@redhat.com>
|
|
# Copyright (C) 2014 Inktank <info@inktank.com>
|
|
# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
|
|
# Copyright (C) 2014 Catalyst.net Ltd
|
|
#
|
|
# Author: Loic Dachary <loic@dachary.org>
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU Library Public License as published by
|
|
# the Free Software Foundation; either version 2, or (at your option)
|
|
# any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Library Public License for more details.
|
|
#
|
|
# THIS IS ceph-disk AS OF dc5a9053ce69c0630091774f16ce421da67d26fb v10.0.3-2247-gdc5a905
|
|
# PRIOR TO THE INTRODUCTION OF THE LOCKBOX VOLUME TO STORE KEY FETCHING
|
|
# STRATEGIES
|
|
#
|
|
import argparse
|
|
import errno
|
|
import fcntl
|
|
import json
|
|
import logging
|
|
import os
|
|
import platform
|
|
import re
|
|
import subprocess
|
|
import stat
|
|
import sys
|
|
import tempfile
|
|
import uuid
|
|
import time
|
|
import shlex
|
|
import pwd
|
|
import grp
|
|
|
|
CEPH_OSD_ONDISK_MAGIC = 'ceph osd volume v026'
|
|
|
|
PTYPE = {
|
|
'regular': {
|
|
'journal': {
|
|
# identical because creating a journal is atomic
|
|
'ready': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
|
|
'tobe': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
|
|
},
|
|
'block': {
|
|
# identical because creating a block is atomic
|
|
'ready': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
|
|
'tobe': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
|
|
},
|
|
'osd': {
|
|
'ready': '4fbd7e29-9d25-41b8-afd0-062c0ceff05d',
|
|
'tobe': '89c57f98-2fe5-4dc0-89c1-f3ad0ceff2be',
|
|
},
|
|
},
|
|
'luks': {
|
|
'journal': {
|
|
'ready': '45b0969e-9b03-4f30-b4c6-35865ceff106',
|
|
'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
|
|
},
|
|
'block': {
|
|
'ready': 'cafecafe-9b03-4f30-b4c6-35865ceff106',
|
|
'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
|
|
},
|
|
'osd': {
|
|
'ready': '4fbd7e29-9d25-41b8-afd0-35865ceff05d',
|
|
'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
|
|
},
|
|
},
|
|
'plain': {
|
|
'journal': {
|
|
'ready': '45b0969e-9b03-4f30-b4c6-5ec00ceff106',
|
|
'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
|
|
},
|
|
'block': {
|
|
'ready': 'cafecafe-9b03-4f30-b4c6-5ec00ceff106',
|
|
'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
|
|
},
|
|
'osd': {
|
|
'ready': '4fbd7e29-9d25-41b8-afd0-5ec00ceff05d',
|
|
'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
|
|
},
|
|
},
|
|
'mpath': {
|
|
'journal': {
|
|
'ready': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
|
|
'tobe': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
|
|
},
|
|
'block': {
|
|
'ready': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
|
|
'tobe': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
|
|
},
|
|
'osd': {
|
|
'ready': '4fbd7e29-8ae0-4982-bf9d-5a8d867af560',
|
|
'tobe': '89c57f98-8ae0-4982-bf9d-5a8d867af560',
|
|
},
|
|
},
|
|
}
|
|
|
|
|
|
class Ptype(object):
|
|
|
|
@staticmethod
|
|
def get_ready_by_type(what):
|
|
return [x['ready'] for x in PTYPE[what].values()]
|
|
|
|
@staticmethod
|
|
def get_ready_by_name(name):
|
|
return [x[name]['ready'] for x in PTYPE.values()]
|
|
|
|
@staticmethod
|
|
def is_regular_space(ptype):
|
|
return Ptype.is_what_space('regular', ptype)
|
|
|
|
@staticmethod
|
|
def is_mpath_space(ptype):
|
|
return Ptype.is_what_space('mpath', ptype)
|
|
|
|
@staticmethod
|
|
def is_plain_space(ptype):
|
|
return Ptype.is_what_space('plain', ptype)
|
|
|
|
@staticmethod
|
|
def is_luks_space(ptype):
|
|
return Ptype.is_what_space('luks', ptype)
|
|
|
|
@staticmethod
|
|
def is_what_space(what, ptype):
|
|
for name in Space.NAMES:
|
|
if ptype == PTYPE[what][name]['ready']:
|
|
return True
|
|
return False
|
|
|
|
@staticmethod
|
|
def space_ptype_to_name(ptype):
|
|
for what in PTYPE.values():
|
|
for name in Space.NAMES:
|
|
if ptype == what[name]['ready']:
|
|
return name
|
|
raise ValueError('ptype ' + ptype + ' not found')
|
|
|
|
@staticmethod
|
|
def is_dmcrypt_space(ptype):
|
|
for name in Space.NAMES:
|
|
if Ptype.is_dmcrypt(ptype, name):
|
|
return True
|
|
return False
|
|
|
|
@staticmethod
|
|
def is_dmcrypt(ptype, name):
|
|
for what in ('plain', 'luks'):
|
|
if ptype == PTYPE[what][name]['ready']:
|
|
return True
|
|
return False
|
|
|
|
DEFAULT_FS_TYPE = 'xfs'
|
|
SYSFS = '/sys'
|
|
|
|
"""
|
|
OSD STATUS Definition
|
|
"""
|
|
OSD_STATUS_OUT_DOWN = 0
|
|
OSD_STATUS_OUT_UP = 1
|
|
OSD_STATUS_IN_DOWN = 2
|
|
OSD_STATUS_IN_UP = 3
|
|
|
|
MOUNT_OPTIONS = dict(
|
|
btrfs='noatime,user_subvol_rm_allowed',
|
|
# user_xattr is default ever since linux 2.6.39 / 3.0, but we'll
|
|
# delay a moment before removing it fully because we did have some
|
|
# issues with ext4 before the xatts-in-leveldb work, and it seemed
|
|
# that user_xattr helped
|
|
ext4='noatime,user_xattr',
|
|
xfs='noatime,inode64',
|
|
)
|
|
|
|
MKFS_ARGS = dict(
|
|
btrfs=[
|
|
# btrfs requires -f, for the same reason as xfs (see comment below)
|
|
'-f',
|
|
'-m', 'single',
|
|
'-l', '32768',
|
|
'-n', '32768',
|
|
],
|
|
xfs=[
|
|
# xfs insists on not overwriting previous fs; even if we wipe
|
|
# partition table, we often recreate it exactly the same way,
|
|
# so we'll see ghosts of filesystems past
|
|
'-f',
|
|
'-i', 'size=2048',
|
|
],
|
|
)
|
|
|
|
INIT_SYSTEMS = [
|
|
'upstart',
|
|
'sysvinit',
|
|
'systemd',
|
|
'auto',
|
|
'none',
|
|
]
|
|
|
|
STATEDIR = '/var/lib/ceph'
|
|
|
|
SYSCONFDIR = '/etc/ceph'
|
|
|
|
prepare_lock = None
|
|
activate_lock = None
|
|
SUPPRESS_PREFIX = None
|
|
|
|
# only warn once about some things
|
|
warned_about = {}
|
|
|
|
# Nuke the TERM variable to avoid confusing any subprocesses we call.
|
|
# For example, libreadline will print weird control sequences for some
|
|
# TERM values.
|
|
if 'TERM' in os.environ:
|
|
del os.environ['TERM']
|
|
|
|
LOG_NAME = __name__
|
|
if LOG_NAME == '__main__':
|
|
LOG_NAME = os.path.basename(sys.argv[0])
|
|
LOG = logging.getLogger(LOG_NAME)
|
|
|
|
# Allow user-preferred values for subprocess user and group
|
|
CEPH_PREF_USER = None
|
|
CEPH_PREF_GROUP = None
|
|
|
|
|
|
class filelock(object):
|
|
def __init__(self, fn):
|
|
self.fn = fn
|
|
self.fd = None
|
|
|
|
def acquire(self):
|
|
assert not self.fd
|
|
self.fd = file(self.fn, 'w')
|
|
fcntl.lockf(self.fd, fcntl.LOCK_EX)
|
|
|
|
def release(self):
|
|
assert self.fd
|
|
fcntl.lockf(self.fd, fcntl.LOCK_UN)
|
|
self.fd = None
|
|
|
|
|
|
class Error(Exception):
|
|
"""
|
|
Error
|
|
"""
|
|
|
|
def __str__(self):
|
|
doc = self.__doc__.strip()
|
|
return ': '.join([doc] + [str(a) for a in self.args])
|
|
|
|
|
|
class MountError(Error):
|
|
"""
|
|
Mounting filesystem failed
|
|
"""
|
|
|
|
|
|
class UnmountError(Error):
|
|
"""
|
|
Unmounting filesystem failed
|
|
"""
|
|
|
|
|
|
class BadMagicError(Error):
|
|
"""
|
|
Does not look like a Ceph OSD, or incompatible version
|
|
"""
|
|
|
|
|
|
class TruncatedLineError(Error):
|
|
"""
|
|
Line is truncated
|
|
"""
|
|
|
|
|
|
class TooManyLinesError(Error):
|
|
"""
|
|
Too many lines
|
|
"""
|
|
|
|
|
|
class FilesystemTypeError(Error):
|
|
"""
|
|
Cannot discover filesystem type
|
|
"""
|
|
|
|
|
|
class CephDiskException(Exception):
|
|
"""
|
|
A base exception for ceph-disk to provide custom (ad-hoc) messages that
|
|
will be caught and dealt with when main() is executed
|
|
"""
|
|
pass
|
|
|
|
|
|
class ExecutableNotFound(CephDiskException):
|
|
"""
|
|
Exception to report on executables not available in PATH
|
|
"""
|
|
pass
|
|
|
|
|
|
def is_systemd():
|
|
"""
|
|
Detect whether systemd is running
|
|
"""
|
|
with file('/proc/1/comm', 'rb') as i:
|
|
for line in i:
|
|
if 'systemd' in line:
|
|
return True
|
|
return False
|
|
|
|
|
|
def is_upstart():
|
|
"""
|
|
Detect whether upstart is running
|
|
"""
|
|
(out, err, _) = command(['init', '--version'])
|
|
if 'upstart' in out:
|
|
return True
|
|
return False
|
|
|
|
|
|
def maybe_mkdir(*a, **kw):
|
|
"""
|
|
Creates a new directory if it doesn't exist, removes
|
|
existing symlink before creating the directory.
|
|
"""
|
|
# remove any symlink, if it is there..
|
|
if os.path.exists(*a) and stat.S_ISLNK(os.lstat(*a).st_mode):
|
|
LOG.debug('Removing old symlink at %s', *a)
|
|
os.unlink(*a)
|
|
try:
|
|
os.mkdir(*a, **kw)
|
|
except OSError, e:
|
|
if e.errno == errno.EEXIST:
|
|
pass
|
|
else:
|
|
raise
|
|
|
|
|
|
def which(executable):
|
|
"""find the location of an executable"""
|
|
if 'PATH' in os.environ:
|
|
envpath = os.environ['PATH']
|
|
else:
|
|
envpath = os.defpath
|
|
PATH = envpath.split(os.pathsep)
|
|
|
|
locations = PATH + [
|
|
'/usr/local/bin',
|
|
'/bin',
|
|
'/usr/bin',
|
|
'/usr/local/sbin',
|
|
'/usr/sbin',
|
|
'/sbin',
|
|
]
|
|
|
|
for location in locations:
|
|
executable_path = os.path.join(location, executable)
|
|
if (os.path.isfile(executable_path) and
|
|
os.access(executable_path, os.X_OK)):
|
|
return executable_path
|
|
|
|
|
|
def _get_command_executable(arguments):
|
|
"""
|
|
Return the full path for an executable, raise if the executable is not
|
|
found. If the executable has already a full path do not perform any checks.
|
|
"""
|
|
if arguments[0].startswith('/'): # an absolute path
|
|
return arguments
|
|
executable = which(arguments[0])
|
|
if not executable:
|
|
command_msg = 'Could not run command: %s' % ' '.join(arguments)
|
|
executable_msg = '%s not in path.' % arguments[0]
|
|
raise ExecutableNotFound('%s %s' % (executable_msg, command_msg))
|
|
|
|
# swap the old executable for the new one
|
|
arguments[0] = executable
|
|
return arguments
|
|
|
|
|
|
def command(arguments, **kwargs):
|
|
"""
|
|
Safely execute a ``subprocess.Popen`` call making sure that the
|
|
executable exists and raising a helpful error message
|
|
if it does not.
|
|
|
|
.. note:: This should be the preferred way of calling ``subprocess.Popen``
|
|
since it provides the caller with the safety net of making sure that
|
|
executables *will* be found and will error nicely otherwise.
|
|
|
|
This returns the output of the command and the return code of the
|
|
process in a tuple: (output, returncode).
|
|
"""
|
|
arguments = _get_command_executable(arguments)
|
|
LOG.info('Running command: %s' % ' '.join(arguments))
|
|
process = subprocess.Popen(
|
|
arguments,
|
|
stdout=subprocess.PIPE,
|
|
stderr=subprocess.PIPE,
|
|
**kwargs)
|
|
out, err = process.communicate()
|
|
return out, err, process.returncode
|
|
|
|
|
|
def command_check_call(arguments):
|
|
"""
|
|
Safely execute a ``subprocess.check_call`` call making sure that the
|
|
executable exists and raising a helpful error message if it does not.
|
|
|
|
.. note:: This should be the preferred way of calling
|
|
``subprocess.check_call`` since it provides the caller with the safety net
|
|
of making sure that executables *will* be found and will error nicely
|
|
otherwise.
|
|
"""
|
|
arguments = _get_command_executable(arguments)
|
|
LOG.info('Running command: %s', ' '.join(arguments))
|
|
return subprocess.check_call(arguments)
|
|
|
|
|
|
def platform_distro():
|
|
"""
|
|
Returns a normalized, lower case string without any leading nor trailing
|
|
whitespace that represents the distribution name of the current machine.
|
|
"""
|
|
distro = platform_information()[0] or ''
|
|
return distro.strip().lower()
|
|
|
|
|
|
def platform_information():
|
|
distro, release, codename = platform.linux_distribution()
|
|
# this could be an empty string in Debian
|
|
if not codename and 'debian' in distro.lower():
|
|
debian_codenames = {
|
|
'8': 'jessie',
|
|
'7': 'wheezy',
|
|
'6': 'squeeze',
|
|
}
|
|
major_version = release.split('.')[0]
|
|
codename = debian_codenames.get(major_version, '')
|
|
|
|
# In order to support newer jessie/sid or wheezy/sid strings we test
|
|
# this if sid is buried in the minor, we should use sid anyway.
|
|
if not codename and '/' in release:
|
|
major, minor = release.split('/')
|
|
if minor == 'sid':
|
|
codename = minor
|
|
else:
|
|
codename = major
|
|
|
|
return (
|
|
str(distro).strip(),
|
|
str(release).strip(),
|
|
str(codename).strip()
|
|
)
|
|
|
|
#
|
|
# An alternative block_path implementation would be
|
|
#
|
|
# name = basename(dev)
|
|
# return /sys/devices/virtual/block/$name
|
|
#
|
|
# It is however more fragile because it relies on the fact
|
|
# that the basename of the device the user will use always
|
|
# matches the one the driver will use. On Ubuntu 14.04, for
|
|
# instance, when multipath creates a partition table on
|
|
#
|
|
# /dev/mapper/353333330000007d0 -> ../dm-0
|
|
#
|
|
# it will create partition devices named
|
|
#
|
|
# /dev/mapper/353333330000007d0-part1
|
|
#
|
|
# which is the same device as /dev/dm-1 but not a symbolic
|
|
# link to it:
|
|
#
|
|
# ubuntu@other:~$ ls -l /dev/mapper /dev/dm-1
|
|
# brw-rw---- 1 root disk 252, 1 Aug 15 17:52 /dev/dm-1
|
|
# lrwxrwxrwx 1 root root 7 Aug 15 17:52 353333330000007d0 -> ../dm-0
|
|
# brw-rw---- 1 root disk 252, 1 Aug 15 17:52 353333330000007d0-part1
|
|
#
|
|
# Using the basename in this case fails.
|
|
#
|
|
|
|
|
|
def block_path(dev):
|
|
path = os.path.realpath(dev)
|
|
rdev = os.stat(path).st_rdev
|
|
(M, m) = (os.major(rdev), os.minor(rdev))
|
|
return "{sysfs}/dev/block/{M}:{m}".format(sysfs=SYSFS, M=M, m=m)
|
|
|
|
|
|
def get_dm_uuid(dev):
|
|
uuid_path = os.path.join(block_path(dev), 'dm', 'uuid')
|
|
LOG.debug("get_dm_uuid " + dev + " uuid path is " + uuid_path)
|
|
if not os.path.exists(uuid_path):
|
|
return False
|
|
uuid = open(uuid_path, 'r').read()
|
|
LOG.debug("get_dm_uuid " + dev + " uuid is " + uuid)
|
|
return uuid
|
|
|
|
|
|
def is_mpath(dev):
|
|
"""
|
|
True if the path is managed by multipath
|
|
"""
|
|
uuid = get_dm_uuid(dev)
|
|
return (uuid and
|
|
(re.match('part\d+-mpath-', uuid) or
|
|
re.match('mpath-', uuid)))
|
|
|
|
|
|
def get_dev_name(path):
|
|
"""
|
|
get device name from path. e.g.::
|
|
|
|
/dev/sda -> sdas, /dev/cciss/c0d1 -> cciss!c0d1
|
|
|
|
a device "name" is something like::
|
|
|
|
sdb
|
|
cciss!c0d1
|
|
|
|
"""
|
|
assert path.startswith('/dev/')
|
|
base = path[5:]
|
|
return base.replace('/', '!')
|
|
|
|
|
|
def get_dev_path(name):
|
|
"""
|
|
get a path (/dev/...) from a name (cciss!c0d1)
|
|
a device "path" is something like::
|
|
|
|
/dev/sdb
|
|
/dev/cciss/c0d1
|
|
|
|
"""
|
|
return '/dev/' + name.replace('!', '/')
|
|
|
|
|
|
def get_dev_relpath(name):
|
|
"""
|
|
get a relative path to /dev from a name (cciss!c0d1)
|
|
"""
|
|
return name.replace('!', '/')
|
|
|
|
|
|
def get_dev_size(dev, size='megabytes'):
|
|
"""
|
|
Attempt to get the size of a device so that we can prevent errors
|
|
from actions to devices that are smaller, and improve error reporting.
|
|
|
|
Because we want to avoid breakage in case this approach is not robust, we
|
|
will issue a warning if we failed to get the size.
|
|
|
|
:param size: bytes or megabytes
|
|
:param dev: the device to calculate the size
|
|
"""
|
|
fd = os.open(dev, os.O_RDONLY)
|
|
dividers = {'bytes': 1, 'megabytes': 1024 * 1024}
|
|
try:
|
|
device_size = os.lseek(fd, 0, os.SEEK_END)
|
|
divider = dividers.get(size, 1024 * 1024) # default to megabytes
|
|
return device_size / divider
|
|
except Exception as error:
|
|
LOG.warning('failed to get size of %s: %s' % (dev, str(error)))
|
|
finally:
|
|
os.close(fd)
|
|
|
|
|
|
def get_partition_mpath(dev, pnum):
|
|
part_re = "part{pnum}-mpath-".format(pnum=pnum)
|
|
partitions = list_partitions_mpath(dev, part_re)
|
|
if partitions:
|
|
return partitions[0]
|
|
else:
|
|
return None
|
|
|
|
|
|
def get_partition_dev(dev, pnum):
|
|
"""
|
|
get the device name for a partition
|
|
|
|
assume that partitions are named like the base dev,
|
|
with a number, and optionally
|
|
some intervening characters (like 'p'). e.g.,
|
|
|
|
sda 1 -> sda1
|
|
cciss/c0d1 1 -> cciss!c0d1p1
|
|
"""
|
|
partname = None
|
|
if is_mpath(dev):
|
|
partname = get_partition_mpath(dev, pnum)
|
|
else:
|
|
name = get_dev_name(os.path.realpath(dev))
|
|
for f in os.listdir(os.path.join('/sys/block', name)):
|
|
if f.startswith(name) and f.endswith(str(pnum)):
|
|
# we want the shortest name that starts with the base name
|
|
# and ends with the partition number
|
|
if not partname or len(f) < len(partname):
|
|
partname = f
|
|
if partname:
|
|
return get_dev_path(partname)
|
|
else:
|
|
raise Error('partition %d for %s does not appear to exist' %
|
|
(pnum, dev))
|
|
|
|
|
|
def list_all_partitions():
|
|
"""
|
|
Return a list of devices and partitions
|
|
"""
|
|
names = os.listdir('/sys/block')
|
|
dev_part_list = {}
|
|
for name in names:
|
|
# /dev/fd0 may hang http://tracker.ceph.com/issues/6827
|
|
if re.match(r'^fd\d$', name):
|
|
continue
|
|
dev_part_list[name] = list_partitions(get_dev_path(name))
|
|
return dev_part_list
|
|
|
|
|
|
def list_partitions(dev):
|
|
dev = os.path.realpath(dev)
|
|
if is_mpath(dev):
|
|
return list_partitions_mpath(dev)
|
|
else:
|
|
return list_partitions_device(dev)
|
|
|
|
|
|
def list_partitions_mpath(dev, part_re="part\d+-mpath-"):
|
|
p = block_path(dev)
|
|
partitions = []
|
|
holders = os.path.join(p, 'holders')
|
|
for holder in os.listdir(holders):
|
|
uuid_path = os.path.join(holders, holder, 'dm', 'uuid')
|
|
uuid = open(uuid_path, 'r').read()
|
|
LOG.debug("list_partitions_mpath: " + uuid_path + " uuid = " + uuid)
|
|
if re.match(part_re, uuid):
|
|
partitions.append(holder)
|
|
return partitions
|
|
|
|
|
|
def list_partitions_device(dev):
|
|
"""
|
|
Return a list of partitions on the given device name
|
|
"""
|
|
partitions = []
|
|
basename = get_dev_name(dev)
|
|
for name in os.listdir(block_path(dev)):
|
|
if name.startswith(basename):
|
|
partitions.append(name)
|
|
return partitions
|
|
|
|
|
|
def get_partition_base(dev):
|
|
"""
|
|
Get the base device for a partition
|
|
"""
|
|
dev = os.path.realpath(dev)
|
|
if not stat.S_ISBLK(os.lstat(dev).st_mode):
|
|
raise Error('not a block device', dev)
|
|
|
|
name = get_dev_name(dev)
|
|
if os.path.exists(os.path.join('/sys/block', name)):
|
|
raise Error('not a partition', dev)
|
|
|
|
# find the base
|
|
for basename in os.listdir('/sys/block'):
|
|
if os.path.exists(os.path.join('/sys/block', basename, name)):
|
|
return get_dev_path(basename)
|
|
raise Error('no parent device for partition', dev)
|
|
|
|
|
|
def is_partition_mpath(dev):
|
|
uuid = get_dm_uuid(dev)
|
|
return bool(re.match('part\d+-mpath-', uuid))
|
|
|
|
|
|
def partnum_mpath(dev):
|
|
uuid = get_dm_uuid(dev)
|
|
return re.findall('part(\d+)-mpath-', uuid)[0]
|
|
|
|
|
|
def get_partition_base_mpath(dev):
|
|
slave_path = os.path.join(block_path(dev), 'slaves')
|
|
slaves = os.listdir(slave_path)
|
|
assert slaves
|
|
name_path = os.path.join(slave_path, slaves[0], 'dm', 'name')
|
|
name = open(name_path, 'r').read().strip()
|
|
return os.path.join('/dev/mapper', name)
|
|
|
|
|
|
def is_partition(dev):
|
|
"""
|
|
Check whether a given device path is a partition or a full disk.
|
|
"""
|
|
if is_mpath(dev):
|
|
return is_partition_mpath(dev)
|
|
|
|
dev = os.path.realpath(dev)
|
|
st = os.lstat(dev)
|
|
if not stat.S_ISBLK(st.st_mode):
|
|
raise Error('not a block device', dev)
|
|
|
|
name = get_dev_name(dev)
|
|
if os.path.exists(os.path.join('/sys/block', name)):
|
|
return False
|
|
|
|
# make sure it is a partition of something else
|
|
major = os.major(st.st_rdev)
|
|
minor = os.minor(st.st_rdev)
|
|
if os.path.exists('/sys/dev/block/%d:%d/partition' % (major, minor)):
|
|
return True
|
|
|
|
raise Error('not a disk or partition', dev)
|
|
|
|
|
|
def is_mounted(dev):
|
|
"""
|
|
Check if the given device is mounted.
|
|
"""
|
|
dev = os.path.realpath(dev)
|
|
with file('/proc/mounts', 'rb') as proc_mounts:
|
|
for line in proc_mounts:
|
|
fields = line.split()
|
|
if len(fields) < 3:
|
|
continue
|
|
mounts_dev = fields[0]
|
|
path = fields[1]
|
|
if mounts_dev.startswith('/') and os.path.exists(mounts_dev):
|
|
mounts_dev = os.path.realpath(mounts_dev)
|
|
if mounts_dev == dev:
|
|
return path
|
|
return None
|
|
|
|
|
|
def is_held(dev):
|
|
"""
|
|
Check if a device is held by another device (e.g., a dm-crypt mapping)
|
|
"""
|
|
assert os.path.exists(dev)
|
|
if is_mpath(dev):
|
|
return []
|
|
|
|
dev = os.path.realpath(dev)
|
|
base = get_dev_name(dev)
|
|
|
|
# full disk?
|
|
directory = '/sys/block/{base}/holders'.format(base=base)
|
|
if os.path.exists(directory):
|
|
return os.listdir(directory)
|
|
|
|
# partition?
|
|
part = base
|
|
while len(base):
|
|
directory = '/sys/block/{base}/{part}/holders'.format(
|
|
part=part, base=base)
|
|
if os.path.exists(directory):
|
|
return os.listdir(directory)
|
|
base = base[:-1]
|
|
return []
|
|
|
|
|
|
def verify_not_in_use(dev, check_partitions=False):
|
|
"""
|
|
Verify if a given device (path) is in use (e.g. mounted or
|
|
in use by device-mapper).
|
|
|
|
:raises: Error if device is in use.
|
|
"""
|
|
assert os.path.exists(dev)
|
|
if is_mounted(dev):
|
|
raise Error('Device is mounted', dev)
|
|
holders = is_held(dev)
|
|
if holders:
|
|
raise Error('Device %s is in use by a device-mapper '
|
|
'mapping (dm-crypt?)' % dev, ','.join(holders))
|
|
|
|
if check_partitions and not is_partition(dev):
|
|
for partname in list_partitions(dev):
|
|
partition = get_dev_path(partname)
|
|
if is_mounted(partition):
|
|
raise Error('Device is mounted', partition)
|
|
holders = is_held(partition)
|
|
if holders:
|
|
raise Error('Device %s is in use by a device-mapper '
|
|
'mapping (dm-crypt?)'
|
|
% partition, ','.join(holders))
|
|
|
|
|
|
def must_be_one_line(line):
|
|
"""
|
|
Checks if given line is really one single line.
|
|
|
|
:raises: TruncatedLineError or TooManyLinesError
|
|
:return: Content of the line, or None if line isn't valid.
|
|
"""
|
|
if line[-1:] != '\n':
|
|
raise TruncatedLineError(line)
|
|
line = line[:-1]
|
|
if '\n' in line:
|
|
raise TooManyLinesError(line)
|
|
return line
|
|
|
|
|
|
def read_one_line(parent, name):
|
|
"""
|
|
Read a file whose sole contents are a single line.
|
|
|
|
Strips the newline.
|
|
|
|
:return: Contents of the line, or None if file did not exist.
|
|
"""
|
|
path = os.path.join(parent, name)
|
|
try:
|
|
line = file(path, 'rb').read()
|
|
except IOError as e:
|
|
if e.errno == errno.ENOENT:
|
|
return None
|
|
else:
|
|
raise
|
|
|
|
try:
|
|
line = must_be_one_line(line)
|
|
except (TruncatedLineError, TooManyLinesError) as e:
|
|
raise Error(
|
|
'File is corrupt: {path}: {msg}'.format(
|
|
path=path,
|
|
msg=e,
|
|
)
|
|
)
|
|
return line
|
|
|
|
|
|
def write_one_line(parent, name, text):
|
|
"""
|
|
Write a file whose sole contents are a single line.
|
|
|
|
Adds a newline.
|
|
"""
|
|
path = os.path.join(parent, name)
|
|
tmp = '{path}.{pid}.tmp'.format(path=path, pid=os.getpid())
|
|
with file(tmp, 'wb') as tmp_file:
|
|
tmp_file.write(text + '\n')
|
|
os.fsync(tmp_file.fileno())
|
|
path_set_context(tmp)
|
|
os.rename(tmp, path)
|
|
|
|
|
|
def init_get():
|
|
"""
|
|
Get a init system using 'ceph-detect-init'
|
|
"""
|
|
init = _check_output(
|
|
args=[
|
|
'ceph-detect-init',
|
|
'--default', 'sysvinit',
|
|
],
|
|
)
|
|
init = must_be_one_line(init)
|
|
return init
|
|
|
|
|
|
def check_osd_magic(path):
|
|
"""
|
|
Check that this path has the Ceph OSD magic.
|
|
|
|
:raises: BadMagicError if this does not look like a Ceph OSD data
|
|
dir.
|
|
"""
|
|
magic = read_one_line(path, 'magic')
|
|
if magic is None:
|
|
# probably not mkfs'ed yet
|
|
raise BadMagicError(path)
|
|
if magic != CEPH_OSD_ONDISK_MAGIC:
|
|
raise BadMagicError(path)
|
|
|
|
|
|
def check_osd_id(osd_id):
|
|
"""
|
|
Ensures osd id is numeric.
|
|
"""
|
|
if not re.match(r'^[0-9]+$', osd_id):
|
|
raise Error('osd id is not numeric', osd_id)
|
|
|
|
|
|
def allocate_osd_id(
|
|
cluster,
|
|
fsid,
|
|
keyring,
|
|
):
|
|
"""
|
|
Accocates an OSD id on the given cluster.
|
|
|
|
:raises: Error if the call to allocate the OSD id fails.
|
|
:return: The allocated OSD id.
|
|
"""
|
|
|
|
LOG.debug('Allocating OSD id...')
|
|
try:
|
|
osd_id = _check_output(
|
|
args=[
|
|
'ceph',
|
|
'--cluster', cluster,
|
|
'--name', 'client.bootstrap-osd',
|
|
'--keyring', keyring,
|
|
'osd', 'create', '--concise',
|
|
fsid,
|
|
],
|
|
)
|
|
except subprocess.CalledProcessError as e:
|
|
raise Error('ceph osd create failed', e, e.output)
|
|
osd_id = must_be_one_line(osd_id)
|
|
check_osd_id(osd_id)
|
|
return osd_id
|
|
|
|
|
|
def get_osd_id(path):
|
|
"""
|
|
Gets the OSD id of the OSD at the given path.
|
|
"""
|
|
osd_id = read_one_line(path, 'whoami')
|
|
if osd_id is not None:
|
|
check_osd_id(osd_id)
|
|
return osd_id
|
|
|
|
|
|
def get_ceph_user():
|
|
global CEPH_PREF_USER
|
|
|
|
if CEPH_PREF_USER is not None:
|
|
try:
|
|
pwd.getpwnam(CEPH_PREF_USER)
|
|
return CEPH_PREF_USER
|
|
except KeyError:
|
|
print "No such user: " + CEPH_PREF_USER
|
|
sys.exit(2)
|
|
else:
|
|
try:
|
|
pwd.getpwnam('ceph')
|
|
return 'ceph'
|
|
except KeyError:
|
|
return 'root'
|
|
|
|
|
|
def get_ceph_group():
|
|
global CEPH_PREF_GROUP
|
|
|
|
if CEPH_PREF_GROUP is not None:
|
|
try:
|
|
grp.getgrnam(CEPH_PREF_GROUP)
|
|
return CEPH_PREF_GROUP
|
|
except KeyError:
|
|
print "No such group: " + CEPH_PREF_GROUP
|
|
sys.exit(2)
|
|
else:
|
|
try:
|
|
grp.getgrnam('ceph')
|
|
return 'ceph'
|
|
except KeyError:
|
|
return 'root'
|
|
|
|
|
|
def path_set_context(path):
|
|
# restore selinux context to default policy values
|
|
if which('restorecon'):
|
|
command(['restorecon', '-R', path])
|
|
|
|
# if ceph user exists, set owner to ceph
|
|
if get_ceph_user() == 'ceph':
|
|
command(['chown', '-R', 'ceph:ceph', path])
|
|
|
|
|
|
def _check_output(args=None, **kwargs):
|
|
out, err, ret = command(args, **kwargs)
|
|
if ret:
|
|
cmd = args[0]
|
|
error = subprocess.CalledProcessError(ret, cmd)
|
|
error.output = out + err
|
|
raise error
|
|
return out
|
|
|
|
|
|
def get_conf(cluster, variable):
|
|
"""
|
|
Get the value of the given configuration variable from the
|
|
cluster.
|
|
|
|
:raises: Error if call to ceph-conf fails.
|
|
:return: The variable value or None.
|
|
"""
|
|
try:
|
|
out, err, ret = command(
|
|
[
|
|
'ceph-conf',
|
|
'--cluster={cluster}'.format(
|
|
cluster=cluster,
|
|
),
|
|
'--name=osd.',
|
|
'--lookup',
|
|
variable,
|
|
],
|
|
close_fds=True,
|
|
)
|
|
except OSError as e:
|
|
raise Error('error executing ceph-conf', e, err)
|
|
if ret == 1:
|
|
# config entry not found
|
|
return None
|
|
elif ret != 0:
|
|
raise Error('getting variable from configuration failed')
|
|
value = out.split('\n', 1)[0]
|
|
# don't differentiate between "var=" and no var set
|
|
if not value:
|
|
return None
|
|
return value
|
|
|
|
|
|
def get_conf_with_default(cluster, variable):
|
|
"""
|
|
Get a config value that is known to the C++ code.
|
|
|
|
This will fail if called on variables that are not defined in
|
|
common config options.
|
|
"""
|
|
try:
|
|
out = _check_output(
|
|
args=[
|
|
'ceph-osd',
|
|
'--cluster={cluster}'.format(
|
|
cluster=cluster,
|
|
),
|
|
'--show-config-value={variable}'.format(
|
|
variable=variable,
|
|
),
|
|
],
|
|
close_fds=True,
|
|
)
|
|
except subprocess.CalledProcessError as e:
|
|
raise Error(
|
|
'getting variable from configuration failed',
|
|
e,
|
|
)
|
|
|
|
value = str(out).split('\n', 1)[0]
|
|
return value
|
|
|
|
|
|
def get_fsid(cluster):
|
|
"""
|
|
Get the fsid of the cluster.
|
|
|
|
:return: The fsid or raises Error.
|
|
"""
|
|
fsid = get_conf_with_default(cluster=cluster, variable='fsid')
|
|
if fsid is None:
|
|
raise Error('getting cluster uuid from configuration failed')
|
|
return fsid.lower()
|
|
|
|
|
|
def get_dmcrypt_key_path(
|
|
_uuid,
|
|
key_dir,
|
|
luks
|
|
):
|
|
"""
|
|
Get path to dmcrypt key file.
|
|
|
|
:return: Path to the dmcrypt key file, callers should check for existence.
|
|
"""
|
|
if luks:
|
|
path = os.path.join(key_dir, _uuid + ".luks.key")
|
|
else:
|
|
path = os.path.join(key_dir, _uuid)
|
|
|
|
return path
|
|
|
|
|
|
def get_or_create_dmcrypt_key(
|
|
_uuid,
|
|
key_dir,
|
|
key_size,
|
|
luks
|
|
):
|
|
"""
|
|
Get path to existing dmcrypt key or create a new key file.
|
|
|
|
:return: Path to the dmcrypt key file.
|
|
"""
|
|
path = get_dmcrypt_key_path(_uuid, key_dir, luks)
|
|
if os.path.exists(path):
|
|
return path
|
|
|
|
# make a new key
|
|
try:
|
|
if not os.path.exists(key_dir):
|
|
os.makedirs(key_dir, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
|
|
with file('/dev/urandom', 'rb') as i:
|
|
key = i.read(key_size / 8)
|
|
fd = os.open(path, os.O_WRONLY | os.O_CREAT,
|
|
stat.S_IRUSR | stat.S_IWUSR)
|
|
assert os.write(fd, key) == len(key)
|
|
os.close(fd)
|
|
return path
|
|
except:
|
|
raise Error('unable to read or create dm-crypt key', path)
|
|
|
|
|
|
def _dmcrypt_map(
|
|
rawdev,
|
|
keypath,
|
|
_uuid,
|
|
cryptsetup_parameters,
|
|
luks,
|
|
format_dev=False,
|
|
):
|
|
"""
|
|
Maps a device to a dmcrypt device.
|
|
|
|
:return: Path to the dmcrypt device.
|
|
"""
|
|
dev = '/dev/mapper/' + _uuid
|
|
luksFormat_args = [
|
|
'cryptsetup',
|
|
'--batch-mode',
|
|
'--key-file',
|
|
keypath,
|
|
'luksFormat',
|
|
rawdev,
|
|
] + cryptsetup_parameters
|
|
|
|
luksOpen_args = [
|
|
'cryptsetup',
|
|
'--key-file',
|
|
keypath,
|
|
'luksOpen',
|
|
rawdev,
|
|
_uuid,
|
|
]
|
|
|
|
create_args = [
|
|
'cryptsetup',
|
|
'--key-file',
|
|
keypath,
|
|
'create',
|
|
_uuid,
|
|
rawdev,
|
|
] + cryptsetup_parameters
|
|
|
|
try:
|
|
if luks:
|
|
if format_dev:
|
|
command_check_call(luksFormat_args)
|
|
command_check_call(luksOpen_args)
|
|
else:
|
|
# Plain mode has no format function, nor any validation
|
|
# that the key is correct.
|
|
command_check_call(create_args)
|
|
# set proper ownership of mapped device
|
|
command_check_call(['chown', 'ceph:ceph', dev])
|
|
return dev
|
|
|
|
except subprocess.CalledProcessError as e:
|
|
raise Error('unable to map device', rawdev, e)
|
|
|
|
|
|
def dmcrypt_unmap(
|
|
_uuid
|
|
):
|
|
"""
|
|
Removes the dmcrypt device with the given UUID.
|
|
"""
|
|
retries = 0
|
|
while True:
|
|
try:
|
|
command_check_call(['cryptsetup', 'remove', _uuid])
|
|
break
|
|
except subprocess.CalledProcessError as e:
|
|
if retries == 10:
|
|
raise Error('unable to unmap device', _uuid, e)
|
|
else:
|
|
time.sleep(0.5 + retries * 1.0)
|
|
retries += 1
|
|
|
|
|
|
def mount(
|
|
dev,
|
|
fstype,
|
|
options,
|
|
):
|
|
"""
|
|
Mounts a device with given filessystem type and
|
|
mount options to a tempfile path under /var/lib/ceph/tmp.
|
|
"""
|
|
# sanity check: none of the arguments are None
|
|
if dev is None:
|
|
raise ValueError('dev may not be None')
|
|
if fstype is None:
|
|
raise ValueError('fstype may not be None')
|
|
|
|
# pick best-of-breed mount options based on fs type
|
|
if options is None:
|
|
options = MOUNT_OPTIONS.get(fstype, '')
|
|
|
|
# mount
|
|
path = tempfile.mkdtemp(
|
|
prefix='mnt.',
|
|
dir=STATEDIR + '/tmp',
|
|
)
|
|
try:
|
|
LOG.debug('Mounting %s on %s with options %s', dev, path, options)
|
|
command_check_call(
|
|
[
|
|
'mount',
|
|
'-t', fstype,
|
|
'-o', options,
|
|
'--',
|
|
dev,
|
|
path,
|
|
],
|
|
)
|
|
if which('restorecon'):
|
|
command(
|
|
[
|
|
'restorecon',
|
|
path,
|
|
],
|
|
)
|
|
except subprocess.CalledProcessError as e:
|
|
try:
|
|
os.rmdir(path)
|
|
except (OSError, IOError):
|
|
pass
|
|
raise MountError(e)
|
|
|
|
return path
|
|
|
|
|
|
def unmount(
|
|
path,
|
|
):
|
|
"""
|
|
Unmount and removes the given mount point.
|
|
"""
|
|
retries = 0
|
|
while True:
|
|
try:
|
|
LOG.debug('Unmounting %s', path)
|
|
command_check_call(
|
|
[
|
|
'/bin/umount',
|
|
'--',
|
|
path,
|
|
],
|
|
)
|
|
break
|
|
except subprocess.CalledProcessError as e:
|
|
# on failure, retry 3 times with incremental backoff
|
|
if retries == 3:
|
|
raise UnmountError(e)
|
|
else:
|
|
time.sleep(0.5 + retries * 1.0)
|
|
retries += 1
|
|
|
|
os.rmdir(path)
|
|
|
|
|
|
###########################################
|
|
|
|
def extract_parted_partition_numbers(partitions):
|
|
numbers_as_strings = re.findall('^\d+', partitions, re.MULTILINE)
|
|
return map(int, numbers_as_strings)
|
|
|
|
|
|
def get_free_partition_index(dev):
|
|
"""
|
|
Get the next free partition index on a given device.
|
|
|
|
:return: Index number (> 1 if there is already a partition on the device)
|
|
or 1 if there is no partition table.
|
|
"""
|
|
try:
|
|
lines = _check_output(
|
|
args=[
|
|
'parted',
|
|
'--machine',
|
|
'--',
|
|
dev,
|
|
'print',
|
|
],
|
|
)
|
|
except subprocess.CalledProcessError as e:
|
|
LOG.info('cannot read partition index; assume it '
|
|
'isn\'t present\n (Error: %s)' % e)
|
|
return 1
|
|
|
|
if not lines:
|
|
raise Error('parted failed to output anything')
|
|
LOG.debug('get_free_partition_index: analyzing ' + lines)
|
|
if ('CHS;' not in lines and
|
|
'CYL;' not in lines and
|
|
'BYT;' not in lines):
|
|
raise Error('parted output expected to contain one of ' +
|
|
'CHH; CYL; or BYT; : ' + lines)
|
|
if os.path.realpath(dev) not in lines:
|
|
raise Error('parted output expected to contain ' + dev + ': ' + lines)
|
|
_, partitions = lines.split(os.path.realpath(dev))
|
|
partition_numbers = extract_parted_partition_numbers(partitions)
|
|
if partition_numbers:
|
|
return max(partition_numbers) + 1
|
|
else:
|
|
return 1
|
|
|
|
|
|
def check_journal_reqs(args):
|
|
_, _, allows_journal = command([
|
|
'ceph-osd', '--check-allows-journal',
|
|
'-i', '0',
|
|
'--cluster', args.cluster,
|
|
])
|
|
_, _, wants_journal = command([
|
|
'ceph-osd', '--check-wants-journal',
|
|
'-i', '0',
|
|
'--cluster', args.cluster,
|
|
])
|
|
_, _, needs_journal = command([
|
|
'ceph-osd', '--check-needs-journal',
|
|
'-i', '0',
|
|
'--cluster', args.cluster,
|
|
])
|
|
return (not allows_journal, not wants_journal, not needs_journal)
|
|
|
|
|
|
def update_partition(dev, description):
|
|
"""
|
|
Must be called after modifying a partition table so the kernel
|
|
know about the change and fire udev events accordingly. A side
|
|
effect of partprobe is to remove partitions and add them again.
|
|
The first udevadm settle waits for ongoing udev events to
|
|
complete, just in case one of them rely on an existing partition
|
|
on dev. The second udevadm settle guarantees to the caller that
|
|
all udev events related to the partition table change have been
|
|
processed, i.e. the 95-ceph-osd.rules actions and mode changes,
|
|
group changes etc. are complete.
|
|
"""
|
|
LOG.debug('Calling partprobe on %s device %s', description, dev)
|
|
partprobe_ok = False
|
|
error = 'unknown error'
|
|
for i in (1, 2, 3, 4, 5):
|
|
command_check_call(['udevadm', 'settle', '--timeout=600'])
|
|
try:
|
|
_check_output(['partprobe', dev])
|
|
partprobe_ok = True
|
|
break
|
|
except subprocess.CalledProcessError as e:
|
|
error = e.output
|
|
if ('unable to inform the kernel' not in error and
|
|
'Device or resource busy' not in error):
|
|
raise
|
|
LOG.debug('partprobe %s failed : %s (ignored, waiting 60s)'
|
|
% (dev, error))
|
|
time.sleep(60)
|
|
if not partprobe_ok:
|
|
raise Error('partprobe %s failed : %s' % (dev, error))
|
|
command_check_call(['udevadm', 'settle', '--timeout=600'])
|
|
|
|
|
|
def zap(dev):
|
|
"""
|
|
Destroy the partition table and content of a given disk.
|
|
"""
|
|
dev = os.path.realpath(dev)
|
|
dmode = os.stat(dev).st_mode
|
|
if not stat.S_ISBLK(dmode) or is_partition(dev):
|
|
raise Error('not full block device; cannot zap', dev)
|
|
try:
|
|
LOG.debug('Zapping partition table on %s', dev)
|
|
|
|
# try to wipe out any GPT partition table backups. sgdisk
|
|
# isn't too thorough.
|
|
lba_size = 4096
|
|
size = 33 * lba_size
|
|
with file(dev, 'wb') as dev_file:
|
|
dev_file.seek(-size, os.SEEK_END)
|
|
dev_file.write(size * '\0')
|
|
|
|
command_check_call(
|
|
[
|
|
'sgdisk',
|
|
'--zap-all',
|
|
'--',
|
|
dev,
|
|
],
|
|
)
|
|
command_check_call(
|
|
[
|
|
'sgdisk',
|
|
'--clear',
|
|
'--mbrtogpt',
|
|
'--',
|
|
dev,
|
|
],
|
|
)
|
|
|
|
update_partition(dev, 'zapped')
|
|
|
|
except subprocess.CalledProcessError as e:
|
|
raise Error(e)
|
|
|
|
|
|
def adjust_symlink(target, path):
|
|
create = True
|
|
if os.path.lexists(path):
|
|
try:
|
|
mode = os.lstat(path).st_mode
|
|
if stat.S_ISREG(mode):
|
|
LOG.debug('Removing old file %s', path)
|
|
os.unlink(path)
|
|
elif stat.S_ISLNK(mode):
|
|
old = os.readlink(path)
|
|
if old != target:
|
|
LOG.debug('Removing old symlink %s -> %s', path, old)
|
|
os.unlink(path)
|
|
else:
|
|
create = False
|
|
except:
|
|
raise Error('unable to remove (or adjust) old file (symlink)',
|
|
path)
|
|
if create:
|
|
LOG.debug('Creating symlink %s -> %s', path, target)
|
|
try:
|
|
os.symlink(target, path)
|
|
except:
|
|
raise Error('unable to create symlink %s -> %s' % (path, target))
|
|
|
|
|
|
class Device(object):
|
|
|
|
def __init__(self, path, args):
|
|
self.args = args
|
|
self.path = path
|
|
self.dev_size = None
|
|
self.partitions = {}
|
|
self.ptype_map = None
|
|
assert not is_partition(self.path)
|
|
|
|
def create_partition(self, uuid, name, size=0, num=0):
|
|
ptype = self.ptype_tobe_for_name(name)
|
|
if num == 0:
|
|
num = get_free_partition_index(dev=self.path)
|
|
if size > 0:
|
|
new = '--new={num}:0:+{size}M'.format(num=num, size=size)
|
|
if size > self.get_dev_size():
|
|
LOG.error('refusing to create %s on %s' % (name, self.path))
|
|
LOG.error('%s size (%sM) is bigger than device (%sM)'
|
|
% (name, size, self.get_dev_size()))
|
|
raise Error('%s device size (%sM) is not big enough for %s'
|
|
% (self.path, self.get_dev_size(), name))
|
|
else:
|
|
new = '--largest-new={num}'.format(num=num)
|
|
|
|
LOG.debug('Creating %s partition num %d size %d on %s',
|
|
name, num, size, self.path)
|
|
command_check_call(
|
|
[
|
|
'sgdisk',
|
|
new,
|
|
'--change-name={num}:ceph {name}'.format(num=num, name=name),
|
|
'--partition-guid={num}:{uuid}'.format(num=num, uuid=uuid),
|
|
'--typecode={num}:{uuid}'.format(num=num, uuid=ptype),
|
|
'--mbrtogpt',
|
|
'--',
|
|
self.path,
|
|
]
|
|
)
|
|
update_partition(self.path, 'created')
|
|
return num
|
|
|
|
def ptype_tobe_for_name(self, name):
|
|
if name == 'data':
|
|
name = 'osd'
|
|
if self.ptype_map is None:
|
|
partition = DevicePartition.factory(
|
|
path=self.path, dev=None, args=self.args)
|
|
self.ptype_map = partition.ptype_map
|
|
return self.ptype_map[name]['tobe']
|
|
|
|
def get_partition(self, num):
|
|
if num not in self.partitions:
|
|
dev = get_partition_dev(self.path, num)
|
|
partition = DevicePartition.factory(
|
|
path=self.path, dev=dev, args=self.args)
|
|
partition.set_partition_number(num)
|
|
self.partitions[num] = partition
|
|
return self.partitions[num]
|
|
|
|
def get_dev_size(self):
|
|
if self.dev_size is None:
|
|
self.dev_size = get_dev_size(self.path)
|
|
return self.dev_size
|
|
|
|
@staticmethod
|
|
def factory(path, args):
|
|
return Device(path, args)
|
|
|
|
|
|
class DevicePartition(object):
|
|
|
|
def __init__(self, args):
|
|
self.args = args
|
|
self.num = None
|
|
self.rawdev = None
|
|
self.dev = None
|
|
self.uuid = None
|
|
self.ptype_map = None
|
|
self.ptype = None
|
|
self.set_variables_ptype()
|
|
|
|
def get_uuid(self):
|
|
if self.uuid is None:
|
|
self.uuid = get_partition_uuid(self.rawdev)
|
|
return self.uuid
|
|
|
|
def get_ptype(self):
|
|
if self.ptype is None:
|
|
self.ptype = get_partition_type(self.rawdev)
|
|
return self.ptype
|
|
|
|
def set_partition_number(self, num):
|
|
self.num = num
|
|
|
|
def get_partition_number(self):
|
|
return self.num
|
|
|
|
def set_dev(self, dev):
|
|
self.dev = dev
|
|
self.rawdev = dev
|
|
|
|
def get_dev(self):
|
|
return self.dev
|
|
|
|
def get_rawdev(self):
|
|
return self.rawdev
|
|
|
|
def set_variables_ptype(self):
|
|
self.ptype_map = PTYPE['regular']
|
|
|
|
def ptype_for_name(self, name):
|
|
return self.ptype_map[name]['ready']
|
|
|
|
@staticmethod
|
|
def factory(path, dev, args):
|
|
dmcrypt_type = CryptHelpers.get_dmcrypt_type(args)
|
|
if ((path is not None and is_mpath(path)) or
|
|
(dev is not None and is_mpath(dev))):
|
|
partition = DevicePartitionMultipath(args)
|
|
elif dmcrypt_type == 'luks':
|
|
partition = DevicePartitionCryptLuks(args)
|
|
elif dmcrypt_type == 'plain':
|
|
partition = DevicePartitionCryptPlain(args)
|
|
else:
|
|
partition = DevicePartition(args)
|
|
partition.set_dev(dev)
|
|
return partition
|
|
|
|
|
|
class DevicePartitionMultipath(DevicePartition):
|
|
|
|
def set_variables_ptype(self):
|
|
self.ptype_map = PTYPE['mpath']
|
|
|
|
|
|
class DevicePartitionCrypt(DevicePartition):
|
|
|
|
def __init__(self, args):
|
|
super(DevicePartitionCrypt, self).__init__(args)
|
|
self.osd_dm_keypath = None
|
|
self.cryptsetup_parameters = CryptHelpers.get_cryptsetup_parameters(
|
|
self.args)
|
|
self.dmcrypt_type = CryptHelpers.get_dmcrypt_type(self.args)
|
|
self.dmcrypt_keysize = CryptHelpers.get_dmcrypt_keysize(self.args)
|
|
|
|
def setup_crypt(self):
|
|
pass
|
|
|
|
def map(self):
|
|
self.setup_crypt()
|
|
self.dev = _dmcrypt_map(
|
|
rawdev=self.rawdev,
|
|
keypath=self.osd_dm_keypath,
|
|
_uuid=self.get_uuid(),
|
|
cryptsetup_parameters=self.cryptsetup_parameters,
|
|
luks=self.luks(),
|
|
format_dev=True,
|
|
)
|
|
|
|
def unmap(self):
|
|
self.setup_crypt()
|
|
dmcrypt_unmap(self.get_uuid())
|
|
self.dev = self.rawdev
|
|
|
|
def format(self):
|
|
self.setup_crypt()
|
|
self.map()
|
|
self.unmap()
|
|
|
|
|
|
class DevicePartitionCryptPlain(DevicePartitionCrypt):
|
|
|
|
def luks(self):
|
|
return False
|
|
|
|
def setup_crypt(self):
|
|
if self.osd_dm_keypath is not None:
|
|
return
|
|
|
|
self.cryptsetup_parameters += ['--key-size', str(self.dmcrypt_keysize)]
|
|
|
|
self.osd_dm_keypath = get_or_create_dmcrypt_key(
|
|
self.get_uuid(), self.args.dmcrypt_key_dir,
|
|
self.dmcrypt_keysize, False)
|
|
|
|
def set_variables_ptype(self):
|
|
self.ptype_map = PTYPE['plain']
|
|
|
|
|
|
class DevicePartitionCryptLuks(DevicePartitionCrypt):
|
|
|
|
def luks(self):
|
|
return True
|
|
|
|
def setup_crypt(self):
|
|
if self.osd_dm_keypath is not None:
|
|
return
|
|
|
|
if self.dmcrypt_keysize == 1024:
|
|
# We don't force this into the cryptsetup_parameters,
|
|
# as we want the cryptsetup defaults
|
|
# to prevail for the actual LUKS key lengths.
|
|
pass
|
|
else:
|
|
self.cryptsetup_parameters += ['--key-size',
|
|
str(self.dmcrypt_keysize)]
|
|
|
|
self.osd_dm_keypath = get_or_create_dmcrypt_key(
|
|
self.get_uuid(), self.args.dmcrypt_key_dir,
|
|
self.dmcrypt_keysize, True)
|
|
|
|
def set_variables_ptype(self):
|
|
self.ptype_map = PTYPE['luks']
|
|
|
|
|
|
class Prepare(object):
|
|
|
|
@staticmethod
|
|
def parser():
|
|
parser = argparse.ArgumentParser(add_help=False)
|
|
parser.add_argument(
|
|
'--cluster',
|
|
metavar='NAME',
|
|
default='ceph',
|
|
help='cluster name to assign this disk to',
|
|
)
|
|
parser.add_argument(
|
|
'--cluster-uuid',
|
|
metavar='UUID',
|
|
help='cluster uuid to assign this disk to',
|
|
)
|
|
parser.add_argument(
|
|
'--osd-uuid',
|
|
metavar='UUID',
|
|
help='unique OSD uuid to assign this disk to',
|
|
)
|
|
parser.add_argument(
|
|
'--dmcrypt',
|
|
action='store_true', default=None,
|
|
help='encrypt DATA and/or JOURNAL devices with dm-crypt',
|
|
)
|
|
parser.add_argument(
|
|
'--dmcrypt-key-dir',
|
|
metavar='KEYDIR',
|
|
default='/etc/ceph/dmcrypt-keys',
|
|
help='directory where dm-crypt keys are stored',
|
|
)
|
|
return parser
|
|
|
|
@staticmethod
|
|
def set_subparser(subparsers):
|
|
parents = [
|
|
Prepare.parser(),
|
|
PrepareData.parser(),
|
|
]
|
|
parents.extend(PrepareFilestore.parent_parsers())
|
|
parents.extend(PrepareBluestore.parent_parsers())
|
|
parser = subparsers.add_parser(
|
|
'prepare',
|
|
parents=parents,
|
|
help='Prepare a directory or disk for a Ceph OSD',
|
|
)
|
|
parser.set_defaults(
|
|
func=Prepare.main,
|
|
)
|
|
return parser
|
|
|
|
def prepare(self):
|
|
prepare_lock.acquire()
|
|
self.prepare_locked()
|
|
prepare_lock.release()
|
|
|
|
@staticmethod
|
|
def factory(args):
|
|
if args.bluestore:
|
|
return PrepareBluestore(args)
|
|
else:
|
|
return PrepareFilestore(args)
|
|
|
|
@staticmethod
|
|
def main(args):
|
|
Prepare.factory(args).prepare()
|
|
|
|
|
|
class PrepareFilestore(Prepare):
|
|
|
|
def __init__(self, args):
|
|
self.data = PrepareFilestoreData(args)
|
|
self.journal = PrepareJournal(args)
|
|
|
|
@staticmethod
|
|
def parent_parsers():
|
|
return [
|
|
PrepareJournal.parser(),
|
|
]
|
|
|
|
def prepare_locked(self):
|
|
self.data.prepare(self.journal)
|
|
|
|
|
|
class PrepareBluestore(Prepare):
|
|
|
|
def __init__(self, args):
|
|
self.data = PrepareBluestoreData(args)
|
|
self.block = PrepareBluestoreBlock(args)
|
|
|
|
@staticmethod
|
|
def parser():
|
|
parser = argparse.ArgumentParser(add_help=False)
|
|
parser.add_argument(
|
|
'--bluestore',
|
|
action='store_true', default=None,
|
|
help='bluestore objectstore',
|
|
)
|
|
parser.add_argument(
|
|
'--filestore',
|
|
action='store_true', default=True,
|
|
help='IGNORED FORWARD COMPATIBIILTY HACK',
|
|
)
|
|
return parser
|
|
|
|
@staticmethod
|
|
def parent_parsers():
|
|
return [
|
|
PrepareBluestore.parser(),
|
|
PrepareBluestoreBlock.parser(),
|
|
]
|
|
|
|
def prepare_locked(self):
|
|
self.data.prepare(self.block)
|
|
|
|
|
|
class Space(object):
|
|
|
|
NAMES = ('block', 'journal')
|
|
|
|
|
|
class PrepareSpace(object):
|
|
|
|
NONE = 0
|
|
FILE = 1
|
|
DEVICE = 2
|
|
|
|
def __init__(self, args):
|
|
self.args = args
|
|
self.set_type()
|
|
self.space_size = self.get_space_size()
|
|
if (getattr(self.args, self.name) and
|
|
getattr(self.args, self.name + '_uuid') is None):
|
|
setattr(self.args, self.name + '_uuid', str(uuid.uuid4()))
|
|
self.space_symlink = None
|
|
self.space_dmcrypt = None
|
|
|
|
def set_type(self):
|
|
name = self.name
|
|
args = self.args
|
|
dmode = os.stat(args.data).st_mode
|
|
if (self.wants_space() and
|
|
stat.S_ISBLK(dmode) and
|
|
not is_partition(args.data) and
|
|
getattr(args, name) is None and
|
|
getattr(args, name + '_file') is None):
|
|
LOG.info('Will colocate %s with data on %s',
|
|
name, args.data)
|
|
setattr(args, name, args.data)
|
|
|
|
if getattr(args, name) is None:
|
|
if getattr(args, name + '_dev'):
|
|
raise Error('%s is unspecified; not a block device' %
|
|
name.capitalize(), getattr(args, name))
|
|
self.type = self.NONE
|
|
return
|
|
|
|
if not os.path.exists(getattr(args, name)):
|
|
if getattr(args, name + '_dev'):
|
|
raise Error('%s does not exist; not a block device' %
|
|
name.capitalize(), getattr(args, name))
|
|
self.type = self.FILE
|
|
return
|
|
|
|
mode = os.stat(getattr(args, name)).st_mode
|
|
if stat.S_ISBLK(mode):
|
|
if getattr(args, name + '_file'):
|
|
raise Error('%s is not a regular file' % name.capitalize,
|
|
geattr(args, name))
|
|
self.type = self.DEVICE
|
|
return
|
|
|
|
if stat.S_ISREG(mode):
|
|
if getattr(args, name + '_dev'):
|
|
raise Error('%s is not a block device' % name.capitalize,
|
|
geattr(args, name))
|
|
self.type = self.FILE
|
|
|
|
raise Error('%s %s is neither a block device nor regular file' %
|
|
(name.capitalize, geattr(args, name)))
|
|
|
|
def is_none(self):
|
|
return self.type == self.NONE
|
|
|
|
def is_file(self):
|
|
return self.type == self.FILE
|
|
|
|
def is_device(self):
|
|
return self.type == self.DEVICE
|
|
|
|
@staticmethod
|
|
def parser(name):
|
|
parser = argparse.ArgumentParser(add_help=False)
|
|
parser.add_argument(
|
|
'--%s-uuid' % name,
|
|
metavar='UUID',
|
|
help='unique uuid to assign to the %s' % name,
|
|
)
|
|
parser.add_argument(
|
|
'--%s-file' % name,
|
|
action='store_true', default=None,
|
|
help='verify that %s is a file' % name.upper(),
|
|
)
|
|
parser.add_argument(
|
|
'--%s-dev' % name,
|
|
action='store_true', default=None,
|
|
help='verify that %s is a block device' % name.upper(),
|
|
)
|
|
parser.add_argument(
|
|
name,
|
|
metavar=name.upper(),
|
|
nargs='?',
|
|
help=('path to OSD %s disk block device;' % name +
|
|
' leave out to store %s in file' % name),
|
|
)
|
|
return parser
|
|
|
|
def wants_space(self):
|
|
return True
|
|
|
|
def populate_data_path(self, path):
|
|
if self.type == self.DEVICE:
|
|
self.populate_data_path_device(path)
|
|
elif self.type == self.FILE:
|
|
self.populate_data_path_file(path)
|
|
elif self.type == self.NONE:
|
|
pass
|
|
else:
|
|
raise Error('unexpected type ', self.type)
|
|
|
|
def populate_data_path_file(self, path):
|
|
space_uuid = self.name + '_uuid'
|
|
if getattr(self.args, space_uuid) is not None:
|
|
write_one_line(path, space_uuid,
|
|
getattr(self.args, space_uuid))
|
|
|
|
def populate_data_path_device(self, path):
|
|
self.populate_data_path_file(path)
|
|
if self.space_symlink is not None:
|
|
adjust_symlink(self.space_symlink,
|
|
os.path.join(path, self.name))
|
|
|
|
if self.space_dmcrypt is not None:
|
|
adjust_symlink(self.space_dmcrypt,
|
|
os.path.join(path, self.name + '_dmcrypt'))
|
|
else:
|
|
try:
|
|
os.unlink(os.path.join(path, self.name + '_dmcrypt'))
|
|
except OSError:
|
|
pass
|
|
|
|
def prepare(self):
|
|
if self.type == self.DEVICE:
|
|
self.prepare_device()
|
|
elif self.type == self.FILE:
|
|
self.prepare_file()
|
|
elif self.type == self.NONE:
|
|
pass
|
|
else:
|
|
raise Error('unexpected type ', self.type)
|
|
|
|
def prepare_file(self):
|
|
if not os.path.exists(getattr(self.args, self.name)):
|
|
LOG.debug('Creating %s file %s with size 0'
|
|
' (ceph-osd will resize and allocate)',
|
|
self.name,
|
|
getattr(self.args, self.name))
|
|
with file(getattr(self.args, self.name), 'wb') as space_file:
|
|
pass
|
|
|
|
LOG.debug('%s is file %s',
|
|
self.name.capitalize(),
|
|
getattr(self.args, self.name))
|
|
LOG.warning('OSD will not be hot-swappable if %s is '
|
|
'not the same device as the osd data' %
|
|
self.name)
|
|
self.space_symlink = space_file
|
|
|
|
def prepare_device(self):
|
|
reusing_partition = False
|
|
|
|
if is_partition(getattr(self.args, self.name)):
|
|
LOG.debug('%s %s is a partition',
|
|
self.name.capitalize(), getattr(self.args, self.name))
|
|
partition = DevicePartition.factory(
|
|
path=None, dev=getattr(self.args, self.name), args=self.args)
|
|
if isinstance(partition, DevicePartitionCrypt):
|
|
raise Error(getattr(self.args, self.name) +
|
|
' partition already exists'
|
|
' and --dmcrypt specified')
|
|
LOG.warning('OSD will not be hot-swappable' +
|
|
' if ' + self.name + ' is not' +
|
|
' the same device as the osd data')
|
|
if partition.get_ptype() == partition.ptype_for_name(self.name):
|
|
LOG.debug('%s %s was previously prepared with '
|
|
'ceph-disk. Reusing it.',
|
|
self.name.capitalize(),
|
|
getattr(self.args, self.name))
|
|
reusing_partition = True
|
|
# Read and reuse the partition uuid from this journal's
|
|
# previous life. We reuse the uuid instead of changing it
|
|
# because udev does not reliably notice changes to an
|
|
# existing partition's GUID. See
|
|
# http://tracker.ceph.com/issues/10146
|
|
setattr(self.args, self.name + '_uuid', partition.get_uuid())
|
|
LOG.debug('Reusing %s with uuid %s',
|
|
self.name,
|
|
getattr(self.args, self.name + '_uuid'))
|
|
else:
|
|
LOG.warning('%s %s was not prepared with '
|
|
'ceph-disk. Symlinking directly.',
|
|
self.name.capitalize(),
|
|
getattr(self.args, self.name))
|
|
self.space_symlink = getattr(self.args, self.name)
|
|
return
|
|
|
|
self.space_symlink = '/dev/disk/by-partuuid/{uuid}'.format(
|
|
uuid=getattr(self.args, self.name + '_uuid'))
|
|
|
|
if self.args.dmcrypt:
|
|
self.space_dmcrypt = self.space_symlink
|
|
self.space_symlink = '/dev/mapper/{uuid}'.format(
|
|
uuid=getattr(self.args, self.name + '_uuid'))
|
|
|
|
if reusing_partition:
|
|
# confirm that the space_symlink exists. It should since
|
|
# this was an active space
|
|
# in the past. Continuing otherwise would be futile.
|
|
assert os.path.exists(self.space_symlink)
|
|
return
|
|
|
|
num = self.desired_partition_number()
|
|
|
|
if num == 0:
|
|
LOG.warning('OSD will not be hot-swappable if %s '
|
|
'is not the same device as the osd data',
|
|
self.name)
|
|
|
|
device = Device.factory(getattr(self.args, self.name), self.args)
|
|
num = device.create_partition(
|
|
uuid=getattr(self.args, self.name + '_uuid'),
|
|
name=self.name,
|
|
size=self.space_size,
|
|
num=num)
|
|
|
|
partition = device.get_partition(num)
|
|
|
|
LOG.debug('%s is GPT partition %s',
|
|
self.name.capitalize(),
|
|
self.space_symlink)
|
|
|
|
if isinstance(partition, DevicePartitionCrypt):
|
|
partition.format()
|
|
|
|
command_check_call(
|
|
[
|
|
'sgdisk',
|
|
'--typecode={num}:{uuid}'.format(
|
|
num=num,
|
|
uuid=partition.ptype_for_name(self.name),
|
|
),
|
|
'--',
|
|
getattr(self.args, self.name),
|
|
],
|
|
)
|
|
|
|
LOG.debug('%s is GPT partition %s',
|
|
self.name.capitalize(),
|
|
self.space_symlink)
|
|
|
|
|
|
class PrepareJournal(PrepareSpace):
|
|
|
|
def __init__(self, args):
|
|
self.name = 'journal'
|
|
(self.allows_journal,
|
|
self.wants_journal,
|
|
self.needs_journal) = check_journal_reqs(args)
|
|
|
|
if args.journal and not self.allows_journal:
|
|
raise Error('journal specified but not allowed by osd backend')
|
|
|
|
super(PrepareJournal, self).__init__(args)
|
|
|
|
def wants_space(self):
|
|
return self.wants_journal
|
|
|
|
def get_space_size(self):
|
|
return int(get_conf_with_default(
|
|
cluster=self.args.cluster,
|
|
variable='osd_journal_size',
|
|
))
|
|
|
|
def desired_partition_number(self):
|
|
if self.args.journal == self.args.data:
|
|
# we're sharing the disk between osd data and journal;
|
|
# make journal be partition number 2
|
|
num = 2
|
|
else:
|
|
num = 0
|
|
return num
|
|
|
|
@staticmethod
|
|
def parser():
|
|
return PrepareSpace.parser('journal')
|
|
|
|
|
|
class PrepareBluestoreBlock(PrepareSpace):
|
|
|
|
def __init__(self, args):
|
|
self.name = 'block'
|
|
super(PrepareBluestoreBlock, self).__init__(args)
|
|
|
|
def get_space_size(self):
|
|
return 0 # get as much space as possible
|
|
|
|
def desired_partition_number(self):
|
|
if self.args.block == self.args.data:
|
|
num = 2
|
|
else:
|
|
num = 0
|
|
return num
|
|
|
|
@staticmethod
|
|
def parser():
|
|
return PrepareSpace.parser('block')
|
|
|
|
|
|
class CryptHelpers(object):
|
|
|
|
@staticmethod
|
|
def get_cryptsetup_parameters(args):
|
|
cryptsetup_parameters_str = get_conf(
|
|
cluster=args.cluster,
|
|
variable='osd_cryptsetup_parameters',
|
|
)
|
|
if cryptsetup_parameters_str is None:
|
|
return []
|
|
else:
|
|
return shlex.split(cryptsetup_parameters_str)
|
|
|
|
@staticmethod
|
|
def get_dmcrypt_keysize(args):
|
|
dmcrypt_keysize_str = get_conf(
|
|
cluster=args.cluster,
|
|
variable='osd_dmcrypt_key_size',
|
|
)
|
|
dmcrypt_type = CryptHelpers.get_dmcrypt_type(args)
|
|
if dmcrypt_type == 'luks':
|
|
if dmcrypt_keysize_str is None:
|
|
# As LUKS will hash the 'passphrase' in .luks.key
|
|
# into a key, set a large default
|
|
# so if not updated for some time, it is still a
|
|
# reasonable value.
|
|
#
|
|
return 1024
|
|
else:
|
|
return int(dmcrypt_keysize_str)
|
|
elif dmcrypt_type == 'plain':
|
|
if dmcrypt_keysize_str is None:
|
|
# This value is hard-coded in the udev script
|
|
return 256
|
|
else:
|
|
LOG.warning('ensure the 95-ceph-osd.rules file has '
|
|
'been copied to /etc/udev/rules.d '
|
|
'and modified to call cryptsetup '
|
|
'with --key-size=%s' % dmcrypt_keysize_str)
|
|
return int(dmcrypt_keysize_str)
|
|
else:
|
|
return 0
|
|
|
|
@staticmethod
|
|
def get_dmcrypt_type(args):
|
|
if args.dmcrypt:
|
|
dmcrypt_type = get_conf(
|
|
cluster=args.cluster,
|
|
variable='osd_dmcrypt_type',
|
|
)
|
|
|
|
if dmcrypt_type is None or dmcrypt_type == 'luks':
|
|
return 'luks'
|
|
elif dmcrypt_type == 'plain':
|
|
return 'plain'
|
|
else:
|
|
raise Error('invalid osd_dmcrypt_type parameter '
|
|
'(must be luks or plain): ', dmcrypt_type)
|
|
else:
|
|
return None
|
|
|
|
|
|
class PrepareData(object):
|
|
|
|
FILE = 1
|
|
DEVICE = 2
|
|
|
|
def __init__(self, args):
|
|
|
|
self.args = args
|
|
self.partition = None
|
|
self.set_type()
|
|
if self.args.cluster_uuid is None:
|
|
self.args.cluster_uuid = get_fsid(cluster=self.args.cluster)
|
|
|
|
if self.args.osd_uuid is None:
|
|
self.args.osd_uuid = str(uuid.uuid4())
|
|
|
|
def set_type(self):
|
|
dmode = os.stat(self.args.data).st_mode
|
|
|
|
if stat.S_ISDIR(dmode):
|
|
self.type = self.FILE
|
|
elif stat.S_ISBLK(dmode):
|
|
self.type = self.DEVICE
|
|
else:
|
|
raise Error('not a dir or block device', args.data)
|
|
|
|
def is_file(self):
|
|
return self.type == self.FILE
|
|
|
|
def is_device(self):
|
|
return self.type == self.DEVICE
|
|
|
|
@staticmethod
|
|
def parser():
|
|
parser = argparse.ArgumentParser(add_help=False)
|
|
parser.add_argument(
|
|
'--fs-type',
|
|
help='file system type to use (e.g. "ext4")',
|
|
)
|
|
parser.add_argument(
|
|
'--zap-disk',
|
|
action='store_true', default=None,
|
|
help='destroy the partition table (and content) of a disk',
|
|
)
|
|
parser.add_argument(
|
|
'--data-dir',
|
|
action='store_true', default=None,
|
|
help='verify that DATA is a dir',
|
|
)
|
|
parser.add_argument(
|
|
'--data-dev',
|
|
action='store_true', default=None,
|
|
help='verify that DATA is a block device',
|
|
)
|
|
parser.add_argument(
|
|
'data',
|
|
metavar='DATA',
|
|
help='path to OSD data (a disk block device or directory)',
|
|
)
|
|
return parser
|
|
|
|
def populate_data_path_file(self, path, *to_prepare_list):
|
|
self.populate_data_path(path, *to_prepare_list)
|
|
|
|
def populate_data_path(self, path, *to_prepare_list):
|
|
if os.path.exists(os.path.join(path, 'magic')):
|
|
LOG.debug('Data dir %s already exists', path)
|
|
return
|
|
else:
|
|
LOG.debug('Preparing osd data dir %s', path)
|
|
|
|
if self.args.osd_uuid is None:
|
|
self.args.osd_uuid = str(uuid.uuid4())
|
|
|
|
write_one_line(path, 'ceph_fsid', self.args.cluster_uuid)
|
|
write_one_line(path, 'fsid', self.args.osd_uuid)
|
|
write_one_line(path, 'magic', CEPH_OSD_ONDISK_MAGIC)
|
|
|
|
for to_prepare in to_prepare_list:
|
|
to_prepare.populate_data_path(path)
|
|
|
|
def prepare(self, *to_prepare_list):
|
|
if self.type == self.DEVICE:
|
|
self.prepare_device(*to_prepare_list)
|
|
elif self.type == self.FILE:
|
|
self.prepare_file(*to_prepare_list)
|
|
else:
|
|
raise Error('unexpected type ', self.type)
|
|
|
|
def prepare_file(self, *to_prepare_list):
|
|
|
|
if not os.path.exists(self.args.data):
|
|
raise Error('data path for directory does not exist',
|
|
self.args.data)
|
|
|
|
if self.args.data_dev:
|
|
raise Error('data path is not a block device', self.args.data)
|
|
|
|
for to_prepare in to_prepare_list:
|
|
to_prepare.prepare()
|
|
|
|
self.populate_data_path_file(self.args.data, *to_prepare_list)
|
|
|
|
def sanity_checks(self):
|
|
if not os.path.exists(self.args.data):
|
|
raise Error('data path for device does not exist',
|
|
self.args.data)
|
|
verify_not_in_use(self.args.data, True)
|
|
|
|
def set_variables(self):
|
|
if self.args.fs_type is None:
|
|
self.args.fs_type = get_conf(
|
|
cluster=self.args.cluster,
|
|
variable='osd_mkfs_type',
|
|
)
|
|
if self.args.fs_type is None:
|
|
self.args.fs_type = get_conf(
|
|
cluster=self.args.cluster,
|
|
variable='osd_fs_type',
|
|
)
|
|
if self.args.fs_type is None:
|
|
self.args.fs_type = DEFAULT_FS_TYPE
|
|
|
|
self.mkfs_args = get_conf(
|
|
cluster=self.args.cluster,
|
|
variable='osd_mkfs_options_{fstype}'.format(
|
|
fstype=self.args.fs_type,
|
|
),
|
|
)
|
|
if self.mkfs_args is None:
|
|
self.mkfs_args = get_conf(
|
|
cluster=self.args.cluster,
|
|
variable='osd_fs_mkfs_options_{fstype}'.format(
|
|
fstype=self.args.fs_type,
|
|
),
|
|
)
|
|
|
|
self.mount_options = get_conf(
|
|
cluster=self.args.cluster,
|
|
variable='osd_mount_options_{fstype}'.format(
|
|
fstype=self.args.fs_type,
|
|
),
|
|
)
|
|
if self.mount_options is None:
|
|
self.mount_options = get_conf(
|
|
cluster=self.args.cluster,
|
|
variable='osd_fs_mount_options_{fstype}'.format(
|
|
fstype=self.args.fs_type,
|
|
),
|
|
)
|
|
else:
|
|
# remove whitespaces
|
|
self.mount_options = "".join(self.mount_options.split())
|
|
|
|
if self.args.osd_uuid is None:
|
|
self.args.osd_uuid = str(uuid.uuid4())
|
|
|
|
def prepare_device(self, *to_prepare_list):
|
|
self.sanity_checks()
|
|
self.set_variables()
|
|
if self.args.zap_disk is not None:
|
|
zap(self.args.data)
|
|
|
|
def create_data_partition(self):
|
|
device = Device.factory(self.args.data, self.args)
|
|
partition_number = 1
|
|
device.create_partition(uuid=self.args.osd_uuid,
|
|
name='data',
|
|
num=partition_number,
|
|
size=self.get_space_size())
|
|
return device.get_partition(partition_number)
|
|
|
|
def set_data_partition(self):
|
|
if is_partition(self.args.data):
|
|
LOG.debug('OSD data device %s is a partition',
|
|
self.args.data)
|
|
self.partition = DevicePartition.factory(
|
|
path=None, dev=self.args.data, args=self.args)
|
|
ptype = partition.get_ptype()
|
|
if ptype != ptype_osd:
|
|
LOG.warning('incorrect partition UUID: %s, expected %s'
|
|
% (ptype, ptype_osd))
|
|
else:
|
|
LOG.debug('Creating osd partition on %s',
|
|
self.args.data)
|
|
self.partition = self.create_data_partition()
|
|
|
|
def populate_data_path_device(self, *to_prepare_list):
|
|
partition = self.partition
|
|
|
|
if isinstance(partition, DevicePartitionCrypt):
|
|
partition.map()
|
|
|
|
try:
|
|
args = [
|
|
'mkfs',
|
|
'-t',
|
|
self.args.fs_type,
|
|
]
|
|
if self.mkfs_args is not None:
|
|
args.extend(self.mkfs_args.split())
|
|
if self.args.fs_type == 'xfs':
|
|
args.extend(['-f']) # always force
|
|
else:
|
|
args.extend(MKFS_ARGS.get(self.args.fs_type, []))
|
|
args.extend([
|
|
'--',
|
|
partition.get_dev(),
|
|
])
|
|
try:
|
|
LOG.debug('Creating %s fs on %s',
|
|
self.args.fs_type, partition.get_dev())
|
|
command_check_call(args)
|
|
except subprocess.CalledProcessError as e:
|
|
raise Error(e)
|
|
|
|
path = mount(dev=partition.get_dev(),
|
|
fstype=self.args.fs_type,
|
|
options=self.mount_options)
|
|
|
|
try:
|
|
self.populate_data_path(path, *to_prepare_list)
|
|
finally:
|
|
path_set_context(path)
|
|
unmount(path)
|
|
finally:
|
|
if isinstance(partition, DevicePartitionCrypt):
|
|
partition.unmap()
|
|
|
|
if not is_partition(self.args.data):
|
|
try:
|
|
command_check_call(
|
|
[
|
|
'sgdisk',
|
|
'--typecode=%d:%s' % (partition.get_partition_number(),
|
|
partition.ptype_for_name('osd')),
|
|
'--',
|
|
self.args.data,
|
|
],
|
|
)
|
|
except subprocess.CalledProcessError as e:
|
|
raise Error(e)
|
|
update_partition(self.args.data, 'prepared')
|
|
command_check_call(['udevadm', 'trigger',
|
|
'--action=add',
|
|
'--sysname-match',
|
|
os.path.basename(partition.rawdev)])
|
|
|
|
|
|
class PrepareFilestoreData(PrepareData):
|
|
|
|
def get_space_size(self):
|
|
return 0 # get as much space as possible
|
|
|
|
def prepare_device(self, *to_prepare_list):
|
|
super(PrepareFilestoreData, self).prepare_device(*to_prepare_list)
|
|
for to_prepare in to_prepare_list:
|
|
to_prepare.prepare()
|
|
self.set_data_partition()
|
|
self.populate_data_path_device(*to_prepare_list)
|
|
|
|
def populate_data_path(self, path, *to_prepare_list):
|
|
super(PrepareFilestoreData, self).populate_data_path(path,
|
|
*to_prepare_list)
|
|
write_one_line(path, 'type', 'filestore')
|
|
|
|
|
|
class PrepareBluestoreData(PrepareData):
|
|
|
|
def get_space_size(self):
|
|
return 100 # MB
|
|
|
|
def prepare_device(self, *to_prepare_list):
|
|
super(PrepareBluestoreData, self).prepare_device(*to_prepare_list)
|
|
self.set_data_partition()
|
|
for to_prepare in to_prepare_list:
|
|
to_prepare.prepare()
|
|
self.populate_data_path_device(*to_prepare_list)
|
|
|
|
def populate_data_path(self, path, *to_prepare_list):
|
|
super(PrepareBluestoreData, self).populate_data_path(path,
|
|
*to_prepare_list)
|
|
write_one_line(path, 'type', 'bluestore')
|
|
|
|
|
|
def mkfs(
|
|
path,
|
|
cluster,
|
|
osd_id,
|
|
fsid,
|
|
keyring,
|
|
):
|
|
monmap = os.path.join(path, 'activate.monmap')
|
|
command_check_call(
|
|
[
|
|
'ceph',
|
|
'--cluster', cluster,
|
|
'--name', 'client.bootstrap-osd',
|
|
'--keyring', keyring,
|
|
'mon', 'getmap', '-o', monmap,
|
|
],
|
|
)
|
|
|
|
osd_type = read_one_line(path, 'type')
|
|
|
|
if osd_type == 'bluestore':
|
|
command_check_call(
|
|
[
|
|
'ceph-osd',
|
|
'--cluster', cluster,
|
|
'--mkfs',
|
|
'--mkkey',
|
|
'-i', osd_id,
|
|
'--monmap', monmap,
|
|
'--osd-data', path,
|
|
'--osd-uuid', fsid,
|
|
'--keyring', os.path.join(path, 'keyring'),
|
|
'--setuser', get_ceph_user(),
|
|
'--setgroup', get_ceph_user(),
|
|
],
|
|
)
|
|
else:
|
|
command_check_call(
|
|
[
|
|
'ceph-osd',
|
|
'--cluster', cluster,
|
|
'--mkfs',
|
|
'--mkkey',
|
|
'-i', osd_id,
|
|
'--monmap', monmap,
|
|
'--osd-data', path,
|
|
'--osd-journal', os.path.join(path, 'journal'),
|
|
'--osd-uuid', fsid,
|
|
'--keyring', os.path.join(path, 'keyring'),
|
|
'--setuser', get_ceph_user(),
|
|
'--setgroup', get_ceph_group(),
|
|
],
|
|
)
|
|
|
|
|
|
def auth_key(
|
|
path,
|
|
cluster,
|
|
osd_id,
|
|
keyring,
|
|
):
|
|
try:
|
|
# try dumpling+ cap scheme
|
|
command_check_call(
|
|
[
|
|
'ceph',
|
|
'--cluster', cluster,
|
|
'--name', 'client.bootstrap-osd',
|
|
'--keyring', keyring,
|
|
'auth', 'add', 'osd.{osd_id}'.format(osd_id=osd_id),
|
|
'-i', os.path.join(path, 'keyring'),
|
|
'osd', 'allow *',
|
|
'mon', 'allow profile osd',
|
|
],
|
|
)
|
|
except subprocess.CalledProcessError as err:
|
|
if err.returncode == errno.EINVAL:
|
|
# try old cap scheme
|
|
command_check_call(
|
|
[
|
|
'ceph',
|
|
'--cluster', cluster,
|
|
'--name', 'client.bootstrap-osd',
|
|
'--keyring', keyring,
|
|
'auth', 'add', 'osd.{osd_id}'.format(osd_id=osd_id),
|
|
'-i', os.path.join(path, 'keyring'),
|
|
'osd', 'allow *',
|
|
'mon', 'allow rwx',
|
|
],
|
|
)
|
|
else:
|
|
raise
|
|
|
|
|
|
def get_mount_point(cluster, osd_id):
|
|
parent = STATEDIR + '/osd'
|
|
return os.path.join(
|
|
parent,
|
|
'{cluster}-{osd_id}'.format(cluster=cluster, osd_id=osd_id),
|
|
)
|
|
|
|
|
|
def move_mount(
|
|
dev,
|
|
path,
|
|
cluster,
|
|
osd_id,
|
|
fstype,
|
|
mount_options,
|
|
):
|
|
LOG.debug('Moving mount to final location...')
|
|
osd_data = get_mount_point(cluster, osd_id)
|
|
maybe_mkdir(osd_data)
|
|
|
|
# pick best-of-breed mount options based on fs type
|
|
if mount_options is None:
|
|
mount_options = MOUNT_OPTIONS.get(fstype, '')
|
|
|
|
# we really want to mount --move, but that is not supported when
|
|
# the parent mount is shared, as it is by default on RH, Fedora,
|
|
# and probably others. Also, --bind doesn't properly manipulate
|
|
# /etc/mtab, which *still* isn't a symlink to /proc/mounts despite
|
|
# this being 2013. Instead, mount the original device at the final
|
|
# location.
|
|
command_check_call(
|
|
[
|
|
'/bin/mount',
|
|
'-o',
|
|
mount_options,
|
|
'--',
|
|
dev,
|
|
osd_data,
|
|
],
|
|
)
|
|
command_check_call(
|
|
[
|
|
'/bin/umount',
|
|
'-l', # lazy, in case someone else is peeking at the
|
|
# wrong moment
|
|
'--',
|
|
path,
|
|
],
|
|
)
|
|
|
|
|
|
def start_daemon(
|
|
cluster,
|
|
osd_id,
|
|
):
|
|
LOG.debug('Starting %s osd.%s...', cluster, osd_id)
|
|
|
|
path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
|
|
cluster=cluster, osd_id=osd_id)
|
|
|
|
try:
|
|
if os.path.exists(os.path.join(path, 'upstart')):
|
|
command_check_call(
|
|
[
|
|
'/sbin/initctl',
|
|
# use emit, not start, because start would fail if the
|
|
# instance was already running
|
|
'emit',
|
|
# since the daemon starting doesn't guarantee much about
|
|
# the service being operational anyway, don't bother
|
|
# waiting for it
|
|
'--no-wait',
|
|
'--',
|
|
'ceph-osd',
|
|
'cluster={cluster}'.format(cluster=cluster),
|
|
'id={osd_id}'.format(osd_id=osd_id),
|
|
],
|
|
)
|
|
elif os.path.exists(os.path.join(path, 'sysvinit')):
|
|
if os.path.exists('/usr/sbin/service'):
|
|
svc = '/usr/sbin/service'
|
|
else:
|
|
svc = '/sbin/service'
|
|
command_check_call(
|
|
[
|
|
svc,
|
|
'ceph',
|
|
'--cluster',
|
|
'{cluster}'.format(cluster=cluster),
|
|
'start',
|
|
'osd.{osd_id}'.format(osd_id=osd_id),
|
|
],
|
|
)
|
|
elif os.path.exists(os.path.join(path, 'systemd')):
|
|
command_check_call(
|
|
[
|
|
'systemctl',
|
|
'enable',
|
|
'ceph-osd@{osd_id}'.format(osd_id=osd_id),
|
|
],
|
|
)
|
|
command_check_call(
|
|
[
|
|
'systemctl',
|
|
'start',
|
|
'ceph-osd@{osd_id}'.format(osd_id=osd_id),
|
|
],
|
|
)
|
|
else:
|
|
raise Error('{cluster} osd.{osd_id} is not tagged '
|
|
'with an init system'.format(
|
|
cluster=cluster,
|
|
osd_id=osd_id,
|
|
))
|
|
except subprocess.CalledProcessError as e:
|
|
raise Error('ceph osd start failed', e)
|
|
|
|
|
|
def stop_daemon(
|
|
cluster,
|
|
osd_id,
|
|
):
|
|
LOG.debug('Stoping %s osd.%s...', cluster, osd_id)
|
|
|
|
path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
|
|
cluster=cluster, osd_id=osd_id)
|
|
|
|
try:
|
|
if os.path.exists(os.path.join(path, 'upstart')):
|
|
command_check_call(
|
|
[
|
|
'/sbin/initctl',
|
|
'stop',
|
|
'ceph-osd',
|
|
'cluster={cluster}'.format(cluster=cluster),
|
|
'id={osd_id}'.format(osd_id=osd_id),
|
|
],
|
|
)
|
|
elif os.path.exists(os.path.join(path, 'sysvinit')):
|
|
svc = which('service')
|
|
command_check_call(
|
|
[
|
|
svc,
|
|
'ceph',
|
|
'--cluster',
|
|
'{cluster}'.format(cluster=cluster),
|
|
'stop',
|
|
'osd.{osd_id}'.format(osd_id=osd_id),
|
|
],
|
|
)
|
|
elif os.path.exists(os.path.join(path, 'systemd')):
|
|
command_check_call(
|
|
[
|
|
'systemctl',
|
|
'disable',
|
|
'ceph-osd@{osd_id}'.format(osd_id=osd_id),
|
|
],
|
|
)
|
|
command_check_call(
|
|
[
|
|
'systemctl',
|
|
'stop',
|
|
'ceph-osd@{osd_id}'.format(osd_id=osd_id),
|
|
],
|
|
)
|
|
else:
|
|
raise Error('{cluster} osd.{osd_id} is not tagged with an init '
|
|
' system'.format(cluster=cluster, osd_id=osd_id))
|
|
except subprocess.CalledProcessError as e:
|
|
raise Error('ceph osd stop failed', e)
|
|
|
|
|
|
def detect_fstype(
|
|
dev,
|
|
):
|
|
fstype = _check_output(
|
|
args=[
|
|
'/sbin/blkid',
|
|
# we don't want stale cached results
|
|
'-p',
|
|
'-s', 'TYPE',
|
|
'-o', 'value',
|
|
'--',
|
|
dev,
|
|
],
|
|
)
|
|
fstype = must_be_one_line(fstype)
|
|
return fstype
|
|
|
|
|
|
def dmcrypt_map(dev, dmcrypt_key_dir):
|
|
ptype = get_partition_type(dev)
|
|
if ptype in Ptype.get_ready_by_type('plain'):
|
|
luks = False
|
|
cryptsetup_parameters = ['--key-size', '256']
|
|
elif ptype in Ptype.get_ready_by_type('luks'):
|
|
luks = True
|
|
cryptsetup_parameters = []
|
|
else:
|
|
raise Error('--dmcrypt called for dev %s with invalid ptype %s'
|
|
% (dev, ptype))
|
|
part_uuid = get_partition_uuid(dev)
|
|
dmcrypt_key_path = get_dmcrypt_key_path(part_uuid, dmcrypt_key_dir, luks)
|
|
return _dmcrypt_map(
|
|
rawdev=dev,
|
|
keypath=dmcrypt_key_path,
|
|
_uuid=part_uuid,
|
|
cryptsetup_parameters=cryptsetup_parameters,
|
|
luks=luks,
|
|
format_dev=False,
|
|
)
|
|
|
|
|
|
def mount_activate(
|
|
dev,
|
|
activate_key_template,
|
|
init,
|
|
dmcrypt,
|
|
dmcrypt_key_dir,
|
|
reactivate=False,
|
|
):
|
|
|
|
if dmcrypt:
|
|
part_uuid = get_partition_uuid(dev)
|
|
dev = dmcrypt_map(dev, dmcrypt_key_dir)
|
|
try:
|
|
fstype = detect_fstype(dev=dev)
|
|
except (subprocess.CalledProcessError,
|
|
TruncatedLineError,
|
|
TooManyLinesError) as e:
|
|
raise FilesystemTypeError(
|
|
'device {dev}'.format(dev=dev),
|
|
e,
|
|
)
|
|
|
|
# TODO always using mount options from cluster=ceph for
|
|
# now; see http://tracker.newdream.net/issues/3253
|
|
mount_options = get_conf(
|
|
cluster='ceph',
|
|
variable='osd_mount_options_{fstype}'.format(
|
|
fstype=fstype,
|
|
),
|
|
)
|
|
|
|
if mount_options is None:
|
|
mount_options = get_conf(
|
|
cluster='ceph',
|
|
variable='osd_fs_mount_options_{fstype}'.format(
|
|
fstype=fstype,
|
|
),
|
|
)
|
|
|
|
# remove whitespaces from mount_options
|
|
if mount_options is not None:
|
|
mount_options = "".join(mount_options.split())
|
|
|
|
path = mount(dev=dev, fstype=fstype, options=mount_options)
|
|
|
|
# check if the disk is deactive, change the journal owner, group
|
|
# mode for correct user and group.
|
|
if os.path.exists(os.path.join(path, 'deactive')):
|
|
# logging to syslog will help us easy to know udev triggered failure
|
|
if not reactivate:
|
|
unmount(path)
|
|
# we need to unmap again because dmcrypt map will create again
|
|
# on bootup stage (due to deactivate)
|
|
if '/dev/mapper/' in dev:
|
|
part_uuid = dev.replace('/dev/mapper/', '')
|
|
dmcrypt_unmap(part_uuid)
|
|
LOG.info('OSD deactivated! reactivate with: --reactivate')
|
|
raise Error('OSD deactivated! reactivate with: --reactivate')
|
|
# flag to activate a deactive osd.
|
|
deactive = True
|
|
else:
|
|
deactive = False
|
|
|
|
osd_id = None
|
|
cluster = None
|
|
try:
|
|
(osd_id, cluster) = activate(path, activate_key_template, init)
|
|
|
|
# Now active successfully
|
|
# If we got reactivate and deactive, remove the deactive file
|
|
if deactive and reactivate:
|
|
os.remove(os.path.join(path, 'deactive'))
|
|
LOG.info('Remove `deactive` file.')
|
|
|
|
# check if the disk is already active, or if something else is already
|
|
# mounted there
|
|
active = False
|
|
other = False
|
|
src_dev = os.stat(path).st_dev
|
|
try:
|
|
dst_dev = os.stat((STATEDIR + '/osd/{cluster}-{osd_id}').format(
|
|
cluster=cluster,
|
|
osd_id=osd_id)).st_dev
|
|
if src_dev == dst_dev:
|
|
active = True
|
|
else:
|
|
parent_dev = os.stat(STATEDIR + '/osd').st_dev
|
|
if dst_dev != parent_dev:
|
|
other = True
|
|
elif os.listdir(get_mount_point(cluster, osd_id)):
|
|
LOG.info(get_mount_point(cluster, osd_id) +
|
|
" is not empty, won't override")
|
|
other = True
|
|
|
|
except OSError:
|
|
pass
|
|
|
|
if active:
|
|
LOG.info('%s osd.%s already mounted in position; unmounting ours.'
|
|
% (cluster, osd_id))
|
|
unmount(path)
|
|
elif other:
|
|
raise Error('another %s osd.%s already mounted in position '
|
|
'(old/different cluster instance?); unmounting ours.'
|
|
% (cluster, osd_id))
|
|
else:
|
|
move_mount(
|
|
dev=dev,
|
|
path=path,
|
|
cluster=cluster,
|
|
osd_id=osd_id,
|
|
fstype=fstype,
|
|
mount_options=mount_options,
|
|
)
|
|
return (cluster, osd_id)
|
|
|
|
except:
|
|
LOG.error('Failed to activate')
|
|
unmount(path)
|
|
raise
|
|
finally:
|
|
# remove our temp dir
|
|
if os.path.exists(path):
|
|
os.rmdir(path)
|
|
|
|
|
|
def activate_dir(
|
|
path,
|
|
activate_key_template,
|
|
init,
|
|
):
|
|
|
|
if not os.path.exists(path):
|
|
raise Error(
|
|
'directory %s does not exist' % path
|
|
)
|
|
|
|
(osd_id, cluster) = activate(path, activate_key_template, init)
|
|
|
|
if init not in (None, 'none'):
|
|
canonical = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
|
|
cluster=cluster,
|
|
osd_id=osd_id)
|
|
if path != canonical:
|
|
# symlink it from the proper location
|
|
create = True
|
|
if os.path.lexists(canonical):
|
|
old = os.readlink(canonical)
|
|
if old != path:
|
|
LOG.debug('Removing old symlink %s -> %s', canonical, old)
|
|
try:
|
|
os.unlink(canonical)
|
|
except:
|
|
raise Error('unable to remove old symlink', canonical)
|
|
else:
|
|
create = False
|
|
if create:
|
|
LOG.debug('Creating symlink %s -> %s', canonical, path)
|
|
try:
|
|
os.symlink(path, canonical)
|
|
except:
|
|
raise Error('unable to create symlink %s -> %s'
|
|
% (canonical, path))
|
|
|
|
return (cluster, osd_id)
|
|
|
|
|
|
def find_cluster_by_uuid(_uuid):
|
|
"""
|
|
Find a cluster name by searching /etc/ceph/*.conf for a conf file
|
|
with the right uuid.
|
|
"""
|
|
_uuid = _uuid.lower()
|
|
no_fsid = []
|
|
if not os.path.exists(SYSCONFDIR):
|
|
return None
|
|
for conf_file in os.listdir(SYSCONFDIR):
|
|
if not conf_file.endswith('.conf'):
|
|
continue
|
|
cluster = conf_file[:-5]
|
|
try:
|
|
fsid = get_fsid(cluster)
|
|
except Error as e:
|
|
if e.message != 'getting cluster uuid from configuration failed':
|
|
raise e
|
|
no_fsid.append(cluster)
|
|
else:
|
|
if fsid == _uuid:
|
|
return cluster
|
|
# be tolerant of /etc/ceph/ceph.conf without an fsid defined.
|
|
if len(no_fsid) == 1 and no_fsid[0] == 'ceph':
|
|
LOG.warning('No fsid defined in ' + SYSCONFDIR +
|
|
'/ceph.conf; using anyway')
|
|
return 'ceph'
|
|
return None
|
|
|
|
|
|
def activate(
|
|
path,
|
|
activate_key_template,
|
|
init,
|
|
):
|
|
|
|
check_osd_magic(path)
|
|
|
|
ceph_fsid = read_one_line(path, 'ceph_fsid')
|
|
if ceph_fsid is None:
|
|
raise Error('No cluster uuid assigned.')
|
|
LOG.debug('Cluster uuid is %s', ceph_fsid)
|
|
|
|
cluster = find_cluster_by_uuid(ceph_fsid)
|
|
if cluster is None:
|
|
raise Error('No cluster conf found in ' + SYSCONFDIR +
|
|
' with fsid %s' % ceph_fsid)
|
|
LOG.debug('Cluster name is %s', cluster)
|
|
|
|
fsid = read_one_line(path, 'fsid')
|
|
if fsid is None:
|
|
raise Error('No OSD uuid assigned.')
|
|
LOG.debug('OSD uuid is %s', fsid)
|
|
|
|
keyring = activate_key_template.format(cluster=cluster,
|
|
statedir=STATEDIR)
|
|
|
|
osd_id = get_osd_id(path)
|
|
if osd_id is None:
|
|
osd_id = allocate_osd_id(
|
|
cluster=cluster,
|
|
fsid=fsid,
|
|
keyring=keyring,
|
|
)
|
|
write_one_line(path, 'whoami', osd_id)
|
|
LOG.debug('OSD id is %s', osd_id)
|
|
|
|
if not os.path.exists(os.path.join(path, 'ready')):
|
|
LOG.debug('Initializing OSD...')
|
|
# re-running mkfs is safe, so just run until it completes
|
|
mkfs(
|
|
path=path,
|
|
cluster=cluster,
|
|
osd_id=osd_id,
|
|
fsid=fsid,
|
|
keyring=keyring,
|
|
)
|
|
|
|
if init not in (None, 'none'):
|
|
if init == 'auto':
|
|
conf_val = get_conf(
|
|
cluster=cluster,
|
|
variable='init'
|
|
)
|
|
if conf_val is not None:
|
|
init = conf_val
|
|
else:
|
|
init = init_get()
|
|
|
|
LOG.debug('Marking with init system %s', init)
|
|
with file(os.path.join(path, init), 'w'):
|
|
pass
|
|
|
|
# remove markers for others, just in case.
|
|
for other in INIT_SYSTEMS:
|
|
if other != init:
|
|
try:
|
|
os.unlink(os.path.join(path, other))
|
|
except OSError:
|
|
pass
|
|
|
|
if not os.path.exists(os.path.join(path, 'active')):
|
|
LOG.debug('Authorizing OSD key...')
|
|
auth_key(
|
|
path=path,
|
|
cluster=cluster,
|
|
osd_id=osd_id,
|
|
keyring=keyring,
|
|
)
|
|
write_one_line(path, 'active', 'ok')
|
|
LOG.debug('%s osd.%s data dir is ready at %s', cluster, osd_id, path)
|
|
return (osd_id, cluster)
|
|
|
|
|
|
def main_activate(args):
|
|
cluster = None
|
|
osd_id = None
|
|
|
|
if not os.path.exists(args.path):
|
|
raise Error('%s does not exist' % args.path)
|
|
|
|
if is_suppressed(args.path):
|
|
LOG.info('suppressed activate request on %s', args.path)
|
|
return
|
|
|
|
activate_lock.acquire() # noqa
|
|
try:
|
|
mode = os.stat(args.path).st_mode
|
|
if stat.S_ISBLK(mode):
|
|
if (is_partition(args.path) and
|
|
(get_partition_type(args.path) ==
|
|
PTYPE['mpath']['osd']['ready']) and
|
|
not is_mpath(args.path)):
|
|
raise Error('%s is not a multipath block device' %
|
|
args.path)
|
|
(cluster, osd_id) = mount_activate(
|
|
dev=args.path,
|
|
activate_key_template=args.activate_key_template,
|
|
init=args.mark_init,
|
|
dmcrypt=args.dmcrypt,
|
|
dmcrypt_key_dir=args.dmcrypt_key_dir,
|
|
reactivate=args.reactivate,
|
|
)
|
|
osd_data = get_mount_point(cluster, osd_id)
|
|
|
|
elif stat.S_ISDIR(mode):
|
|
(cluster, osd_id) = activate_dir(
|
|
path=args.path,
|
|
activate_key_template=args.activate_key_template,
|
|
init=args.mark_init,
|
|
)
|
|
osd_data = args.path
|
|
|
|
else:
|
|
raise Error('%s is not a directory or block device' % args.path)
|
|
|
|
if (not args.no_start_daemon and args.mark_init == 'none'):
|
|
command_check_call(
|
|
[
|
|
'ceph-osd',
|
|
'--cluster={cluster}'.format(cluster=cluster),
|
|
'--id={osd_id}'.format(osd_id=osd_id),
|
|
'--osd-data={path}'.format(path=osd_data),
|
|
'--osd-journal={path}/journal'.format(path=osd_data),
|
|
],
|
|
)
|
|
|
|
if (not args.no_start_daemon and
|
|
args.mark_init not in (None, 'none')):
|
|
|
|
start_daemon(
|
|
cluster=cluster,
|
|
osd_id=osd_id,
|
|
)
|
|
|
|
finally:
|
|
activate_lock.release() # noqa
|
|
|
|
|
|
###########################
|
|
|
|
def _mark_osd_out(cluster, osd_id):
|
|
LOG.info('Prepare to mark osd.%d out...', osd_id)
|
|
command([
|
|
'ceph',
|
|
'osd',
|
|
'out',
|
|
'osd.%d' % osd_id,
|
|
])
|
|
|
|
|
|
def _check_osd_status(cluster, osd_id):
|
|
"""
|
|
report the osd status:
|
|
00(0) : means OSD OUT AND DOWN
|
|
01(1) : means OSD OUT AND UP
|
|
10(2) : means OSD IN AND DOWN
|
|
11(3) : means OSD IN AND UP
|
|
"""
|
|
LOG.info("Checking osd id: %s ..." % osd_id)
|
|
found = False
|
|
status_code = 0
|
|
out, err, ret = command([
|
|
'ceph',
|
|
'osd',
|
|
'dump',
|
|
'--cluster={cluster}'.format(
|
|
cluster=cluster,
|
|
),
|
|
'--format',
|
|
'json',
|
|
])
|
|
out_json = json.loads(out)
|
|
for item in out_json[u'osds']:
|
|
if item.get(u'osd') == int(osd_id):
|
|
found = True
|
|
if item.get(u'in') is 1:
|
|
status_code += 2
|
|
if item.get(u'up') is 1:
|
|
status_code += 1
|
|
if not found:
|
|
raise Error('Could not osd.%s in osd tree!' % osd_id)
|
|
return status_code
|
|
|
|
|
|
def _remove_osd_directory_files(mounted_path, cluster):
|
|
"""
|
|
To remove the 'ready', 'active', INIT-specific files.
|
|
"""
|
|
if os.path.exists(os.path.join(mounted_path, 'ready')):
|
|
os.remove(os.path.join(mounted_path, 'ready'))
|
|
LOG.info('Remove `ready` file.')
|
|
else:
|
|
LOG.info('`ready` file is already removed.')
|
|
|
|
if os.path.exists(os.path.join(mounted_path, 'active')):
|
|
os.remove(os.path.join(mounted_path, 'active'))
|
|
LOG.info('Remove `active` file.')
|
|
else:
|
|
LOG.info('`active` file is already removed.')
|
|
|
|
# Just check `upstart` and `sysvinit` directly if filename is init-spec.
|
|
conf_val = get_conf(
|
|
cluster=cluster,
|
|
variable='init'
|
|
)
|
|
if conf_val is not None:
|
|
init = conf_val
|
|
else:
|
|
init = init_get()
|
|
os.remove(os.path.join(mounted_path, init))
|
|
LOG.info('Remove `%s` file.', init)
|
|
return
|
|
|
|
|
|
def main_deactivate(args):
|
|
activate_lock.acquire() # noqa
|
|
try:
|
|
main_deactivate_locked(args)
|
|
finally:
|
|
activate_lock.release() # noqa
|
|
|
|
|
|
def main_deactivate_locked(args):
|
|
osd_id = args.deactivate_by_id
|
|
path = args.path
|
|
target_dev = None
|
|
dmcrypt = False
|
|
devices = list_devices()
|
|
|
|
# list all devices and found we need
|
|
for device in devices:
|
|
if 'partitions' in device:
|
|
for dev_part in device.get('partitions'):
|
|
if (osd_id and
|
|
'whoami' in dev_part and
|
|
dev_part['whoami'] == osd_id):
|
|
target_dev = dev_part
|
|
elif (path and
|
|
'path' in dev_part and
|
|
dev_part['path'] == path):
|
|
target_dev = dev_part
|
|
if not target_dev:
|
|
raise Error('Cannot find any match device!!')
|
|
|
|
# set up all we need variable
|
|
osd_id = target_dev['whoami']
|
|
part_type = target_dev['ptype']
|
|
mounted_path = target_dev['mount']
|
|
if Ptype.is_dmcrypt(part_type, 'osd'):
|
|
dmcrypt = True
|
|
|
|
# Do not do anything if osd is already down.
|
|
status_code = _check_osd_status(args.cluster, osd_id)
|
|
if status_code == OSD_STATUS_IN_UP:
|
|
if args.mark_out is True:
|
|
_mark_osd_out(args.cluster, int(osd_id))
|
|
stop_daemon(args.cluster, osd_id)
|
|
elif status_code == OSD_STATUS_IN_DOWN:
|
|
if args.mark_out is True:
|
|
_mark_osd_out(args.cluster, int(osd_id))
|
|
LOG.info("OSD already out/down. Do not do anything now.")
|
|
return
|
|
elif status_code == OSD_STATUS_OUT_UP:
|
|
stop_daemon(args.cluster, osd_id)
|
|
elif status_code == OSD_STATUS_OUT_DOWN:
|
|
LOG.info("OSD already out/down. Do not do anything now.")
|
|
return
|
|
|
|
# remove 'ready', 'active', and INIT-specific files.
|
|
_remove_osd_directory_files(mounted_path, args.cluster)
|
|
|
|
# Write deactivate to osd directory!
|
|
with open(os.path.join(mounted_path, 'deactive'), 'w'):
|
|
path_set_context(os.path.join(mounted_path, 'deactive'))
|
|
|
|
unmount(mounted_path)
|
|
LOG.info("Umount `%s` successfully.", mounted_path)
|
|
|
|
if dmcrypt:
|
|
dmcrypt_unmap(target_dev['uuid'])
|
|
for name in Space.NAMES:
|
|
if name + '_uuid' in target_dev:
|
|
dmcrypt_unmap(target_dev[name + '_uuid'])
|
|
|
|
###########################
|
|
|
|
|
|
def _remove_from_crush_map(cluster, osd_id):
|
|
LOG.info("Prepare to remove osd.%s from crush map..." % osd_id)
|
|
command([
|
|
'ceph',
|
|
'osd',
|
|
'crush',
|
|
'remove',
|
|
'osd.%s' % osd_id,
|
|
])
|
|
|
|
|
|
def _delete_osd_auth_key(cluster, osd_id):
|
|
LOG.info("Prepare to delete osd.%s cephx key..." % osd_id)
|
|
command([
|
|
'ceph',
|
|
'auth',
|
|
'del',
|
|
'osd.%s' % osd_id,
|
|
])
|
|
|
|
|
|
def _deallocate_osd_id(cluster, osd_id):
|
|
LOG.info("Prepare to deallocate the osd-id: %s..." % osd_id)
|
|
command([
|
|
'ceph',
|
|
'osd',
|
|
'rm',
|
|
'%s' % osd_id,
|
|
])
|
|
|
|
|
|
def destroy_lookup_device(args, predicate, description):
|
|
devices = list_devices()
|
|
for device in devices:
|
|
for partition in device.get('partitions', []):
|
|
if partition['dmcrypt']:
|
|
dmcrypt_path = dmcrypt_map(partition['path'],
|
|
args.dmcrypt_key_dir)
|
|
list_dev_osd(dmcrypt_path, {}, partition)
|
|
dmcrypt_unmap(partition['uuid'])
|
|
if predicate(partition):
|
|
return partition
|
|
raise Error('found no device matching ', description)
|
|
|
|
|
|
def main_destroy(args):
|
|
osd_id = args.destroy_by_id
|
|
path = args.path
|
|
dmcrypt = False
|
|
target_dev = None
|
|
|
|
if path:
|
|
if not is_partition(path):
|
|
raise Error(path + " must be a partition device")
|
|
path = os.path.realpath(path)
|
|
|
|
if path:
|
|
target_dev = destroy_lookup_device(
|
|
args, lambda x: x.get('path') == path,
|
|
path)
|
|
elif osd_id:
|
|
target_dev = destroy_lookup_device(
|
|
args, lambda x: x.get('whoami') == osd_id,
|
|
'osd id ' + str(osd_id))
|
|
|
|
osd_id = target_dev['whoami']
|
|
dev_path = target_dev['path']
|
|
if target_dev['ptype'] == PTYPE['mpath']['osd']['ready']:
|
|
base_dev = get_partition_base_mpath(dev_path)
|
|
else:
|
|
base_dev = get_partition_base(dev_path)
|
|
|
|
# Before osd deactivate, we cannot destroy it
|
|
status_code = _check_osd_status(args.cluster, osd_id)
|
|
if status_code != OSD_STATUS_OUT_DOWN and \
|
|
status_code != OSD_STATUS_IN_DOWN:
|
|
raise Error("Could not destroy the active osd. (osd-id: %s)" %
|
|
osd_id)
|
|
|
|
# Remove OSD from crush map
|
|
_remove_from_crush_map(args.cluster, osd_id)
|
|
|
|
# Remove OSD cephx key
|
|
_delete_osd_auth_key(args.cluster, osd_id)
|
|
|
|
# Deallocate OSD ID
|
|
_deallocate_osd_id(args.cluster, osd_id)
|
|
|
|
# we remove the crypt map and device mapper (if dmcrypt is True)
|
|
if dmcrypt:
|
|
for name in Space.NAMES:
|
|
if target_dev.get(name + '_uuid'):
|
|
dmcrypt_unmap(target_dev[name + '_uuid'])
|
|
|
|
# Check zap flag. If we found zap flag, we need to find device for
|
|
# destroy this osd data.
|
|
if args.zap is True:
|
|
# erase the osd data
|
|
LOG.info("Prepare to zap the device %s" % base_dev)
|
|
zap(base_dev)
|
|
|
|
|
|
def get_space_osd_uuid(name, path):
|
|
if not os.path.exists(path):
|
|
raise Error('%s does not exist' % path)
|
|
|
|
mode = os.stat(path).st_mode
|
|
if not stat.S_ISBLK(mode):
|
|
raise Error('%s is not a block device' % path)
|
|
|
|
if (is_partition(path) and
|
|
get_partition_type(path) in (PTYPE['mpath']['journal']['ready'],
|
|
PTYPE['mpath']['block']['ready']) and
|
|
not is_mpath(path)):
|
|
raise Error('%s is not a multipath block device' %
|
|
path)
|
|
|
|
try:
|
|
out = _check_output(
|
|
args=[
|
|
'ceph-osd',
|
|
'--get-device-fsid',
|
|
path,
|
|
],
|
|
close_fds=True,
|
|
)
|
|
except subprocess.CalledProcessError as e:
|
|
raise Error(
|
|
'failed to get osd uuid/fsid from %s' % name,
|
|
e,
|
|
)
|
|
value = str(out).split('\n', 1)[0]
|
|
LOG.debug('%s %s has OSD UUID %s', name.capitalize(), path, value)
|
|
return value
|
|
|
|
|
|
def main_activate_space(name, args):
|
|
if not os.path.exists(args.dev):
|
|
raise Error('%s does not exist' % args.dev)
|
|
|
|
cluster = None
|
|
osd_id = None
|
|
osd_uuid = None
|
|
dev = None
|
|
activate_lock.acquire() # noqa
|
|
try:
|
|
if args.dmcrypt:
|
|
dev = dmcrypt_map(args.dev, args.dmcrypt_key_dir)
|
|
else:
|
|
dev = args.dev
|
|
# FIXME: For an encrypted journal dev, does this return the
|
|
# cyphertext or plaintext dev uuid!? Also, if the journal is
|
|
# encrypted, is the data partition also always encrypted, or
|
|
# are mixed pairs supported!?
|
|
osd_uuid = get_space_osd_uuid(name, dev)
|
|
path = os.path.join('/dev/disk/by-partuuid/', osd_uuid.lower())
|
|
|
|
if is_suppressed(path):
|
|
LOG.info('suppressed activate request on %s', path)
|
|
return
|
|
|
|
(cluster, osd_id) = mount_activate(
|
|
dev=path,
|
|
activate_key_template=args.activate_key_template,
|
|
init=args.mark_init,
|
|
dmcrypt=args.dmcrypt,
|
|
dmcrypt_key_dir=args.dmcrypt_key_dir,
|
|
reactivate=args.reactivate,
|
|
)
|
|
|
|
start_daemon(
|
|
cluster=cluster,
|
|
osd_id=osd_id,
|
|
)
|
|
|
|
finally:
|
|
activate_lock.release() # noqa
|
|
|
|
|
|
###########################
|
|
|
|
|
|
def main_activate_all(args):
|
|
dir = '/dev/disk/by-parttypeuuid'
|
|
LOG.debug('Scanning %s', dir)
|
|
if not os.path.exists(dir):
|
|
return
|
|
err = False
|
|
for name in os.listdir(dir):
|
|
if name.find('.') < 0:
|
|
continue
|
|
(tag, uuid) = name.split('.')
|
|
|
|
if tag in Ptype.get_ready_by_name('osd'):
|
|
|
|
if Ptype.is_dmcrypt(tag, 'osd'):
|
|
path = os.path.join('/dev/mapper', uuid)
|
|
else:
|
|
path = os.path.join(dir, name)
|
|
|
|
if is_suppressed(path):
|
|
LOG.info('suppressed activate request on %s', path)
|
|
continue
|
|
|
|
LOG.info('Activating %s', path)
|
|
activate_lock.acquire() # noqa
|
|
try:
|
|
# never map dmcrypt cyphertext devices
|
|
(cluster, osd_id) = mount_activate(
|
|
dev=path,
|
|
activate_key_template=args.activate_key_template,
|
|
init=args.mark_init,
|
|
dmcrypt=False,
|
|
dmcrypt_key_dir='',
|
|
)
|
|
start_daemon(
|
|
cluster=cluster,
|
|
osd_id=osd_id,
|
|
)
|
|
|
|
except Exception as e:
|
|
print >> sys.stderr, '{prog}: {msg}'.format(
|
|
prog=args.prog,
|
|
msg=e,
|
|
)
|
|
err = True
|
|
|
|
finally:
|
|
activate_lock.release() # noqa
|
|
if err:
|
|
raise Error('One or more partitions failed to activate')
|
|
|
|
|
|
###########################
|
|
|
|
def is_swap(dev):
|
|
dev = os.path.realpath(dev)
|
|
with file('/proc/swaps', 'rb') as proc_swaps:
|
|
for line in proc_swaps.readlines()[1:]:
|
|
fields = line.split()
|
|
if len(fields) < 3:
|
|
continue
|
|
swaps_dev = fields[0]
|
|
if swaps_dev.startswith('/') and os.path.exists(swaps_dev):
|
|
swaps_dev = os.path.realpath(swaps_dev)
|
|
if swaps_dev == dev:
|
|
return True
|
|
return False
|
|
|
|
|
|
def get_oneliner(base, name):
|
|
path = os.path.join(base, name)
|
|
if os.path.isfile(path):
|
|
with open(path, 'r') as _file:
|
|
return _file.readline().rstrip()
|
|
return None
|
|
|
|
|
|
def get_dev_fs(dev):
|
|
fscheck, _, _ = command(
|
|
[
|
|
'blkid',
|
|
'-s',
|
|
'TYPE',
|
|
dev,
|
|
],
|
|
)
|
|
if 'TYPE' in fscheck:
|
|
fstype = fscheck.split()[1].split('"')[1]
|
|
return fstype
|
|
else:
|
|
return None
|
|
|
|
|
|
def split_dev_base_partnum(dev):
|
|
if is_mpath(dev):
|
|
partnum = partnum_mpath(dev)
|
|
base = get_partition_base_mpath(dev)
|
|
else:
|
|
b = block_path(dev)
|
|
partnum = open(os.path.join(b, 'partition')).read().strip()
|
|
base = get_partition_base(dev)
|
|
return (base, partnum)
|
|
|
|
|
|
def get_partition_type(part):
|
|
return get_blkid_partition_info(part, 'ID_PART_ENTRY_TYPE')
|
|
|
|
|
|
def get_partition_uuid(part):
|
|
return get_blkid_partition_info(part, 'ID_PART_ENTRY_UUID')
|
|
|
|
|
|
def get_blkid_partition_info(dev, what=None):
|
|
out, _, _ = command(
|
|
[
|
|
'blkid',
|
|
'-o',
|
|
'udev',
|
|
'-p',
|
|
dev,
|
|
]
|
|
)
|
|
p = {}
|
|
for line in out.splitlines():
|
|
(key, value) = line.split('=')
|
|
p[key] = value
|
|
if what:
|
|
return p.get(what)
|
|
else:
|
|
return p
|
|
|
|
|
|
def more_osd_info(path, uuid_map, desc):
|
|
desc['ceph_fsid'] = get_oneliner(path, 'ceph_fsid')
|
|
if desc['ceph_fsid']:
|
|
desc['cluster'] = find_cluster_by_uuid(desc['ceph_fsid'])
|
|
desc['whoami'] = get_oneliner(path, 'whoami')
|
|
for name in Space.NAMES:
|
|
uuid = get_oneliner(path, name + '_uuid')
|
|
if uuid:
|
|
desc[name + '_uuid'] = uuid.lower()
|
|
if desc[name + '_uuid'] in uuid_map:
|
|
desc[name + '_dev'] = uuid_map[desc[name + '_uuid']]
|
|
|
|
|
|
def list_dev_osd(dev, uuid_map, desc):
|
|
desc['mount'] = is_mounted(dev)
|
|
desc['fs_type'] = get_dev_fs(dev)
|
|
desc['state'] = 'unprepared'
|
|
if desc['mount']:
|
|
desc['state'] = 'active'
|
|
more_osd_info(desc['mount'], uuid_map, desc)
|
|
elif desc['fs_type']:
|
|
try:
|
|
tpath = mount(dev=dev, fstype=desc['fs_type'], options='')
|
|
if tpath:
|
|
try:
|
|
magic = get_oneliner(tpath, 'magic')
|
|
if magic is not None:
|
|
desc['magic'] = magic
|
|
desc['state'] = 'prepared'
|
|
more_osd_info(tpath, uuid_map, desc)
|
|
finally:
|
|
unmount(tpath)
|
|
except MountError:
|
|
pass
|
|
|
|
|
|
def list_format_more_osd_info_plain(dev):
|
|
desc = []
|
|
if dev.get('ceph_fsid'):
|
|
if dev.get('cluster'):
|
|
desc.append('cluster ' + dev['cluster'])
|
|
else:
|
|
desc.append('unknown cluster ' + dev['ceph_fsid'])
|
|
if dev.get('whoami'):
|
|
desc.append('osd.%s' % dev['whoami'])
|
|
for name in Space.NAMES:
|
|
if dev.get(name + '_dev'):
|
|
desc.append(name + ' %s' % dev[name + '_dev'])
|
|
return desc
|
|
|
|
|
|
def list_format_dev_plain(dev, prefix=''):
|
|
desc = []
|
|
if dev['ptype'] == PTYPE['regular']['osd']['ready']:
|
|
desc = (['ceph data', dev['state']] +
|
|
list_format_more_osd_info_plain(dev))
|
|
elif Ptype.is_dmcrypt(dev['ptype'], 'osd'):
|
|
dmcrypt = dev['dmcrypt']
|
|
if not dmcrypt['holders']:
|
|
desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
|
|
'not currently mapped']
|
|
elif len(dmcrypt['holders']) == 1:
|
|
holder = get_dev_path(dmcrypt['holders'][0])
|
|
desc = ['ceph data (dmcrypt %s %s)' %
|
|
(dmcrypt['type'], holder)]
|
|
desc += list_format_more_osd_info_plain(dev)
|
|
else:
|
|
desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
|
|
'holders: ' + ','.join(dmcrypt['holders'])]
|
|
elif Ptype.is_regular_space(dev['ptype']):
|
|
name = Ptype.space_ptype_to_name(dev['ptype'])
|
|
desc.append('ceph ' + name)
|
|
if dev.get(name + '_for'):
|
|
desc.append('for %s' % dev[name + '_for'])
|
|
elif Ptype.is_dmcrypt_space(dev['ptype']):
|
|
name = Ptype.space_ptype_to_name(dev['ptype'])
|
|
dmcrypt = dev['dmcrypt']
|
|
if dmcrypt['holders'] and len(dmcrypt['holders']) == 1:
|
|
holder = get_dev_path(dmcrypt['holders'][0])
|
|
desc = ['ceph ' + name + ' (dmcrypt %s %s)' %
|
|
(dmcrypt['type'], holder)]
|
|
else:
|
|
desc = ['ceph ' + name + ' (dmcrypt %s)' % dmcrypt['type']]
|
|
if dev.get(name + '_for'):
|
|
desc.append('for %s' % dev[name + '_for'])
|
|
else:
|
|
desc.append(dev['type'])
|
|
if dev.get('fs_type'):
|
|
desc.append(dev['fs_type'])
|
|
elif dev.get('ptype'):
|
|
desc.append(dev['ptype'])
|
|
if dev.get('mount'):
|
|
desc.append('mounted on %s' % dev['mount'])
|
|
return '%s%s %s' % (prefix, dev['path'], ', '.join(desc))
|
|
|
|
|
|
def list_format_plain(devices):
|
|
lines = []
|
|
for device in devices:
|
|
if device.get('partitions'):
|
|
lines.append('%s :' % device['path'])
|
|
for p in sorted(device['partitions']):
|
|
lines.append(list_format_dev_plain(dev=p,
|
|
prefix=' '))
|
|
else:
|
|
lines.append(list_format_dev_plain(dev=device,
|
|
prefix=''))
|
|
return "\n".join(lines)
|
|
|
|
|
|
def list_dev(dev, uuid_map, space_map):
|
|
info = {
|
|
'path': dev,
|
|
'dmcrypt': {},
|
|
}
|
|
|
|
info['is_partition'] = is_partition(dev)
|
|
if info['is_partition']:
|
|
ptype = get_partition_type(dev)
|
|
info['uuid'] = get_partition_uuid(dev)
|
|
else:
|
|
ptype = 'unknown'
|
|
info['ptype'] = ptype
|
|
LOG.info("list_dev(dev = " + dev + ", ptype = " + str(ptype) + ")")
|
|
if ptype in (PTYPE['regular']['osd']['ready'],
|
|
PTYPE['mpath']['osd']['ready']):
|
|
info['type'] = 'data'
|
|
if ptype == PTYPE['mpath']['osd']['ready']:
|
|
info['multipath'] = True
|
|
list_dev_osd(dev, uuid_map, info)
|
|
elif ptype == PTYPE['plain']['osd']['ready']:
|
|
holders = is_held(dev)
|
|
info['type'] = 'data'
|
|
info['dmcrypt']['holders'] = holders
|
|
info['dmcrypt']['type'] = 'plain'
|
|
if len(holders) == 1:
|
|
list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
|
|
elif ptype == PTYPE['luks']['osd']['ready']:
|
|
holders = is_held(dev)
|
|
info['type'] = 'data'
|
|
info['dmcrypt']['holders'] = holders
|
|
info['dmcrypt']['type'] = 'LUKS'
|
|
if len(holders) == 1:
|
|
list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
|
|
elif Ptype.is_regular_space(ptype) or Ptype.is_mpath_space(ptype):
|
|
name = Ptype.space_ptype_to_name(ptype)
|
|
info['type'] = name
|
|
if ptype == PTYPE['mpath'][name]['ready']:
|
|
info['multipath'] = True
|
|
if info.get('uuid') in space_map:
|
|
info[name + '_for'] = space_map[info['uuid']]
|
|
elif Ptype.is_plain_space(ptype):
|
|
name = Ptype.space_ptype_to_name(ptype)
|
|
holders = is_held(dev)
|
|
info['type'] = name
|
|
info['dmcrypt']['type'] = 'plain'
|
|
info['dmcrypt']['holders'] = holders
|
|
if info.get('uuid') in space_map:
|
|
info[name + '_for'] = space_map[info['uuid']]
|
|
elif Ptype.is_luks_space(ptype):
|
|
name = Ptype.space_ptype_to_name(ptype)
|
|
holders = is_held(dev)
|
|
info['type'] = name
|
|
info['dmcrypt']['type'] = 'LUKS'
|
|
info['dmcrypt']['holders'] = holders
|
|
if info.get('uuid') in space_map:
|
|
info[name + '_for'] = space_map[info['uuid']]
|
|
else:
|
|
path = is_mounted(dev)
|
|
fs_type = get_dev_fs(dev)
|
|
if is_swap(dev):
|
|
info['type'] = 'swap'
|
|
else:
|
|
info['type'] = 'other'
|
|
if fs_type:
|
|
info['fs_type'] = fs_type
|
|
if path:
|
|
info['mount'] = path
|
|
|
|
return info
|
|
|
|
|
|
def list_devices():
|
|
partmap = list_all_partitions()
|
|
|
|
uuid_map = {}
|
|
space_map = {}
|
|
for base, parts in sorted(partmap.iteritems()):
|
|
for p in parts:
|
|
dev = get_dev_path(p)
|
|
part_uuid = get_partition_uuid(dev)
|
|
if part_uuid:
|
|
uuid_map[part_uuid] = dev
|
|
ptype = get_partition_type(dev)
|
|
LOG.debug("main_list: " + dev +
|
|
" ptype = " + str(ptype) +
|
|
" uuid = " + str(part_uuid))
|
|
if ptype in Ptype.get_ready_by_name('osd'):
|
|
if Ptype.is_dmcrypt(ptype, 'osd'):
|
|
holders = is_held(dev)
|
|
if len(holders) != 1:
|
|
continue
|
|
dev_to_mount = get_dev_path(holders[0])
|
|
else:
|
|
dev_to_mount = dev
|
|
|
|
fs_type = get_dev_fs(dev_to_mount)
|
|
if fs_type is not None:
|
|
try:
|
|
tpath = mount(dev=dev_to_mount,
|
|
fstype=fs_type, options='')
|
|
try:
|
|
for name in Space.NAMES:
|
|
space_uuid = get_oneliner(tpath,
|
|
name + '_uuid')
|
|
if space_uuid:
|
|
space_map[space_uuid.lower()] = dev
|
|
finally:
|
|
unmount(tpath)
|
|
except MountError:
|
|
pass
|
|
|
|
LOG.debug("main_list: " + str(partmap) + ", uuid_map = " +
|
|
str(uuid_map) + ", space_map = " + str(space_map))
|
|
|
|
devices = []
|
|
for base, parts in sorted(partmap.iteritems()):
|
|
if parts:
|
|
disk = {'path': get_dev_path(base)}
|
|
partitions = []
|
|
for p in sorted(parts):
|
|
partitions.append(list_dev(get_dev_path(p),
|
|
uuid_map,
|
|
space_map))
|
|
disk['partitions'] = partitions
|
|
devices.append(disk)
|
|
else:
|
|
device = list_dev(get_dev_path(base), uuid_map, space_map)
|
|
device['path'] = get_dev_path(base)
|
|
devices.append(device)
|
|
LOG.debug("list_devices: " + str(devices))
|
|
return devices
|
|
|
|
|
|
def main_list(args):
|
|
devices = list_devices()
|
|
if args.path:
|
|
paths = []
|
|
for path in args.path:
|
|
if os.path.exists(path):
|
|
paths.append(os.path.realpath(path))
|
|
else:
|
|
paths.append(path)
|
|
selected_devices = []
|
|
for device in devices:
|
|
for path in paths:
|
|
if re.search(path + '$', device['path']):
|
|
selected_devices.append(device)
|
|
else:
|
|
selected_devices = devices
|
|
if args.format == 'json':
|
|
print json.dumps(selected_devices)
|
|
else:
|
|
output = list_format_plain(selected_devices)
|
|
if output:
|
|
print output
|
|
|
|
|
|
###########################
|
|
#
|
|
# Mark devices that we want to suppress activates on with a
|
|
# file like
|
|
#
|
|
# /var/lib/ceph/tmp/suppress-activate.sdb
|
|
#
|
|
# where the last bit is the sanitized device name (/dev/X without the
|
|
# /dev/ prefix) and the is_suppress() check matches a prefix. That
|
|
# means suppressing sdb will stop activate on sdb1, sdb2, etc.
|
|
#
|
|
|
|
def is_suppressed(path):
|
|
disk = os.path.realpath(path)
|
|
try:
|
|
if (not disk.startswith('/dev/') or
|
|
not stat.S_ISBLK(os.lstat(disk).st_mode)):
|
|
return False
|
|
base = get_dev_name(disk)
|
|
while len(base):
|
|
if os.path.exists(SUPPRESS_PREFIX + base): # noqa
|
|
return True
|
|
base = base[:-1]
|
|
except:
|
|
return False
|
|
|
|
|
|
def set_suppress(path):
|
|
disk = os.path.realpath(path)
|
|
if not os.path.exists(disk):
|
|
raise Error('does not exist', path)
|
|
if not stat.S_ISBLK(os.lstat(path).st_mode):
|
|
raise Error('not a block device', path)
|
|
base = get_dev_name(disk)
|
|
|
|
with file(SUPPRESS_PREFIX + base, 'w') as f: # noqa
|
|
pass
|
|
LOG.info('set suppress flag on %s', base)
|
|
|
|
|
|
def unset_suppress(path):
|
|
disk = os.path.realpath(path)
|
|
if not os.path.exists(disk):
|
|
raise Error('does not exist', path)
|
|
if not stat.S_ISBLK(os.lstat(path).st_mode):
|
|
raise Error('not a block device', path)
|
|
assert disk.startswith('/dev/')
|
|
base = get_dev_name(disk)
|
|
|
|
fn = SUPPRESS_PREFIX + base # noqa
|
|
if not os.path.exists(fn):
|
|
raise Error('not marked as suppressed', path)
|
|
|
|
try:
|
|
os.unlink(fn)
|
|
LOG.info('unset suppress flag on %s', base)
|
|
except OSError as e:
|
|
raise Error('failed to unsuppress', e)
|
|
|
|
|
|
def main_suppress(args):
|
|
set_suppress(args.path)
|
|
|
|
|
|
def main_unsuppress(args):
|
|
unset_suppress(args.path)
|
|
|
|
|
|
def main_zap(args):
|
|
for dev in args.dev:
|
|
zap(dev)
|
|
|
|
|
|
def main_trigger(args):
|
|
LOG.debug("main_trigger: " + str(args))
|
|
if is_systemd() and not args.sync:
|
|
# http://www.freedesktop.org/software/systemd/man/systemd-escape.html
|
|
escaped_dev = args.dev[1:].replace('-', '\\x2d')
|
|
service = 'ceph-disk@{dev}.service'.format(dev=escaped_dev)
|
|
LOG.info('systemd detected, triggering %s' % service)
|
|
command(
|
|
[
|
|
'systemctl',
|
|
'--no-block',
|
|
'restart',
|
|
service,
|
|
]
|
|
)
|
|
return
|
|
if is_upstart() and not args.sync:
|
|
LOG.info('upstart detected, triggering ceph-disk task')
|
|
command(
|
|
[
|
|
'initctl',
|
|
'emit',
|
|
'ceph-disk',
|
|
'dev={dev}'.format(dev=args.dev),
|
|
'pid={pid}'.format(pid=os.getpid()),
|
|
]
|
|
)
|
|
return
|
|
|
|
parttype = get_partition_type(args.dev)
|
|
partid = get_partition_uuid(args.dev)
|
|
|
|
LOG.info('trigger {dev} parttype {parttype} uuid {partid}'.format(
|
|
dev=args.dev,
|
|
parttype=parttype,
|
|
partid=partid,
|
|
))
|
|
|
|
if parttype in (PTYPE['regular']['osd']['ready'],
|
|
PTYPE['mpath']['osd']['ready']):
|
|
command(
|
|
[
|
|
'ceph-disk',
|
|
'activate',
|
|
args.dev,
|
|
]
|
|
)
|
|
elif parttype in (PTYPE['regular']['journal']['ready'],
|
|
PTYPE['mpath']['journal']['ready']):
|
|
command(
|
|
[
|
|
'ceph-disk',
|
|
'activate-journal',
|
|
args.dev,
|
|
]
|
|
)
|
|
|
|
# journals are easy: map, chown, activate-journal
|
|
elif parttype == PTYPE['plain']['journal']['ready']:
|
|
command(
|
|
[
|
|
'/sbin/cryptsetup',
|
|
'--key-file',
|
|
'/etc/ceph/dmcrypt-keys/{partid}'.format(partid=partid),
|
|
'--key-size',
|
|
'256',
|
|
'create',
|
|
partid,
|
|
args.dev,
|
|
]
|
|
)
|
|
newdev = '/dev/mapper/' + partid
|
|
count = 0
|
|
while not os.path.exists(newdev) and count <= 10:
|
|
time.sleep(1)
|
|
count += 1
|
|
command(
|
|
[
|
|
'/bin/chown',
|
|
'ceph:ceph',
|
|
newdev,
|
|
]
|
|
)
|
|
command(
|
|
[
|
|
'/usr/sbin/ceph-disk',
|
|
'activate-journal',
|
|
newdev,
|
|
]
|
|
)
|
|
elif parttype == PTYPE['luks']['journal']['ready']:
|
|
command(
|
|
[
|
|
'/sbin/cryptsetup',
|
|
'--key-file',
|
|
'/etc/ceph/dmcrypt-keys/{partid}.luks.key'.format(
|
|
partid=partid),
|
|
'luksOpen',
|
|
args.dev,
|
|
partid,
|
|
]
|
|
)
|
|
newdev = '/dev/mapper/' + partid
|
|
count = 0
|
|
while not os.path.exists(newdev) and count <= 10:
|
|
time.sleep(1)
|
|
count += 1
|
|
command(
|
|
[
|
|
'/bin/chown',
|
|
'ceph:ceph',
|
|
newdev,
|
|
]
|
|
)
|
|
command(
|
|
[
|
|
'/usr/sbin/ceph-disk',
|
|
'activate-journal',
|
|
newdev,
|
|
]
|
|
)
|
|
|
|
elif parttype in (PTYPE['regular']['block']['ready'],
|
|
PTYPE['mpath']['block']['ready']):
|
|
command(
|
|
[
|
|
'ceph-disk',
|
|
'activate-block',
|
|
args.dev,
|
|
]
|
|
)
|
|
|
|
# blocks are easy: map, chown, activate-block
|
|
elif parttype == PTYPE['plain']['block']['ready']:
|
|
command(
|
|
[
|
|
'/sbin/cryptsetup',
|
|
'--key-file',
|
|
'/etc/ceph/dmcrypt-keys/{partid}'.format(partid=partid),
|
|
'--key-size',
|
|
'256',
|
|
'create',
|
|
partid,
|
|
args.dev,
|
|
]
|
|
)
|
|
newdev = '/dev/mapper/' + partid
|
|
count = 0
|
|
while not os.path.exists(newdev) and count <= 10:
|
|
time.sleep(1)
|
|
count += 1
|
|
command(
|
|
[
|
|
'/bin/chown',
|
|
'ceph:ceph',
|
|
newdev,
|
|
]
|
|
)
|
|
command(
|
|
[
|
|
'/usr/sbin/ceph-disk',
|
|
'activate-block',
|
|
newdev,
|
|
]
|
|
)
|
|
elif parttype == PTYPE['luks']['block']['ready']:
|
|
command(
|
|
[
|
|
'/sbin/cryptsetup',
|
|
'--key-file',
|
|
'/etc/ceph/dmcrypt-keys/{partid}.luks.key'.format(
|
|
partid=partid),
|
|
'luksOpen',
|
|
args.dev,
|
|
partid,
|
|
]
|
|
)
|
|
newdev = '/dev/mapper/' + partid
|
|
count = 0
|
|
while not os.path.exists(newdev) and count <= 10:
|
|
time.sleep(1)
|
|
count += 1
|
|
command(
|
|
[
|
|
'/bin/chown',
|
|
'ceph:ceph',
|
|
newdev,
|
|
]
|
|
)
|
|
command(
|
|
[
|
|
'/usr/sbin/ceph-disk',
|
|
'activate-block',
|
|
newdev,
|
|
]
|
|
)
|
|
|
|
# osd data: map, activate
|
|
elif parttype == PTYPE['plain']['osd']['ready']:
|
|
command(
|
|
[
|
|
'/sbin/cryptsetup',
|
|
'--key-file',
|
|
'/etc/ceph/dmcrypt-keys/{partid}'.format(partid=partid),
|
|
'--key-size',
|
|
'256',
|
|
'create',
|
|
partid,
|
|
args.dev,
|
|
]
|
|
)
|
|
newdev = '/dev/mapper/' + partid
|
|
count = 0
|
|
while not os.path.exists(newdev) and count <= 10:
|
|
time.sleep(1)
|
|
count += 1
|
|
command(
|
|
[
|
|
'/usr/sbin/ceph-disk',
|
|
'activate',
|
|
newdev,
|
|
]
|
|
)
|
|
|
|
elif parttype == PTYPE['luks']['osd']['ready']:
|
|
command(
|
|
[
|
|
'/sbin/cryptsetup',
|
|
'--key-file',
|
|
'/etc/ceph/dmcrypt-keys/{partid}.luks.key'.format(
|
|
partid=partid),
|
|
'luksOpen',
|
|
args.dev,
|
|
partid,
|
|
]
|
|
)
|
|
newdev = '/dev/mapper/' + partid
|
|
count = 0
|
|
while not os.path.exists(newdev) and count <= 10:
|
|
time.sleep(1)
|
|
count += 1
|
|
command(
|
|
[
|
|
'/usr/sbin/ceph-disk',
|
|
'activate',
|
|
newdev,
|
|
]
|
|
)
|
|
|
|
else:
|
|
raise Error('unrecognized partition type %s' % parttype)
|
|
|
|
|
|
def setup_statedir(dir):
|
|
# XXX The following use of globals makes linting
|
|
# really hard. Global state in Python is iffy and
|
|
# should be avoided.
|
|
global STATEDIR
|
|
STATEDIR = dir
|
|
|
|
if not os.path.exists(STATEDIR):
|
|
os.mkdir(STATEDIR)
|
|
if not os.path.exists(STATEDIR + "/tmp"):
|
|
os.mkdir(STATEDIR + "/tmp")
|
|
|
|
global prepare_lock
|
|
prepare_lock = filelock(STATEDIR + '/tmp/ceph-disk.prepare.lock')
|
|
|
|
global activate_lock
|
|
activate_lock = filelock(STATEDIR + '/tmp/ceph-disk.activate.lock')
|
|
|
|
global SUPPRESS_PREFIX
|
|
SUPPRESS_PREFIX = STATEDIR + '/tmp/suppress-activate.'
|
|
|
|
|
|
def setup_sysconfdir(dir):
|
|
global SYSCONFDIR
|
|
SYSCONFDIR = dir
|
|
|
|
|
|
def parse_args(argv):
|
|
parser = argparse.ArgumentParser(
|
|
'ceph-disk',
|
|
)
|
|
parser.add_argument(
|
|
'-v', '--verbose',
|
|
action='store_true', default=None,
|
|
help='be more verbose',
|
|
)
|
|
parser.add_argument(
|
|
'--log-stdout',
|
|
action='store_true', default=None,
|
|
help='log to stdout',
|
|
)
|
|
parser.add_argument(
|
|
'--prepend-to-path',
|
|
metavar='PATH',
|
|
default='/usr/bin',
|
|
help=('prepend PATH to $PATH for backward compatibility '
|
|
'(default /usr/bin)'),
|
|
)
|
|
parser.add_argument(
|
|
'--statedir',
|
|
metavar='PATH',
|
|
default='/var/lib/ceph',
|
|
help=('directory in which ceph state is preserved '
|
|
'(default /var/lib/ceph)'),
|
|
)
|
|
parser.add_argument(
|
|
'--sysconfdir',
|
|
metavar='PATH',
|
|
default='/etc/ceph',
|
|
help=('directory in which ceph configuration files are found '
|
|
'(default /etc/ceph)'),
|
|
)
|
|
parser.add_argument(
|
|
'--setuser',
|
|
metavar='USER',
|
|
default=None,
|
|
help='use the given user for subprocesses, rather than ceph or root'
|
|
)
|
|
parser.add_argument(
|
|
'--setgroup',
|
|
metavar='GROUP',
|
|
default=None,
|
|
help='use the given group for subprocesses, rather than ceph or root'
|
|
)
|
|
parser.set_defaults(
|
|
# we want to hold on to this, for later
|
|
prog=parser.prog,
|
|
)
|
|
|
|
subparsers = parser.add_subparsers(
|
|
title='subcommands',
|
|
description='valid subcommands',
|
|
help='sub-command help',
|
|
)
|
|
|
|
Prepare.set_subparser(subparsers)
|
|
make_activate_parser(subparsers)
|
|
make_activate_block_parser(subparsers)
|
|
make_activate_journal_parser(subparsers)
|
|
make_activate_all_parser(subparsers)
|
|
make_list_parser(subparsers)
|
|
make_suppress_parser(subparsers)
|
|
make_deactivate_parser(subparsers)
|
|
make_destroy_parser(subparsers)
|
|
make_zap_parser(subparsers)
|
|
make_trigger_parser(subparsers)
|
|
|
|
args = parser.parse_args(argv)
|
|
return args
|
|
|
|
|
|
def make_trigger_parser(subparsers):
|
|
trigger_parser = subparsers.add_parser(
|
|
'trigger',
|
|
help='Trigger an event (caled by udev)')
|
|
trigger_parser.add_argument(
|
|
'dev',
|
|
help=('device'),
|
|
)
|
|
trigger_parser.add_argument(
|
|
'--sync',
|
|
action='store_true', default=None,
|
|
help=('do operation synchronously; do not trigger systemd'),
|
|
)
|
|
trigger_parser.set_defaults(
|
|
func=main_trigger,
|
|
)
|
|
return trigger_parser
|
|
|
|
|
|
def make_activate_parser(subparsers):
|
|
activate_parser = subparsers.add_parser(
|
|
'activate',
|
|
help='Activate a Ceph OSD')
|
|
activate_parser.add_argument(
|
|
'--mount',
|
|
action='store_true', default=None,
|
|
help='mount a block device [deprecated, ignored]',
|
|
)
|
|
activate_parser.add_argument(
|
|
'--activate-key',
|
|
metavar='PATH',
|
|
help='bootstrap-osd keyring path template (%(default)s)',
|
|
dest='activate_key_template',
|
|
)
|
|
activate_parser.add_argument(
|
|
'--mark-init',
|
|
metavar='INITSYSTEM',
|
|
help='init system to manage this dir',
|
|
default='auto',
|
|
choices=INIT_SYSTEMS,
|
|
)
|
|
activate_parser.add_argument(
|
|
'--no-start-daemon',
|
|
action='store_true', default=None,
|
|
help='do not start the daemon',
|
|
)
|
|
activate_parser.add_argument(
|
|
'path',
|
|
metavar='PATH',
|
|
help='path to block device or directory',
|
|
)
|
|
activate_parser.add_argument(
|
|
'--dmcrypt',
|
|
action='store_true', default=None,
|
|
help='map DATA and/or JOURNAL devices with dm-crypt',
|
|
)
|
|
activate_parser.add_argument(
|
|
'--dmcrypt-key-dir',
|
|
metavar='KEYDIR',
|
|
default='/etc/ceph/dmcrypt-keys',
|
|
help='directory where dm-crypt keys are stored',
|
|
)
|
|
activate_parser.add_argument(
|
|
'--reactivate',
|
|
action='store_true', default=False,
|
|
help='activate the deactived OSD',
|
|
)
|
|
activate_parser.set_defaults(
|
|
activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
|
|
func=main_activate,
|
|
)
|
|
return activate_parser
|
|
|
|
|
|
def make_activate_block_parser(subparsers):
|
|
return make_activate_space_parser('block', subparsers)
|
|
|
|
|
|
def make_activate_journal_parser(subparsers):
|
|
return make_activate_space_parser('journal', subparsers)
|
|
|
|
|
|
def make_activate_space_parser(name, subparsers):
|
|
activate_space_parser = subparsers.add_parser(
|
|
'activate-%s' % name,
|
|
help='Activate an OSD via its %s device' % name)
|
|
activate_space_parser.add_argument(
|
|
'dev',
|
|
metavar='DEV',
|
|
help='path to %s block device' % name,
|
|
)
|
|
activate_space_parser.add_argument(
|
|
'--activate-key',
|
|
metavar='PATH',
|
|
help='bootstrap-osd keyring path template (%(default)s)',
|
|
dest='activate_key_template',
|
|
)
|
|
activate_space_parser.add_argument(
|
|
'--mark-init',
|
|
metavar='INITSYSTEM',
|
|
help='init system to manage this dir',
|
|
default='auto',
|
|
choices=INIT_SYSTEMS,
|
|
)
|
|
activate_space_parser.add_argument(
|
|
'--dmcrypt',
|
|
action='store_true', default=None,
|
|
help=('map data and/or auxiliariy (journal, etc.) '
|
|
'devices with dm-crypt'),
|
|
)
|
|
activate_space_parser.add_argument(
|
|
'--dmcrypt-key-dir',
|
|
metavar='KEYDIR',
|
|
default='/etc/ceph/dmcrypt-keys',
|
|
help='directory where dm-crypt keys are stored',
|
|
)
|
|
activate_space_parser.add_argument(
|
|
'--reactivate',
|
|
action='store_true', default=False,
|
|
help='activate the deactived OSD',
|
|
)
|
|
activate_space_parser.set_defaults(
|
|
activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
|
|
func=lambda args: main_activate_space(name, args),
|
|
)
|
|
return activate_space_parser
|
|
|
|
|
|
def make_activate_all_parser(subparsers):
|
|
activate_all_parser = subparsers.add_parser(
|
|
'activate-all',
|
|
help='Activate all tagged OSD partitions')
|
|
activate_all_parser.add_argument(
|
|
'--activate-key',
|
|
metavar='PATH',
|
|
help='bootstrap-osd keyring path template (%(default)s)',
|
|
dest='activate_key_template',
|
|
)
|
|
activate_all_parser.add_argument(
|
|
'--mark-init',
|
|
metavar='INITSYSTEM',
|
|
help='init system to manage this dir',
|
|
default='auto',
|
|
choices=INIT_SYSTEMS,
|
|
)
|
|
activate_all_parser.set_defaults(
|
|
activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
|
|
func=main_activate_all,
|
|
)
|
|
return activate_all_parser
|
|
|
|
|
|
def make_list_parser(subparsers):
|
|
list_parser = subparsers.add_parser(
|
|
'list',
|
|
help='List disks, partitions, and Ceph OSDs')
|
|
list_parser.add_argument(
|
|
'--format',
|
|
help='output format',
|
|
default='plain',
|
|
choices=['json', 'plain'],
|
|
)
|
|
list_parser.add_argument(
|
|
'path',
|
|
metavar='PATH',
|
|
nargs='*',
|
|
help='path to block devices, relative to /sys/block',
|
|
)
|
|
list_parser.set_defaults(
|
|
func=main_list,
|
|
)
|
|
return list_parser
|
|
|
|
|
|
def make_suppress_parser(subparsers):
|
|
suppress_parser = subparsers.add_parser(
|
|
'suppress-activate',
|
|
help='Suppress activate on a device (prefix)')
|
|
suppress_parser.add_argument(
|
|
'path',
|
|
metavar='PATH',
|
|
help='path to block device or directory',
|
|
)
|
|
suppress_parser.set_defaults(
|
|
func=main_suppress,
|
|
)
|
|
|
|
unsuppress_parser = subparsers.add_parser(
|
|
'unsuppress-activate',
|
|
help='Stop suppressing activate on a device (prefix)')
|
|
unsuppress_parser.add_argument(
|
|
'path',
|
|
metavar='PATH',
|
|
help='path to block device or directory',
|
|
)
|
|
unsuppress_parser.set_defaults(
|
|
func=main_unsuppress,
|
|
)
|
|
return suppress_parser
|
|
|
|
|
|
def make_deactivate_parser(subparsers):
|
|
deactivate_parser = subparsers.add_parser(
|
|
'deactivate',
|
|
help='Deactivate a Ceph OSD')
|
|
deactivate_parser.add_argument(
|
|
'--cluster',
|
|
metavar='NAME',
|
|
default='ceph',
|
|
help='cluster name to assign this disk to',
|
|
)
|
|
deactivate_parser.add_argument(
|
|
'path',
|
|
metavar='PATH',
|
|
nargs='?',
|
|
help='path to block device or directory',
|
|
)
|
|
deactivate_parser.add_argument(
|
|
'--deactivate-by-id',
|
|
metavar='<id>',
|
|
help='ID of OSD to deactive'
|
|
)
|
|
deactivate_parser.add_argument(
|
|
'--mark-out',
|
|
action='store_true', default=False,
|
|
help='option to mark the osd out',
|
|
)
|
|
deactivate_parser.set_defaults(
|
|
func=main_deactivate,
|
|
)
|
|
|
|
|
|
def make_destroy_parser(subparsers):
|
|
destroy_parser = subparsers.add_parser(
|
|
'destroy',
|
|
help='Destroy a Ceph OSD')
|
|
destroy_parser.add_argument(
|
|
'--cluster',
|
|
metavar='NAME',
|
|
default='ceph',
|
|
help='cluster name to assign this disk to',
|
|
)
|
|
destroy_parser.add_argument(
|
|
'path',
|
|
metavar='PATH',
|
|
nargs='?',
|
|
help='path to block device or directory',
|
|
)
|
|
destroy_parser.add_argument(
|
|
'--destroy-by-id',
|
|
metavar='<id>',
|
|
help='ID of OSD to destroy'
|
|
)
|
|
destroy_parser.add_argument(
|
|
'--dmcrypt-key-dir',
|
|
metavar='KEYDIR',
|
|
default='/etc/ceph/dmcrypt-keys',
|
|
help=('directory where dm-crypt keys are stored '
|
|
'(If you don\'t know how it work, '
|
|
'dont use it. we have default value)'),
|
|
)
|
|
destroy_parser.add_argument(
|
|
'--zap',
|
|
action='store_true', default=False,
|
|
help='option to erase data and partition',
|
|
)
|
|
destroy_parser.set_defaults(
|
|
func=main_destroy,
|
|
)
|
|
|
|
|
|
def make_zap_parser(subparsers):
|
|
zap_parser = subparsers.add_parser(
|
|
'zap',
|
|
help='Zap/erase/destroy a device\'s partition table (and contents)')
|
|
zap_parser.add_argument(
|
|
'dev',
|
|
metavar='DEV',
|
|
nargs='+',
|
|
help='path to block device',
|
|
)
|
|
zap_parser.set_defaults(
|
|
func=main_zap,
|
|
)
|
|
return zap_parser
|
|
|
|
|
|
def main(argv):
|
|
args = parse_args(argv)
|
|
|
|
setup_logging(args.verbose, args.log_stdout)
|
|
|
|
if args.prepend_to_path != '':
|
|
path = os.environ.get('PATH', os.defpath)
|
|
os.environ['PATH'] = args.prepend_to_path + ":" + path
|
|
|
|
setup_statedir(args.statedir)
|
|
setup_sysconfdir(args.sysconfdir)
|
|
|
|
global CEPH_PREF_USER
|
|
CEPH_PREF_USER = args.setuser
|
|
global CEPH_PREF_GROUP
|
|
CEPH_PREF_GROUP = args.setgroup
|
|
|
|
if args.verbose:
|
|
args.func(args)
|
|
else:
|
|
main_catch(args.func, args)
|
|
|
|
|
|
def setup_logging(verbose, log_stdout):
|
|
loglevel = logging.WARNING
|
|
if verbose:
|
|
loglevel = logging.DEBUG
|
|
|
|
if log_stdout:
|
|
ch = logging.StreamHandler(stream=sys.stdout)
|
|
ch.setLevel(loglevel)
|
|
formatter = logging.Formatter('%(filename)s: %(message)s')
|
|
ch.setFormatter(formatter)
|
|
LOG.addHandler(ch)
|
|
LOG.setLevel(loglevel)
|
|
else:
|
|
logging.basicConfig(
|
|
level=loglevel,
|
|
)
|
|
|
|
|
|
def main_catch(func, args):
|
|
|
|
try:
|
|
func(args)
|
|
|
|
except Error as e:
|
|
raise SystemExit(
|
|
'{prog}: {msg}'.format(
|
|
prog=args.prog,
|
|
msg=e,
|
|
)
|
|
)
|
|
|
|
except CephDiskException as error:
|
|
exc_name = error.__class__.__name__
|
|
raise SystemExit(
|
|
'{prog} {exc_name}: {msg}'.format(
|
|
prog=args.prog,
|
|
exc_name=exc_name,
|
|
msg=error,
|
|
)
|
|
)
|
|
|
|
|
|
def run():
|
|
main(sys.argv[1:])
|
|
|
|
if __name__ == '__main__':
|
|
main(sys.argv[1:])
|
|
warned_about = {}
|