ceph-volume: system.get_mounts() refactor

When a network mount is present in `/proc/mounts` but, for any reason,
the corresponding server is down, `system.get_mounts()` hangs forever.
In a cluster deployed with cephadm, the consequence is that the
`ceph-volume inventory` commands it triggers hang and stay in D
state.

The idea here is to use a thread with a timeout to abort the call if the
timeout is reached.
`get_mounts()` is now a method of a class so we can exclude a path
altogether during the whole `inventory` execution (otherwise,
ceph-volume would try to access it as many times as there are devices
on the host, which could slow down the inventory execution).

Fixes: https://tracker.ceph.com/issues/57070

Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com>
This commit is contained in:
Guillaume Abrioux 2022-08-09 08:27:30 +02:00
parent 9a498550d1
commit 89cad1f33b
4 changed files with 111 additions and 71 deletions

View File

@ -137,8 +137,8 @@ class Scan(object):
osd_metadata[file_json_key] = content osd_metadata[file_json_key] = content
# we must scan the paths again because this might be a temporary mount # we must scan the paths again because this might be a temporary mount
path_mounts = system.get_mounts(paths=True) path_mounts = system.Mounts(paths=True)
device = path_mounts.get(path) device = path_mounts.get_mounts().get(path)
# it is possible to have more than one device, pick the first one, and # it is possible to have more than one device, pick the first one, and
# warn that it is possible that more than one device is 'data' # warn that it is possible that more than one device is 'data'
@ -360,8 +360,8 @@ class Scan(object):
)) ))
# Capture some environment status, so that it can be reused all over # Capture some environment status, so that it can be reused all over
self.device_mounts = system.get_mounts(devices=True) self.device_mounts = system.Mounts(devices=True).get_mounts()
self.path_mounts = system.get_mounts(paths=True) self.path_mounts = system.Mounts(paths=True).get_mounts()
for path in paths: for path in paths:
args.osd_path = path args.osd_path = path

View File

@ -145,27 +145,28 @@ class TestGetMounts(object):
with open(proc_path, 'w') as f: with open(proc_path, 'w') as f:
f.write('') f.write('')
monkeypatch.setattr(system, 'PROCDIR', PROCDIR) monkeypatch.setattr(system, 'PROCDIR', PROCDIR)
assert system.get_mounts() == {} m = system.Mounts()
assert m.get_mounts() == {}
def test_is_mounted_(self, fake_proc): def test_is_mounted_(self, fake_proc):
result = system.get_mounts() m = system.Mounts()
assert result['/dev/sdc2'] == ['/boot'] assert m.get_mounts()['/dev/sdc2'] == ['/boot']
def test_ignores_two_fields(self, fake_proc): def test_ignores_two_fields(self, fake_proc):
result = system.get_mounts() m = system.Mounts()
assert result.get('/dev/sde4') is None assert m.get_mounts().get('/dev/sde4') is None
def test_tmpfs_is_reported(self, fake_proc): def test_tmpfs_is_reported(self, fake_proc):
result = system.get_mounts() m = system.Mounts()
assert result['tmpfs'][0] == '/dev/shm' assert m.get_mounts()['tmpfs'][0] == '/dev/shm'
def test_non_skip_devs_arent_reported(self, fake_proc): def test_non_skip_devs_arent_reported(self, fake_proc):
result = system.get_mounts() m = system.Mounts()
assert result.get('cgroup') is None assert m.get_mounts().get('cgroup') is None
def test_multiple_mounts_are_appended(self, fake_proc): def test_multiple_mounts_are_appended(self, fake_proc):
result = system.get_mounts() m = system.Mounts()
assert len(result['tmpfs']) == 7 assert len(m.get_mounts()['tmpfs']) == 7
def test_nonexistent_devices_are_skipped(self, tmpdir, monkeypatch): def test_nonexistent_devices_are_skipped(self, tmpdir, monkeypatch):
PROCDIR = str(tmpdir) PROCDIR = str(tmpdir)
@ -176,8 +177,8 @@ class TestGetMounts(object):
/dev/sda2 /far/lib/ceph/osd/ceph-1 xfs rw,attr2,inode64,noquota 0 0""")) /dev/sda2 /far/lib/ceph/osd/ceph-1 xfs rw,attr2,inode64,noquota 0 0"""))
monkeypatch.setattr(system, 'PROCDIR', PROCDIR) monkeypatch.setattr(system, 'PROCDIR', PROCDIR)
monkeypatch.setattr(os.path, 'exists', lambda x: False if x == '/dev/sda1' else True) monkeypatch.setattr(os.path, 'exists', lambda x: False if x == '/dev/sda1' else True)
result = system.get_mounts() m = system.Mounts()
assert result.get('/dev/sda1') is None assert m.get_mounts().get('/dev/sda1') is None
class TestIsBinary(object): class TestIsBinary(object):

View File

@ -235,7 +235,7 @@ def legacy_encrypted(device):
This function assumes that ``device`` will be a partition. This function assumes that ``device`` will be a partition.
""" """
if os.path.isdir(device): if os.path.isdir(device):
mounts = system.get_mounts(paths=True) mounts = system.Mounts(paths=True).get_mounts()
# yes, rebind the device variable here because a directory isn't going # yes, rebind the device variable here because a directory isn't going
# to help with parsing # to help with parsing
device = mounts.get(device, [None])[0] device = mounts.get(device, [None])[0]

View File

@ -6,6 +6,7 @@ import platform
import tempfile import tempfile
import uuid import uuid
import subprocess import subprocess
import threading
from ceph_volume import process, terminal from ceph_volume import process, terminal
from . import as_string from . import as_string
@ -236,7 +237,8 @@ def path_is_mounted(path, destination=None):
""" """
Check if the given path is mounted Check if the given path is mounted
""" """
mounts = get_mounts(paths=True) m = Mounts(paths=True)
mounts = m.get_mounts()
realpath = os.path.realpath(path) realpath = os.path.realpath(path)
mounted_locations = mounts.get(realpath, []) mounted_locations = mounts.get(realpath, [])
@ -250,16 +252,17 @@ def device_is_mounted(dev, destination=None):
Check if the given device is mounted, optionally validating that a Check if the given device is mounted, optionally validating that a
destination exists destination exists
""" """
plain_mounts = get_mounts(devices=True) plain_mounts = Mounts(devices=True)
realpath_mounts = get_mounts(devices=True, realpath=True) realpath_mounts = Mounts(devices=True, realpath=True)
realpath_dev = os.path.realpath(dev) if dev.startswith('/') else dev realpath_dev = os.path.realpath(dev) if dev.startswith('/') else dev
destination = os.path.realpath(destination) if destination else None destination = os.path.realpath(destination) if destination else None
# plain mounts # plain mounts
plain_dev_mounts = plain_mounts.get(dev, []) plain_dev_mounts = plain_mounts.get_mounts().get(dev, [])
realpath_dev_mounts = plain_mounts.get(realpath_dev, []) realpath_dev_mounts = plain_mounts.get_mounts().get(realpath_dev, [])
# realpath mounts # realpath mounts
plain_dev_real_mounts = realpath_mounts.get(dev, []) plain_dev_real_mounts = realpath_mounts.get_mounts().get(dev, [])
realpath_dev_real_mounts = realpath_mounts.get(realpath_dev, []) realpath_dev_real_mounts = realpath_mounts.get_mounts().get(realpath_dev, [])
mount_locations = [ mount_locations = [
plain_dev_mounts, plain_dev_mounts,
@ -282,61 +285,97 @@ def device_is_mounted(dev, destination=None):
logger.info('%s was not found as mounted', dev) logger.info('%s was not found as mounted', dev)
return False return False
class Mounts(object):
excluded_paths = []
def get_mounts(devices=False, paths=False, realpath=False): def __init__(self, devices=False, paths=False, realpath=False):
""" self.devices = devices
Create a mapping of all available system mounts so that other helpers can self.paths = paths
detect nicely what path or device is mounted self.realpath = realpath
It ignores (most of) non existing devices, but since some setups might need def safe_realpath(self, path, timeout=0.2):
some extra device information, it will make an exception for: def _realpath(path, result):
p = os.path.realpath(path)
result.append(p)
- tmpfs result = []
- devtmpfs t = threading.Thread(target=_realpath, args=(path, result))
- /dev/root t.setDaemon(True)
t.start()
t.join(timeout)
if t.is_alive():
return None
return result[0]
If ``devices`` is set to ``True`` the mapping will be a device-to-path(s), def get_mounts(self):
if ``paths`` is set to ``True`` then the mapping will be """
a path-to-device(s) Create a mapping of all available system mounts so that other helpers can
detect nicely what path or device is mounted
:param realpath: Resolve devices to use their realpaths. This is useful for It ignores (most of) non existing devices, but since some setups might need
paths like LVM where more than one path can point to the same device some extra device information, it will make an exception for:
"""
devices_mounted = {}
paths_mounted = {}
do_not_skip = ['tmpfs', 'devtmpfs', '/dev/root']
default_to_devices = devices is False and paths is False
with open(PROCDIR + '/mounts', 'rb') as mounts: - tmpfs
proc_mounts = mounts.readlines() - devtmpfs
- /dev/root
for line in proc_mounts: If ``devices`` is set to ``True`` the mapping will be a device-to-path(s),
fields = [as_string(f) for f in line.split()] if ``paths`` is set to ``True`` then the mapping will be
if len(fields) < 3: a path-to-device(s)
continue
if realpath: :param realpath: Resolve devices to use their realpaths. This is useful for
device = os.path.realpath(fields[0]) if fields[0].startswith('/') else fields[0] paths like LVM where more than one path can point to the same device
else: """
device = fields[0] devices_mounted = {}
path = os.path.realpath(fields[1]) paths_mounted = {}
# only care about actual existing devices do_not_skip = ['tmpfs', 'devtmpfs', '/dev/root']
if not os.path.exists(device) or not device.startswith('/'): default_to_devices = self.devices is False and self.paths is False
if device not in do_not_skip:
with open(PROCDIR + '/mounts', 'rb') as mounts:
proc_mounts = mounts.readlines()
for line in proc_mounts:
fields = [as_string(f) for f in line.split()]
if len(fields) < 3:
continue continue
if device in devices_mounted.keys(): if fields[0] in Mounts.excluded_paths or \
devices_mounted[device].append(path) fields[1] in Mounts.excluded_paths:
else: continue
devices_mounted[device] = [path] if self.realpath:
if path in paths_mounted.keys(): if fields[0].startswith('/'):
paths_mounted[path].append(device) device = self.safe_realpath(fields[0])
else: if device is None:
paths_mounted[path] = [device] logger.warning(f"Can't get realpath on {fields[0]}, skipping.")
Mounts.excluded_paths.append(fields[0])
continue
else:
device = fields[0]
else:
device = fields[0]
path = self.safe_realpath(fields[1])
if path is None:
logger.warning(f"Can't get realpath on {fields[1]}, skipping.")
Mounts.excluded_paths.append(fields[1])
continue
# only care about actual existing devices
if not os.path.exists(device) or not device.startswith('/'):
if device not in do_not_skip:
continue
if device in devices_mounted.keys():
devices_mounted[device].append(path)
else:
devices_mounted[device] = [path]
if path in paths_mounted.keys():
paths_mounted[path].append(device)
else:
paths_mounted[path] = [device]
# Default to returning information for devices if # Default to returning information for devices if
if devices is True or default_to_devices: if self.devices is True or default_to_devices:
return devices_mounted return devices_mounted
else: else:
return paths_mounted return paths_mounted
def set_context(path, recursive=False): def set_context(path, recursive=False):