From 89cad1f33b30bf9237734ee3595eb000facfc7d8 Mon Sep 17 00:00:00 2001 From: Guillaume Abrioux Date: Tue, 9 Aug 2022 08:27:30 +0200 Subject: [PATCH] ceph-volume: system.get_mounts() refactor When a network mount is present in `/proc/mounts` but for any reason the corresponding server is down, this function hangs forever. In a cluster deployed with cephadm, the consequence is that it triggers `ceph-volume inventory` commands that hang and stay in D state. The idea here is to use a thread with a timeout to abort the call if the timeout is reached. `get_mounts()` is now a method of a class so we can exclude a path altogether during the whole `inventory` execution (otherwise, ceph-volume would try to access it as many devices there is on the host which could slow down the inventory execution) Fixes: https://tracker.ceph.com/issues/57070 Signed-off-by: Guillaume Abrioux --- .../ceph_volume/devices/simple/scan.py | 8 +- .../ceph_volume/tests/util/test_system.py | 27 ++-- .../ceph_volume/util/encryption.py | 2 +- src/ceph-volume/ceph_volume/util/system.py | 145 +++++++++++------- 4 files changed, 111 insertions(+), 71 deletions(-) diff --git a/src/ceph-volume/ceph_volume/devices/simple/scan.py b/src/ceph-volume/ceph_volume/devices/simple/scan.py index 70e5256d273..ff7040beb06 100644 --- a/src/ceph-volume/ceph_volume/devices/simple/scan.py +++ b/src/ceph-volume/ceph_volume/devices/simple/scan.py @@ -137,8 +137,8 @@ class Scan(object): osd_metadata[file_json_key] = content # we must scan the paths again because this might be a temporary mount - path_mounts = system.get_mounts(paths=True) - device = path_mounts.get(path) + path_mounts = system.Mounts(paths=True) + device = path_mounts.get_mounts().get(path) # it is possible to have more than one device, pick the first one, and # warn that it is possible that more than one device is 'data' @@ -360,8 +360,8 @@ class Scan(object): )) # Capture some environment status, so that it can be reused all over - self.device_mounts = system.get_mounts(devices=True) - self.path_mounts = system.get_mounts(paths=True) + self.device_mounts = system.Mounts(devices=True).get_mounts() + self.path_mounts = system.Mounts(paths=True).get_mounts() for path in paths: args.osd_path = path diff --git a/src/ceph-volume/ceph_volume/tests/util/test_system.py b/src/ceph-volume/ceph_volume/tests/util/test_system.py index cfaac5be747..5746f7023ce 100644 --- a/src/ceph-volume/ceph_volume/tests/util/test_system.py +++ b/src/ceph-volume/ceph_volume/tests/util/test_system.py @@ -145,27 +145,28 @@ class TestGetMounts(object): with open(proc_path, 'w') as f: f.write('') monkeypatch.setattr(system, 'PROCDIR', PROCDIR) - assert system.get_mounts() == {} + m = system.Mounts() + assert m.get_mounts() == {} def test_is_mounted_(self, fake_proc): - result = system.get_mounts() - assert result['/dev/sdc2'] == ['/boot'] + m = system.Mounts() + assert m.get_mounts()['/dev/sdc2'] == ['/boot'] def test_ignores_two_fields(self, fake_proc): - result = system.get_mounts() - assert result.get('/dev/sde4') is None + m = system.Mounts() + assert m.get_mounts().get('/dev/sde4') is None def test_tmpfs_is_reported(self, fake_proc): - result = system.get_mounts() - assert result['tmpfs'][0] == '/dev/shm' + m = system.Mounts() + assert m.get_mounts()['tmpfs'][0] == '/dev/shm' def test_non_skip_devs_arent_reported(self, fake_proc): - result = system.get_mounts() - assert result.get('cgroup') is None + m = system.Mounts() + assert m.get_mounts().get('cgroup') is None def test_multiple_mounts_are_appended(self, fake_proc): - result = system.get_mounts() - assert len(result['tmpfs']) == 7 + m = system.Mounts() + assert len(m.get_mounts()['tmpfs']) == 7 def test_nonexistent_devices_are_skipped(self, tmpdir, monkeypatch): PROCDIR = str(tmpdir) @@ -176,8 +177,8 @@ class TestGetMounts(object): /dev/sda2 /far/lib/ceph/osd/ceph-1 xfs rw,attr2,inode64,noquota 0 0""")) monkeypatch.setattr(system, 'PROCDIR', PROCDIR) monkeypatch.setattr(os.path, 'exists', lambda x: False if x == '/dev/sda1' else True) - result = system.get_mounts() - assert result.get('/dev/sda1') is None + m = system.Mounts() + assert m.get_mounts().get('/dev/sda1') is None class TestIsBinary(object): diff --git a/src/ceph-volume/ceph_volume/util/encryption.py b/src/ceph-volume/ceph_volume/util/encryption.py index b23c0f79b3d..cefd6094bd0 100644 --- a/src/ceph-volume/ceph_volume/util/encryption.py +++ b/src/ceph-volume/ceph_volume/util/encryption.py @@ -235,7 +235,7 @@ def legacy_encrypted(device): This function assumes that ``device`` will be a partition. """ if os.path.isdir(device): - mounts = system.get_mounts(paths=True) + mounts = system.Mounts(paths=True).get_mounts() # yes, rebind the device variable here because a directory isn't going # to help with parsing device = mounts.get(device, [None])[0] diff --git a/src/ceph-volume/ceph_volume/util/system.py b/src/ceph-volume/ceph_volume/util/system.py index ed1fb8ed2a2..590a0599b56 100644 --- a/src/ceph-volume/ceph_volume/util/system.py +++ b/src/ceph-volume/ceph_volume/util/system.py @@ -6,6 +6,7 @@ import platform import tempfile import uuid import subprocess +import threading from ceph_volume import process, terminal from . import as_string @@ -236,7 +237,8 @@ def path_is_mounted(path, destination=None): """ Check if the given path is mounted """ - mounts = get_mounts(paths=True) + m = Mounts(paths=True) + mounts = m.get_mounts() realpath = os.path.realpath(path) mounted_locations = mounts.get(realpath, []) @@ -250,16 +252,17 @@ def device_is_mounted(dev, destination=None): Check if the given device is mounted, optionally validating that a destination exists """ - plain_mounts = get_mounts(devices=True) - realpath_mounts = get_mounts(devices=True, realpath=True) + plain_mounts = Mounts(devices=True) + realpath_mounts = Mounts(devices=True, realpath=True) + realpath_dev = os.path.realpath(dev) if dev.startswith('/') else dev destination = os.path.realpath(destination) if destination else None # plain mounts - plain_dev_mounts = plain_mounts.get(dev, []) - realpath_dev_mounts = plain_mounts.get(realpath_dev, []) + plain_dev_mounts = plain_mounts.get_mounts().get(dev, []) + realpath_dev_mounts = plain_mounts.get_mounts().get(realpath_dev, []) # realpath mounts - plain_dev_real_mounts = realpath_mounts.get(dev, []) - realpath_dev_real_mounts = realpath_mounts.get(realpath_dev, []) + plain_dev_real_mounts = realpath_mounts.get_mounts().get(dev, []) + realpath_dev_real_mounts = realpath_mounts.get_mounts().get(realpath_dev, []) mount_locations = [ plain_dev_mounts, @@ -282,61 +285,97 @@ def device_is_mounted(dev, destination=None): logger.info('%s was not found as mounted', dev) return False +class Mounts(object): + excluded_paths = [] -def get_mounts(devices=False, paths=False, realpath=False): - """ - Create a mapping of all available system mounts so that other helpers can - detect nicely what path or device is mounted + def __init__(self, devices=False, paths=False, realpath=False): + self.devices = devices + self.paths = paths + self.realpath = realpath - It ignores (most of) non existing devices, but since some setups might need - some extra device information, it will make an exception for: + def safe_realpath(self, path, timeout=0.2): + def _realpath(path, result): + p = os.path.realpath(path) + result.append(p) - - tmpfs - - devtmpfs - - /dev/root + result = [] + t = threading.Thread(target=_realpath, args=(path, result)) + t.setDaemon(True) + t.start() + t.join(timeout) + if t.is_alive(): + return None + return result[0] - If ``devices`` is set to ``True`` the mapping will be a device-to-path(s), - if ``paths`` is set to ``True`` then the mapping will be - a path-to-device(s) + def get_mounts(self): + """ + Create a mapping of all available system mounts so that other helpers can + detect nicely what path or device is mounted - :param realpath: Resolve devices to use their realpaths. This is useful for - paths like LVM where more than one path can point to the same device - """ - devices_mounted = {} - paths_mounted = {} - do_not_skip = ['tmpfs', 'devtmpfs', '/dev/root'] - default_to_devices = devices is False and paths is False + It ignores (most of) non existing devices, but since some setups might need + some extra device information, it will make an exception for: - with open(PROCDIR + '/mounts', 'rb') as mounts: - proc_mounts = mounts.readlines() + - tmpfs + - devtmpfs + - /dev/root - for line in proc_mounts: - fields = [as_string(f) for f in line.split()] - if len(fields) < 3: - continue - if realpath: - device = os.path.realpath(fields[0]) if fields[0].startswith('/') else fields[0] - else: - device = fields[0] - path = os.path.realpath(fields[1]) - # only care about actual existing devices - if not os.path.exists(device) or not device.startswith('/'): - if device not in do_not_skip: + If ``devices`` is set to ``True`` the mapping will be a device-to-path(s), + if ``paths`` is set to ``True`` then the mapping will be + a path-to-device(s) + + :param realpath: Resolve devices to use their realpaths. This is useful for + paths like LVM where more than one path can point to the same device + """ + devices_mounted = {} + paths_mounted = {} + do_not_skip = ['tmpfs', 'devtmpfs', '/dev/root'] + default_to_devices = self.devices is False and self.paths is False + + + with open(PROCDIR + '/mounts', 'rb') as mounts: + proc_mounts = mounts.readlines() + + for line in proc_mounts: + fields = [as_string(f) for f in line.split()] + if len(fields) < 3: continue - if device in devices_mounted.keys(): - devices_mounted[device].append(path) - else: - devices_mounted[device] = [path] - if path in paths_mounted.keys(): - paths_mounted[path].append(device) - else: - paths_mounted[path] = [device] + if fields[0] in Mounts.excluded_paths or \ + fields[1] in Mounts.excluded_paths: + continue + if self.realpath: + if fields[0].startswith('/'): + device = self.safe_realpath(fields[0]) + if device is None: + logger.warning(f"Can't get realpath on {fields[0]}, skipping.") + Mounts.excluded_paths.append(fields[0]) + continue + else: + device = fields[0] + else: + device = fields[0] + path = self.safe_realpath(fields[1]) + if path is None: + logger.warning(f"Can't get realpath on {fields[1]}, skipping.") + Mounts.excluded_paths.append(fields[1]) + continue + # only care about actual existing devices + if not os.path.exists(device) or not device.startswith('/'): + if device not in do_not_skip: + continue + if device in devices_mounted.keys(): + devices_mounted[device].append(path) + else: + devices_mounted[device] = [path] + if path in paths_mounted.keys(): + paths_mounted[path].append(device) + else: + paths_mounted[path] = [device] - # Default to returning information for devices if - if devices is True or default_to_devices: - return devices_mounted - else: - return paths_mounted + # Default to returning information for devices if + if self.devices is True or default_to_devices: + return devices_mounted + else: + return paths_mounted def set_context(path, recursive=False):