From a6658c91bb96bed3e4033a6f15c500f5e0b6c4eb Mon Sep 17 00:00:00 2001 From: Martin Ohmacht Date: Wed, 28 Sep 2022 14:09:04 -0400 Subject: [PATCH] blk/kernel: add plugin system for devices with compression and move VDO support into plugin The current VDO support implementation is buried inside the common/blkdev.cc with a simple interface used by KernelDevice. It is not easily extendable and can not be easily used for other devices providing similar capabilities. This patch adds a plugin system that is based in its structure on the erasure code plugin system and moves the VDO support code into a VDO plugin. Signed-off-by: Martin Ohmacht --- ceph.spec.in | 3 + debian/ceph-base.install | 1 + debian/control | 1 + src/CMakeLists.txt | 5 +- src/blk/BlockDevice.h | 5 +- src/blk/CMakeLists.txt | 2 +- src/blk/kernel/KernelDevice.cc | 43 ++-- src/blk/kernel/KernelDevice.h | 7 +- src/ceph_osd.cc | 9 + src/common/blkdev.cc | 134 ------------ src/common/blkdev.h | 6 - src/common/options/osd.yaml.in | 17 ++ src/common/win32/blkdev.cc | 15 -- src/extblkdev/CMakeLists.txt | 14 ++ src/extblkdev/ExtBlkDevInterface.h | 141 +++++++++++++ src/extblkdev/ExtBlkDevPlugin.cc | 268 ++++++++++++++++++++++++ src/extblkdev/ExtBlkDevPlugin.h | 38 ++++ src/extblkdev/vdo/CMakeLists.txt | 9 + src/extblkdev/vdo/ExtBlkDevPluginVdo.cc | 59 ++++++ src/extblkdev/vdo/ExtBlkDevPluginVdo.h | 34 +++ src/extblkdev/vdo/ExtBlkDevVdo.cc | 156 ++++++++++++++ src/extblkdev/vdo/ExtBlkDevVdo.h | 52 +++++ src/global/global_init.cc | 8 + src/os/bluestore/BlueStore.cc | 11 +- src/os/filestore/FileStore.cc | 47 ++--- src/os/filestore/FileStore.h | 4 +- 26 files changed, 868 insertions(+), 221 deletions(-) create mode 100644 src/extblkdev/CMakeLists.txt create mode 100644 src/extblkdev/ExtBlkDevInterface.h create mode 100644 src/extblkdev/ExtBlkDevPlugin.cc create mode 100644 src/extblkdev/ExtBlkDevPlugin.h create mode 100644 src/extblkdev/vdo/CMakeLists.txt create mode 100644 src/extblkdev/vdo/ExtBlkDevPluginVdo.cc 
create mode 100644 src/extblkdev/vdo/ExtBlkDevPluginVdo.h create mode 100644 src/extblkdev/vdo/ExtBlkDevVdo.cc create mode 100644 src/extblkdev/vdo/ExtBlkDevVdo.h diff --git a/ceph.spec.in b/ceph.spec.in index d3c49e20a1b..8b208d50829 100644 --- a/ceph.spec.in +++ b/ceph.spec.in @@ -233,6 +233,7 @@ BuildRequires: libaio-devel BuildRequires: libblkid-devel >= 2.17 BuildRequires: cryptsetup-devel BuildRequires: libcurl-devel +BuildRequires: libcap-devel BuildRequires: libcap-ng-devel BuildRequires: fmt-devel >= 6.2.1 BuildRequires: pkgconfig(libudev) @@ -1563,6 +1564,8 @@ rm -rf %{_vpath_builddir} %dir %{_libdir}/ceph %dir %{_libdir}/ceph/erasure-code %{_libdir}/ceph/erasure-code/libec_*.so* +%dir %{_libdir}/ceph/extblkdev +%{_libdir}/ceph/extblkdev/libceph_*.so* %dir %{_libdir}/ceph/compressor %{_libdir}/ceph/compressor/libceph_*.so* %{_unitdir}/ceph-crash.service diff --git a/debian/ceph-base.install b/debian/ceph-base.install index 80aadd909e8..a3d7757a585 100644 --- a/debian/ceph-base.install +++ b/debian/ceph-base.install @@ -9,6 +9,7 @@ usr/bin/osdmaptool usr/bin/ceph-kvstore-tool usr/libexec/ceph/ceph_common.sh usr/lib/ceph/erasure-code/* +usr/lib/ceph/extblkdev/* usr/lib/rados-classes/* usr/sbin/ceph-create-keys usr/share/doc/ceph/sample.ceph.conf diff --git a/debian/control b/debian/control index e09e1f19791..89b3b3741ea 100644 --- a/debian/control +++ b/debian/control @@ -36,6 +36,7 @@ Build-Depends: automake, libcrypto++-dev , libcryptsetup-dev, libcap-ng-dev, + libcap-dev, libcunit1-dev, libcurl4-openssl-dev, libevent-dev, diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 549b18bcfdf..3ae76d59f69 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -433,6 +433,7 @@ add_subdirectory(common) add_subdirectory(crush) add_subdirectory(msg) add_subdirectory(arch) +add_subdirectory(extblkdev) set(ceph_common_objs $ @@ -446,7 +447,7 @@ set(ceph_common_objs $ $) set(ceph_common_deps - json_spirit erasure_code arch crc32 + json_spirit erasure_code 
extblkdev arch crc32 ${LIB_RESOLV} Boost::thread Boost::system @@ -678,7 +679,7 @@ set(ceph_osd_srcs ceph_osd.cc) add_executable(ceph-osd ${ceph_osd_srcs}) -add_dependencies(ceph-osd erasure_code_plugins) +add_dependencies(ceph-osd erasure_code_plugins extblkdev_plugins) target_link_libraries(ceph-osd osd os global-static common ${ALLOC_LIBS} ${BLKID_LIBRARIES}) diff --git a/src/blk/BlockDevice.h b/src/blk/BlockDevice.h index 67b4b330b6c..96d14ce1cc8 100644 --- a/src/blk/BlockDevice.h +++ b/src/blk/BlockDevice.h @@ -29,6 +29,7 @@ #include "acconfig.h" #include "common/ceph_mutex.h" #include "include/common_fwd.h" +#include "extblkdev/ExtBlkDevInterface.h" #if defined(HAVE_LIBAIO) || defined(HAVE_POSIXAIO) #include "aio/aio.h" @@ -237,8 +238,8 @@ public: uint64_t get_optimal_io_size() const { return optimal_io_size; } /// hook to provide utilization of thinly-provisioned device - virtual bool get_thin_utilization(uint64_t *total, uint64_t *avail) const { - return false; + virtual int get_ebd_state(ExtBlkDevState &state) const { + return -ENOENT; } virtual int collect_metadata(const std::string& prefix, std::map *pm) const = 0; diff --git a/src/blk/CMakeLists.txt b/src/blk/CMakeLists.txt index 629dfe2c319..288955dd054 100644 --- a/src/blk/CMakeLists.txt +++ b/src/blk/CMakeLists.txt @@ -31,7 +31,7 @@ if(libblk_srcs) endif() if(HAVE_LIBAIO) - target_link_libraries(blk PUBLIC ${AIO_LIBRARIES}) + target_link_libraries(blk PUBLIC ${AIO_LIBRARIES} extblkdev) endif(HAVE_LIBAIO) if(WITH_SPDK) diff --git a/src/blk/kernel/KernelDevice.cc b/src/blk/kernel/KernelDevice.cc index 687aff06c6d..d9c1e529c07 100644 --- a/src/blk/kernel/KernelDevice.cc +++ b/src/blk/kernel/KernelDevice.cc @@ -242,7 +242,12 @@ int KernelDevice::open(const string& p) support_discard = blkdev_buffered.support_discard(); optimal_io_size = blkdev_buffered.get_optimal_io_size(); this->devname = devname; - _detect_vdo(); + // check if any extended block device plugin recognizes this device + // detect_vdo has 
moved into the VDO plugin + int rc = extblkdev::detect_device(cct, devname, ebd_impl); + if (rc != 0) { + dout(20) << __func__ << " no plugin volume maps to " << devname << dendl; + } } } @@ -305,10 +310,7 @@ void KernelDevice::close() _discard_stop(); _pre_close(); - if (vdo_fd >= 0) { - VOID_TEMP_FAILURE_RETRY(::close(vdo_fd)); - vdo_fd = -1; - } + extblkdev::release_device(ebd_impl); for (int i = 0; i < WRITE_LIFE_MAX; i++) { assert(fd_directs[i] >= 0); @@ -335,11 +337,10 @@ int KernelDevice::collect_metadata(const string& prefix, map *pm) } else { (*pm)[prefix + "type"] = "ssd"; } - if (vdo_fd >= 0) { - (*pm)[prefix + "vdo"] = "true"; - uint64_t total, avail; - get_vdo_utilization(vdo_fd, &total, &avail); - (*pm)[prefix + "vdo_physical_size"] = stringify(total); + // if compression device detected, collect meta data for device + // VDO specific meta data has moved into VDO plugin + if (ebd_impl) { + ebd_impl->collect_metadata(prefix, pm); } { @@ -407,24 +408,14 @@ int KernelDevice::collect_metadata(const string& prefix, map *pm) return 0; } -void KernelDevice::_detect_vdo() +int KernelDevice::get_ebd_state(ExtBlkDevState &state) const { - vdo_fd = get_vdo_stats_handle(devname.c_str(), &vdo_name); - if (vdo_fd >= 0) { - dout(1) << __func__ << " VDO volume " << vdo_name - << " maps to " << devname << dendl; - } else { - dout(20) << __func__ << " no VDO volume maps to " << devname << dendl; + // use compression driver plugin to determine physical size and availability + // VDO specific get_thin_utilization has moved into VDO plugin + if (ebd_impl) { + return ebd_impl->get_state(state); } - return; -} - -bool KernelDevice::get_thin_utilization(uint64_t *total, uint64_t *avail) const -{ - if (vdo_fd < 0) { - return false; - } - return get_vdo_utilization(vdo_fd, total, avail); + return -ENOENT; } int KernelDevice::choose_fd(bool buffered, int write_hint) const diff --git a/src/blk/kernel/KernelDevice.h b/src/blk/kernel/KernelDevice.h index 613a9bbfed9..2aed3bbe6a0 
100644 --- a/src/blk/kernel/KernelDevice.h +++ b/src/blk/kernel/KernelDevice.h @@ -24,6 +24,7 @@ #include "aio/aio.h" #include "BlockDevice.h" +#include "extblkdev/ExtBlkDevPlugin.h" #define RW_IO_MAX (INT_MAX & CEPH_PAGE_MASK) @@ -35,8 +36,7 @@ private: bool enable_wrt = true; bool aio, dio; - int vdo_fd = -1; ///< fd for vdo sysfs directory - std::string vdo_name; + ExtBlkDevInterfaceRef ebd_impl; // structure for retrieving compression state from extended block device std::string devname; ///< kernel dev name (/sys/block/$devname), if any @@ -109,7 +109,6 @@ private: void debug_aio_link(aio_t& aio); void debug_aio_unlink(aio_t& aio); - void _detect_vdo(); int choose_fd(bool buffered, int write_hint) const; ceph::unique_leakable_ptr create_custom_aligned(size_t len, IOContext* ioc) const; @@ -130,7 +129,7 @@ public: } int get_devices(std::set *ls) const override; - bool get_thin_utilization(uint64_t *total, uint64_t *avail) const override; + int get_ebd_state(ExtBlkDevState &state) const override; int read(uint64_t off, uint64_t len, ceph::buffer::list *pbl, IOContext *ioc, diff --git a/src/ceph_osd.cc b/src/ceph_osd.cc index b10c799b4a3..a81322d00c7 100644 --- a/src/ceph_osd.cc +++ b/src/ceph_osd.cc @@ -26,6 +26,7 @@ #include "mon/MonClient.h" #include "include/ceph_features.h" #include "common/config.h" +#include "extblkdev/ExtBlkDevPlugin.h" #include "mon/MonMap.h" @@ -472,6 +473,14 @@ flushjournal_out: forker.exit(0); } + { + int r = extblkdev::preload(g_ceph_context); + if (r < 0) { + derr << "Failed preloading extblkdev plugins, error code: " << r << dendl; + forker.exit(1); + } + } + string magic; uuid_d cluster_fsid, osd_fsid; ceph_release_t require_osd_release = ceph_release_t::unknown; diff --git a/src/common/blkdev.cc b/src/common/blkdev.cc index b3047fc3037..6fc2965a19a 100644 --- a/src/common/blkdev.cc +++ b/src/common/blkdev.cc @@ -338,95 +338,6 @@ void get_raw_devices(const std::string& in, } } -int _get_vdo_stats_handle(const char *devname, 
std::string *vdo_name) -{ - int vdo_fd = -1; - - // we need to go from the raw devname (e.g., dm-4) to the VDO volume name. - // currently the best way seems to be to look at /dev/mapper/* ... - std::string expect = std::string("../") + devname; // expected symlink target - DIR *dir = ::opendir("/dev/mapper"); - if (!dir) { - return -1; - } - struct dirent *de = nullptr; - while ((de = ::readdir(dir))) { - if (de->d_name[0] == '.') - continue; - char fn[4096], target[4096]; - snprintf(fn, sizeof(fn), "/dev/mapper/%s", de->d_name); - int r = readlink(fn, target, sizeof(target)); - if (r < 0 || r >= (int)sizeof(target)) - continue; - target[r] = 0; - if (expect == target) { - snprintf(fn, sizeof(fn), "/sys/kvdo/%s/statistics", de->d_name); - vdo_fd = ::open(fn, O_RDONLY|O_CLOEXEC); //DIRECTORY); - if (vdo_fd >= 0) { - *vdo_name = de->d_name; - break; - } - } - } - closedir(dir); - return vdo_fd; -} - -int get_vdo_stats_handle(const char *devname, std::string *vdo_name) -{ - std::set devs = { devname }; - while (!devs.empty()) { - std::string dev = *devs.begin(); - devs.erase(devs.begin()); - int fd = _get_vdo_stats_handle(dev.c_str(), vdo_name); - if (fd >= 0) { - // yay, it's vdo - return fd; - } - // ok, see if there are constituent devices - if (dev.find("dm-") == 0) { - get_dm_parents(dev, &devs); - } - } - return -1; -} - -int64_t get_vdo_stat(int vdo_fd, const char *property) -{ - int64_t ret = 0; - int fd = ::openat(vdo_fd, property, O_RDONLY|O_CLOEXEC); - if (fd < 0) { - return 0; - } - char buf[1024]; - int r = ::read(fd, buf, sizeof(buf) - 1); - if (r > 0) { - buf[r] = 0; - ret = atoll(buf); - } - TEMP_FAILURE_RETRY(::close(fd)); - return ret; -} - -bool get_vdo_utilization(int fd, uint64_t *total, uint64_t *avail) -{ - int64_t block_size = get_vdo_stat(fd, "block_size"); - int64_t physical_blocks = get_vdo_stat(fd, "physical_blocks"); - int64_t overhead_blocks_used = get_vdo_stat(fd, "overhead_blocks_used"); - int64_t data_blocks_used = get_vdo_stat(fd, 
"data_blocks_used"); - if (!block_size - || !physical_blocks - || !overhead_blocks_used - || !data_blocks_used) { - return false; - } - int64_t avail_blocks = - physical_blocks - overhead_blocks_used - data_blocks_used; - *total = block_size * physical_blocks; - *avail = block_size * avail_blocks; - return true; -} - std::string _decode_model_enc(const std::string& in) { auto v = boost::replace_all_copy(in, "\\x20", " "); @@ -908,21 +819,6 @@ void get_raw_devices(const std::string& in, { } -int get_vdo_stats_handle(const char *devname, std::string *vdo_name) -{ - return -1; -} - -int64_t get_vdo_stat(int fd, const char *property) -{ - return 0; -} - -bool get_vdo_utilization(int fd, uint64_t *total, uint64_t *avail) -{ - return false; -} - std::string get_device_id(const std::string& devname, std::string *err) { @@ -1083,21 +979,6 @@ void get_raw_devices(const std::string& in, { } -int get_vdo_stats_handle(const char *devname, std::string *vdo_name) -{ - return -1; -} - -int64_t get_vdo_stat(int fd, const char *property) -{ - return 0; -} - -bool get_vdo_utilization(int fd, uint64_t *total, uint64_t *avail) -{ - return false; -} - std::string get_device_id(const std::string& devname, std::string *err) { @@ -1237,21 +1118,6 @@ void get_raw_devices(const std::string& in, { } -int get_vdo_stats_handle(const char *devname, std::string *vdo_name) -{ - return -1; -} - -int64_t get_vdo_stat(int fd, const char *property) -{ - return 0; -} - -bool get_vdo_utilization(int fd, uint64_t *total, uint64_t *avail) -{ - return false; -} - std::string get_device_id(const std::string& devname, std::string *err) { diff --git a/src/common/blkdev.h b/src/common/blkdev.h index ed9da450e0f..369cbc204f6 100644 --- a/src/common/blkdev.h +++ b/src/common/blkdev.h @@ -36,12 +36,6 @@ extern int block_device_get_metrics(const std::string& devname, int timeout, extern void get_raw_devices(const std::string& in, std::set *ls); -// for VDO -/// return an op fd for the sysfs stats dir, if this is 
a VDO device -extern int get_vdo_stats_handle(const char *devname, std::string *vdo_name); -extern int64_t get_vdo_stat(int fd, const char *property); -extern bool get_vdo_utilization(int fd, uint64_t *total, uint64_t *avail); - class BlkDev { public: BlkDev(int fd); diff --git a/src/common/options/osd.yaml.in b/src/common/options/osd.yaml.in index a9fe7f2af4b..273eea909bc 100644 --- a/src/common/options/osd.yaml.in +++ b/src/common/options/osd.yaml.in @@ -27,6 +27,16 @@ options: - osd_numa_auto_affinity flags: - startup +- name: set_keepcaps + type: bool + level: advanced + desc: set the keepcaps flag before changing UID, preserving the permitted capability set + long_desc: When ceph switches from root to the ceph uid, all capabilities in all sets are erased. If + a component that is capability aware needs a specific capability, the keepcaps flag maintains + the permitted capability set, allowing the capabilities in the effective set to be activated as needed. + default: false + flags: + - startup - name: osd_smart_report_timeout type: uint level: advanced @@ -1267,6 +1277,13 @@ options: default: 512 fmt_desc: The maximum number of objects per backfill scan.p with_legacy: true +- name: osd_extblkdev_plugins + type: str + level: advanced + desc: extended block device plugins to load, provide compression feedback at runtime + default: vdo + flags: + - startup # minimum number of peers - name: osd_heartbeat_min_peers type: int diff --git a/src/common/win32/blkdev.cc b/src/common/win32/blkdev.cc index bb815a044fc..3714441e7e2 100644 --- a/src/common/win32/blkdev.cc +++ b/src/common/win32/blkdev.cc @@ -94,21 +94,6 @@ void get_raw_devices(const std::string& in, { } -int get_vdo_stats_handle(const char *devname, std::string *vdo_name) -{ - return -1; -} - -int64_t get_vdo_stat(int fd, const char *property) -{ - return 0; -} - -bool get_vdo_utilization(int fd, uint64_t *total, uint64_t *avail) -{ - return false; -} - std::string get_device_id(const std::string& devname, 
std::string *err) { diff --git a/src/extblkdev/CMakeLists.txt b/src/extblkdev/CMakeLists.txt new file mode 100644 index 00000000000..64010f31cf3 --- /dev/null +++ b/src/extblkdev/CMakeLists.txt @@ -0,0 +1,14 @@ +## extended block device plugins + +set(extblkdev_plugin_dir ${CEPH_INSTALL_PKGLIBDIR}/extblkdev) + +add_subdirectory(vdo) + +add_library(extblkdev STATIC ExtBlkDevPlugin.cc) + +if(NOT WIN32) +target_link_libraries(extblkdev cap) +endif() + +add_custom_target(extblkdev_plugins DEPENDS + ceph_ebd_vdo) diff --git a/src/extblkdev/ExtBlkDevInterface.h b/src/extblkdev/ExtBlkDevInterface.h new file mode 100644 index 00000000000..219780fcd68 --- /dev/null +++ b/src/extblkdev/ExtBlkDevInterface.h @@ -0,0 +1,141 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph distributed storage system + * + * (C) Copyright IBM Corporation 2022 + * Author: Martin Ohmacht + * + * Based on the file ceph/src/erasure-code/ErasureCodeInterface.h + * Copyright (C) 2013 Cloudwatt + * Author: Loic Dachary + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + */ + +#ifndef CEPH_EXT_BLK_DEV_INTERFACE_H +#define CEPH_EXT_BLK_DEV_INTERFACE_H + +/*! @file ExtBlkDevInterface.h + @brief Interface provided by extended block device plugins + + Block devices with vendor specific capabilities rely on plugins implementing + **ExtBlkDevInterface** to provide access to their capabilities. + + Methods returning an **int** return **0** on success and a + negative value on error. 
+ */ + +#include +#include +#include +#include +#ifdef __linux__ +#include +#else +typedef void *cap_t; +#endif + +#include "common/PluginRegistry.h" + +namespace ceph { + class ExtBlkDevState { + uint64_t logical_total=0; + uint64_t logical_avail=0; + uint64_t physical_total=0; + uint64_t physical_avail=0; + public: + uint64_t get_logical_total(){return logical_total;} + uint64_t get_logical_avail(){return logical_avail;} + uint64_t get_physical_total(){return physical_total;} + uint64_t get_physical_avail(){return physical_avail;} + void set_logical_total(uint64_t alogical_total){logical_total=alogical_total;} + void set_logical_avail(uint64_t alogical_avail){logical_avail=alogical_avail;} + void set_physical_total(uint64_t aphysical_total){physical_total=aphysical_total;} + void set_physical_avail(uint64_t aphysical_avail){physical_avail=aphysical_avail;} + }; + + + class ExtBlkDevInterface { + public: + virtual ~ExtBlkDevInterface() {} + + /** + * Initialize the instance if device logdevname is supported + * + * Return 0 on success or a negative errno on error + * + * @param [in] logdevname name of device to check for support by this plugin + * @return 0 on success or a negative errno on error. + */ + virtual int init(const std::string& logdevname) = 0; + + /** + * Return the name of the underlying device detected by **init** method + * + * @return the name of the underlying device + */ + virtual const std::string& get_devname() const = 0; + + /** + * Provide status of underlying physical storage after compression + * + * Return 0 on success or a negative errno on error. + * + * @param [out] state current state of the underlying device + * @return 0 on success or a negative errno on error. + */ + virtual int get_state(ExtBlkDevState& state) = 0; + + /** + * Populate property map with meta data of device. 
+ * + * @param [in] prefix prefix to be prepended to all map values by this method + * @param [in,out] pm property map of the device, to be extended by attributes detected by this plugin + * @return 0 on success or a negative errno on error. + */ + virtual int collect_metadata(const std::string& prefix, std::map *pm) = 0; + }; + + typedef std::shared_ptr ExtBlkDevInterfaceRef; + + class ExtBlkDevPlugin : public Plugin { + public: + + explicit ExtBlkDevPlugin(CephContext *cct) : Plugin(cct) {} + virtual ~ExtBlkDevPlugin() {} + + /** + * Indicate plugin-required capabilities in permitted set + * If a plugin requires a capability to be active in the + * permitted set when invoked, it must indicate so by setting + * the required flags in the cap_t structure passed into this method. + * The cap_t structure is empty when passed into the method, and only the + * method's modifications to the permitted set are used by ceph. + * The plugin must elevate the capabilities into the effective + * set at a later point when needed during the invocation of its + * other methods, and is responsible to restore the effective set + * before returning from the method + * + * @param [out] caps capability set indicating the necessary capabilities + */ + virtual int get_required_cap_set(cap_t caps) = 0; + + /** + * Factory method, creating ExtBlkDev instances + * + * @param [in] logdevname name of logic device, may be composed of physical devices + * @param [out] ext_blk_dev object created on successful device support detection + * @return 0 on success or a negative errno on error. 
+ */ + virtual int factory(const std::string& logdevname, + ExtBlkDevInterfaceRef& ext_blk_dev) = 0; + }; + +} + +#endif diff --git a/src/extblkdev/ExtBlkDevPlugin.cc b/src/extblkdev/ExtBlkDevPlugin.cc new file mode 100644 index 00000000000..28f47311b54 --- /dev/null +++ b/src/extblkdev/ExtBlkDevPlugin.cc @@ -0,0 +1,268 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph distributed storage system + * + * (C) Copyright IBM Corporation 2022 + * Author: Martin Ohmacht + * + * Based on the file ceph/src/erasure-code/ErasureCodePlugin.cc + * Copyright (C) 2013,2014 Cloudwatt + * Copyright (C) 2014 Red Hat + * + * Author: Loic Dachary + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + */ + +#include + +#include "ceph_ver.h" +#include "ExtBlkDevPlugin.h" +#include "common/errno.h" +#include "include/dlfcn_compat.h" +#include "include/str_list.h" +#include "include/ceph_assert.h" +#include "common/ceph_context.h" +#include "common/debug.h" + +#define dout_subsys ceph_subsys_bdev +#define dout_context cct + +using namespace std; + +namespace ceph { + + namespace extblkdev { + + +#ifdef __linux__ + // iterate across plugins and determine each capability's requirement + // merge requirements into merge_caps set + int get_required_caps(CephContext *cct, cap_t &merge_caps) + { + cap_t plugin_caps = nullptr; + auto close_caps_on_return = make_scope_guard([&] { + if (plugin_caps != nullptr) { + cap_free(plugin_caps); + } + }); + + // plugin-private cap set to populate by a plugin + plugin_caps = cap_init(); + if (plugin_caps == nullptr) { + return -errno; + } + auto registry = cct->get_plugin_registry(); + std::lock_guard l(registry->lock); + // did we preload any extblkdev type plugins? 
+ auto ptype = registry->plugins.find("extblkdev"); + if (ptype != registry->plugins.end()) { + // iterate over all extblkdev plugins + for (auto& it : ptype->second) { + // clear cap set before passing to plugin + if (cap_clear(plugin_caps) < 0) { + return -errno; + } + // let plugin populate set with required caps + auto ebdplugin = dynamic_cast(it.second); + if (ebdplugin == nullptr) { + derr << __func__ << " Is not an extblkdev plugin: " << it.first << dendl; + return -ENOENT; + } + int rc = ebdplugin->get_required_cap_set(plugin_caps); + if (rc != 0) + return rc; + // iterate over capabilities and check for active bits + for (int i = 0; i <= CAP_LAST_CAP; ++i) { + cap_flag_value_t val; + if (cap_get_flag(plugin_caps, i, CAP_PERMITTED, &val) < 0) { + return -errno; + } + if (val != CAP_CLEAR) { + cap_value_t arr[1]; + arr[0] = i; + // set capability in merged set + if (cap_set_flag(merge_caps, CAP_PERMITTED, 1, arr, CAP_SET) < 0) { + return -errno; + } + } + } + } + } + return 0; + } + + // trim away all capabilities of this process that are not explicitly set in merge_set + int trim_caps(CephContext *cct, cap_t &merge_caps) + { + cap_t proc_caps = nullptr; + auto close_caps_on_return = make_scope_guard([&] { + if (proc_caps != nullptr) { + cap_free(proc_caps); + } + }); + bool changed = false; + // get process capability set + proc_caps = cap_get_proc(); + if (proc_caps == nullptr) { + dout(1) << " cap_get_proc failed with errno: " << errno << dendl; + return -errno; + } + { + char *cap_str = cap_to_text(proc_caps, 0); + if (cap_str != nullptr){ + dout(10) << " cap_get_proc yields: " << cap_str << dendl; + cap_free(cap_str); + } + } + // iterate over capabilities + for (int i = 0; i <= CAP_LAST_CAP; ++i) { + cap_flag_value_t val; + if (cap_get_flag(merge_caps, i, CAP_PERMITTED, &val) < 0) { + return -errno; + } + if (val == CAP_CLEAR) { + if (cap_get_flag(proc_caps, i, CAP_PERMITTED, &val) < 0) { + return -errno; + } + if (val != CAP_CLEAR) { + // if bit clear 
in merged set, but set in process set, clear in process set + changed = true; + cap_value_t arr[1]; + arr[0] = i; + if (cap_set_flag(proc_caps, CAP_PERMITTED, 1, arr, CAP_CLEAR) < 0) { + return -errno; + } + if (cap_set_flag(proc_caps, CAP_EFFECTIVE, 1, arr, CAP_CLEAR) < 0) { + return -errno; + } + } + } + } + // apply reduced capability set to process + if (changed) { + char *cap_str = cap_to_text(proc_caps, 0); + if (cap_str != nullptr){ + dout(10) << " new caps for cap_set_proc: " << cap_str << dendl; + cap_free(cap_str); + } + if (cap_set_proc(proc_caps) < 0) { + dout(1) << " cap_set_proc failed with errno: " << errno << dendl; + return -errno; + } + } + return 0; + } + + int limit_caps(CephContext *cct) + { + cap_t merge_caps = nullptr; + auto close_caps_on_return = make_scope_guard([&] { + if (merge_caps != nullptr) { + cap_free(merge_caps); + } + }); + // collect required caps in merge_caps + merge_caps = cap_init(); + if (merge_caps == nullptr) { + return -errno; + } + int rc = get_required_caps(cct, merge_caps); + if (rc != 0) { + return rc; + } + return trim_caps(cct, merge_caps); + } +#endif + + // preload set of extblkdev plugins defined in config + int preload(CephContext *cct) + { + const auto& conf = cct->_conf; + string plugins = conf.get_val("osd_extblkdev_plugins"); + dout(10) << "starting preload of extblkdev plugins: " << plugins << dendl; + + list plugins_list; + get_str_list(plugins, plugins_list); + + auto registry = cct->get_plugin_registry(); + { + std::lock_guard l(registry->lock); + for (auto& plg : plugins_list) { + dout(10) << "starting load of extblkdev plugin: " << plg << dendl; + int rc = registry->load("extblkdev", std::string("ebd_") + plg); + if (rc) { + derr << __func__ << " failed preloading extblkdev plugin: " << plg << dendl; + return rc; + }else{ + dout(10) << "successful load of extblkdev plugin: " << plg << dendl; + } + } + } +#ifdef __linux__ + // if we are still running as root, we do not need to trim capabilities + // as 
we are intended to use the privileges + if (geteuid() == 0) { + return 0; + } + return limit_caps(cct); +#else + return 0; +#endif + } + + + // scan extblkdev plugins for support of this device + int detect_device(CephContext *cct, + const std::string &logdevname, + ExtBlkDevInterfaceRef& ebd_impl) + { + int rc = -ENOENT; + std::string plg_name; + auto registry = cct->get_plugin_registry(); + std::lock_guard l(registry->lock); + auto ptype = registry->plugins.find("extblkdev"); + if (ptype == registry->plugins.end()) { + return -ENOENT; + } + + for (auto& it : ptype->second) { + + dout(10) << __func__ << " Trying to detect block device " << logdevname + << " using plugin " << it.first << dendl; + auto ebdplugin = dynamic_cast(it.second); + if (ebdplugin == nullptr) { + derr << __func__ << " Is not an extblkdev plugin: " << it.first << dendl; + return -ENOENT; + } + rc = ebdplugin->factory(logdevname, ebd_impl); + if (rc == 0) { + plg_name = it.first; + break; + } + } + if (rc == 0) { + dout(1) << __func__ << " using plugin " << plg_name << ", " << "volume " << ebd_impl->get_devname() + << " maps to " << logdevname << dendl; + } else { + dout(10) << __func__ << " no plugin volume maps to " << logdevname << dendl; + } + return rc; + } + + // release device object + int release_device(ExtBlkDevInterfaceRef& ebd_impl) + { + if (ebd_impl) { + ebd_impl.reset(); + } + return 0; + } + + } +} diff --git a/src/extblkdev/ExtBlkDevPlugin.h b/src/extblkdev/ExtBlkDevPlugin.h new file mode 100644 index 00000000000..beb9e496375 --- /dev/null +++ b/src/extblkdev/ExtBlkDevPlugin.h @@ -0,0 +1,38 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph distributed storage system + * + * (C) Copyright IBM Corporation 2022 + * Author: Martin Ohmacht + * + * Based on the file ceph/src/erasure-code/ErasureCodePlugin.h + * Copyright (C) 2013,2014 Cloudwatt + * Copyright (C) 2014 Red Hat + * + * Author: Loic Dachary + * + * This 
library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + */ + +#ifndef CEPH_EXT_BLK_DEV_PLUGIN_H +#define CEPH_EXT_BLK_DEV_PLUGIN_H + +#include "ExtBlkDevInterface.h" + +namespace ceph { + + namespace extblkdev { + int preload(CephContext *cct); + int detect_device(CephContext *cct, + const std::string &logdevname, + ExtBlkDevInterfaceRef& ebd_impl); + int release_device(ExtBlkDevInterfaceRef& ebd_impl); + } +} + +#endif diff --git a/src/extblkdev/vdo/CMakeLists.txt b/src/extblkdev/vdo/CMakeLists.txt new file mode 100644 index 00000000000..60d4f293ddb --- /dev/null +++ b/src/extblkdev/vdo/CMakeLists.txt @@ -0,0 +1,9 @@ +# vdo plugin + +set(vdo_srcs + ExtBlkDevPluginVdo.cc + ExtBlkDevVdo.cc +) + +add_library(ceph_ebd_vdo SHARED ${vdo_srcs}) +install(TARGETS ceph_ebd_vdo DESTINATION ${extblkdev_plugin_dir}) diff --git a/src/extblkdev/vdo/ExtBlkDevPluginVdo.cc b/src/extblkdev/vdo/ExtBlkDevPluginVdo.cc new file mode 100644 index 00000000000..dbe156182c7 --- /dev/null +++ b/src/extblkdev/vdo/ExtBlkDevPluginVdo.cc @@ -0,0 +1,59 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * (C) Copyright IBM Corporation 2022 + * Author: Martin Ohmacht + * + * Based on the file src/erasure-code/clay/ErasureCodePluginClay.cc + * Copyright (C) 2018 Indian Institute of Science + * + * Author: Myna Vajha + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + */ + +#include "ceph_ver.h" +#include "ExtBlkDevPluginVdo.h" +#include "common/ceph_context.h" + + +// This plugin does not require any capabilities to be set +int ExtBlkDevPluginVdo::get_required_cap_set(cap_t caps) +{ + return 0; +} + + +int ExtBlkDevPluginVdo::factory(const std::string& logdevname, + ceph::ExtBlkDevInterfaceRef& ext_blk_dev) +{ + auto vdo = new ExtBlkDevVdo(cct); + int r = vdo->init(logdevname); + if (r != 0) { + delete vdo; + return r; + } + ext_blk_dev.reset(vdo); + return 0; +}; + +const char *__ceph_plugin_version() { return CEPH_GIT_NICE_VER; } + +int __ceph_plugin_init(CephContext *cct, + const std::string& type, + const std::string& name) +{ + auto plg = new ExtBlkDevPluginVdo(cct); + if(plg == 0) return -ENOMEM; + int rc = cct->get_plugin_registry()->add(type, name, plg); + if(rc != 0){ + delete plg; + } + return rc; +} diff --git a/src/extblkdev/vdo/ExtBlkDevPluginVdo.h b/src/extblkdev/vdo/ExtBlkDevPluginVdo.h new file mode 100644 index 00000000000..784f642ec52 --- /dev/null +++ b/src/extblkdev/vdo/ExtBlkDevPluginVdo.h @@ -0,0 +1,34 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph distributed storage system + * + * (C) Copyright IBM Corporation 2022 + * Author: Martin Ohmacht + * + * Based on the file src/erasure-code/clay/ErasureCodePluginClay.h + * Copyright (C) 2018 Indian Institute of Science + * + * Author: Myna Vajha + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ *
+ */
+
+#ifndef CEPH_EXT_BLK_DEV_PLUGIN_VDO_H
+#define CEPH_EXT_BLK_DEV_PLUGIN_VDO_H
+
+#include "ExtBlkDevVdo.h"
+
+class ExtBlkDevPluginVdo : public ceph::ExtBlkDevPlugin {
+public:
+  explicit ExtBlkDevPluginVdo(CephContext *cct) : ExtBlkDevPlugin(cct) {}
+  int get_required_cap_set(cap_t caps) override;
+  int factory(const std::string& logdevname,
+              ceph::ExtBlkDevInterfaceRef& ext_blk_dev) override;
+};
+
+#endif
diff --git a/src/extblkdev/vdo/ExtBlkDevVdo.cc b/src/extblkdev/vdo/ExtBlkDevVdo.cc
new file mode 100644
index 00000000000..c40cd1a1ad5
--- /dev/null
+++ b/src/extblkdev/vdo/ExtBlkDevVdo.cc
@@ -0,0 +1,188 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * (C) Copyright IBM Corporation 2022
+ * Author: Martin Ohmacht
+ *
+ * Based on the file ceph/src/common/blkdev.cc
+ * Copyright (c) 2015 Hewlett-Packard Development Company, L.P.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#include "ExtBlkDevVdo.h"
+#include "common/blkdev.h"
+#include "include/stringify.h"
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <cstdio>
+#include <cstdlib>
+#include <map>
+#include <set>
+#include <string>
+#include "common/debug.h"
+
+#define dout_subsys ceph_subsys_bdev
+#define dout_context cct
+#undef dout_prefix
+#define dout_prefix *_dout << "vdo(" << this << ") "
+
+
+// Try to open the VDO statistics directory for the raw device devname.
+// Scans /dev/mapper for a symlink whose target is "../<devname>" and opens
+// /sys/kvdo/<entry>/statistics for it. On success the entry name is stored
+// in name, the directory fd in vdo_dir_fd, and 0 is returned. Returns
+// -errno if /dev/mapper cannot be opened and -ENOENT if nothing matched.
+int ExtBlkDevVdo::_get_vdo_stats_handle(const std::string& devname)
+{
+  int rc = -ENOENT;
+  dout(10) << __func__ << " VDO init checking device: " << devname << dendl;
+
+  // we need to go from the raw devname (e.g., dm-4) to the VDO volume name.
+  // currently the best way seems to be to look at /dev/mapper/* ...
+  std::string expect = std::string("../") + devname; // expected symlink target
+  DIR *dir = ::opendir("/dev/mapper");
+  if (!dir) {
+    return -errno;
+  }
+  struct dirent *de = nullptr;
+  while ((de = ::readdir(dir))) {
+    if (de->d_name[0] == '.')
+      continue;
+    char fn[4096], target[4096];
+    snprintf(fn, sizeof(fn), "/dev/mapper/%s", de->d_name);
+    int r = readlink(fn, target, sizeof(target));
+    // readlink() does not NUL-terminate; skip errors and targets that leave
+    // no room for the terminator
+    if (r < 0 || r >= (int)sizeof(target))
+      continue;
+    target[r] = 0;
+    if (expect == target) {
+      snprintf(fn, sizeof(fn), "/sys/kvdo/%s/statistics", de->d_name);
+      int vdo_fd = ::open(fn, O_RDONLY|O_CLOEXEC);
+      if (vdo_fd >= 0) {
+        name = de->d_name;
+        vdo_dir_fd = vdo_fd;
+        rc = 0;
+        break;
+      }
+    }
+  }
+  closedir(dir);
+  return rc;
+}
+
+// Locate the VDO volume below logdevname. Starting with logdevname itself,
+// each candidate is probed with _get_vdo_stats_handle(); for "dm-" devices
+// get_dm_parents() is asked to add further candidates to the set.
+// Returns 0 once a VDO volume was found, -ENOENT otherwise.
+int ExtBlkDevVdo::get_vdo_stats_handle()
+{
+  std::set<std::string> devs = { logdevname };
+  while (!devs.empty()) {
+    std::string dev = *devs.begin();
+    devs.erase(devs.begin());
+    int rc = _get_vdo_stats_handle(dev);
+    if (rc == 0) {
+      // yay, it's vdo
+      return rc;
+    }
+    // ok, see if there are constituent devices
+    if (dev.compare(0, 3, "dm-") == 0) {
+      get_dm_parents(dev, &devs);
+    }
+  }
+  return -ENOENT;
+}
+
+// Read the statistic file `property` below vdo_dir_fd and parse its content
+// with atoll(). Returns 0 if the file cannot be opened or read, so a failed
+// read is indistinguishable from a genuine zero.
+int64_t ExtBlkDevVdo::get_vdo_stat(const char *property)
+{
+  int64_t ret = 0;
+  int fd = ::openat(vdo_dir_fd, property, O_RDONLY|O_CLOEXEC);
+  if (fd < 0) {
+    return 0;
+  }
+  char buf[1024];
+  int r = ::read(fd, buf, sizeof(buf) - 1);
+  if (r > 0) {
+    buf[r] = 0;
+    ret = atoll(buf);
+  }
+  VOID_TEMP_FAILURE_RETRY(::close(fd));
+  return ret;
+}
+
+
+// Remember the logical device name and look up the VDO volume behind it.
+// Returns the result of get_vdo_stats_handle().
+int ExtBlkDevVdo::init(const std::string& alogdevname)
+{
+  logdevname = alogdevname;
+  // get directory handle for VDO metadata
+  return get_vdo_stats_handle();
+}
+
+
+// Fill state with logical and physical total/available sizes, each computed
+// as block_size times a block count read from the VDO statistics.
+// Returns 0 on success, or -1 after logging the values if block_size,
+// physical_blocks, overhead_blocks_used, data_blocks_used or logical_blocks
+// read as zero.
+int ExtBlkDevVdo::get_state(ceph::ExtBlkDevState& state)
+{
+  int64_t block_size = get_vdo_stat("block_size");
+  int64_t physical_blocks = get_vdo_stat("physical_blocks");
+  int64_t overhead_blocks_used = get_vdo_stat("overhead_blocks_used");
+  int64_t data_blocks_used = get_vdo_stat("data_blocks_used");
+  int64_t logical_blocks = get_vdo_stat("logical_blocks");
+  int64_t logical_blocks_used = get_vdo_stat("logical_blocks_used");
+  if (!block_size
+      || !physical_blocks
+      || !overhead_blocks_used
+      || !data_blocks_used
+      || !logical_blocks) {
+    dout(1) << __func__ << " VDO sysfs provided zero value for at least one statistic: " << dendl;
+    dout(1) << __func__ << " VDO block_size: " << block_size << dendl;
+    dout(1) << __func__ << " VDO physical_blocks: " << physical_blocks << dendl;
+    dout(1) << __func__ << " VDO overhead_blocks_used: " << overhead_blocks_used << dendl;
+    dout(1) << __func__ << " VDO data_blocks_used: " << data_blocks_used << dendl;
+    dout(1) << __func__ << " VDO logical_blocks: " << logical_blocks << dendl;
+    return -1;
+  }
+  int64_t avail_blocks =
+    physical_blocks - overhead_blocks_used - data_blocks_used;
+  int64_t logical_avail_blocks =
+    logical_blocks - logical_blocks_used;
+  state.set_logical_total(block_size * logical_blocks);
+  state.set_logical_avail(block_size * logical_avail_blocks);
+  state.set_physical_total(block_size * physical_blocks);
+  state.set_physical_avail(block_size * avail_blocks);
+  return 0;
+}
+
+// Add "<prefix>vdo" and "<prefix>vdo_physical_size" to *pm.
+// Returns 0 on success; if get_state() fails its return code is passed on
+// and *pm is left unchanged.
+int ExtBlkDevVdo::collect_metadata(const std::string& prefix, std::map<std::string,std::string> *pm)
+{
+  ceph::ExtBlkDevState state;
+  int rc = get_state(state);
+  if(rc != 0){
+    return rc;
+  }
+  (*pm)[prefix + "vdo"] = "true";
+  (*pm)[prefix + "vdo_physical_size"] = stringify(state.get_physical_total());
+  return 0;
+}
diff --git a/src/extblkdev/vdo/ExtBlkDevVdo.h b/src/extblkdev/vdo/ExtBlkDevVdo.h
new file mode 100644
index 00000000000..09865a27e03
--- /dev/null
+++ b/src/extblkdev/vdo/ExtBlkDevVdo.h
@@ -0,0 +1,60 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * (C) Copyright IBM Corporation 2022
+ * Author: Martin Ohmacht
+ *
+ * Based on the file ceph/src/common/blkdev.cc
+ * Copyright (c) 2015 Hewlett-Packard Development Company, L.P.
+ *
+ * And also based on the file src/erasure-code/clay/ErasureCodeClay.h
+ * Copyright (C) 2018 Indian Institute of Science
+ *
+ * Author: Myna Vajha
+ *
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_EXT_BLK_DEV_VDO_H
+#define CEPH_EXT_BLK_DEV_VDO_H
+
+#include <map>
+#include <string>
+
+#include "extblkdev/ExtBlkDevInterface.h"
+#include "include/compat.h"
+
+// Extended block device backed by a VDO volume; usage statistics are read
+// from the volume's sysfs statistics directory.
+class ExtBlkDevVdo final : public ceph::ExtBlkDevInterface
+{
+  int vdo_dir_fd = -1; ///< fd for vdo sysfs directory
+  std::string name; // name of the underlying vdo device
+  std::string logdevname; // name of the top level logical device
+  CephContext *cct;
+public:
+  explicit ExtBlkDevVdo(CephContext *cct) : cct(cct) {}
+  // the object owns vdo_dir_fd; a copy would close the same fd twice
+  ExtBlkDevVdo(const ExtBlkDevVdo&) = delete;
+  ExtBlkDevVdo& operator=(const ExtBlkDevVdo&) = delete;
+  ~ExtBlkDevVdo(){
+    if(vdo_dir_fd >= 0)
+      VOID_TEMP_FAILURE_RETRY(::close(vdo_dir_fd));
+  }
+  int _get_vdo_stats_handle(const std::string& devname);
+  int get_vdo_stats_handle();
+  int64_t get_vdo_stat(const char *property);
+  virtual int init(const std::string& logdevname);
+  virtual const std::string& get_devname() const {return name;}
+  virtual int get_state(ceph::ExtBlkDevState& state);
+  virtual int collect_metadata(const std::string& prefix, std::map<std::string,std::string> *pm);
+};
+
+#endif
diff --git a/src/global/global_init.cc b/src/global/global_init.cc
index 5d636652fdc..57ee5ee7167 100644
--- a/src/global/global_init.cc
+++ b/src/global/global_init.cc
@@ -22,6 +22,7 @@
 #include "common/signal.h"
 #include "common/version.h"
 #include "erasure-code/ErasureCodePlugin.h"
+#include "extblkdev/ExtBlkDevPlugin.h"
 #include "global/global_context.h"
 #include "global/global_init.h"
 #include "global/pidfile.h"
@@ -317,6 +318,13 @@ global_init(const std::map *defaults,
            << std::endl;
       exit(1);
     }
+#if defined(HAVE_SYS_PRCTL_H)
+    if (g_conf().get_val("set_keepcaps")) {
+      if (prctl(PR_SET_KEEPCAPS, 1) == -1) {
+        cerr << "warning: unable to set keepcaps
flag: " << cpp_strerror(errno) << std::endl; + } + } +#endif if (setuid(uid) != 0) { cerr << "unable to setuid " << uid << ": " << cpp_strerror(errno) << std::endl; diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 4fab7de0378..95eb915ab79 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -10421,15 +10421,16 @@ void BlueStore::_get_statfs_overall(struct store_statfs_t *buf) - buf->omap_allocated; } - uint64_t thin_total, thin_avail; - if (bdev->get_thin_utilization(&thin_total, &thin_avail)) { - buf->total += thin_total; + ExtBlkDevState ebd_state; + int rc = bdev->get_ebd_state(ebd_state); + if (rc == 0) { + buf->total += ebd_state.get_physical_total(); // we are limited by both the size of the virtual device and the // underlying physical device. - bfree = std::min(bfree, thin_avail); + bfree = std::min(bfree, ebd_state.get_physical_avail()); - buf->allocated = thin_total - thin_avail; + buf->allocated = ebd_state.get_physical_total() - ebd_state.get_physical_avail();; } else { buf->total += bdev->get_size(); } diff --git a/src/os/filestore/FileStore.cc b/src/os/filestore/FileStore.cc index 5ee012b04e2..fd1ffecef18 100644 --- a/src/os/filestore/FileStore.cc +++ b/src/os/filestore/FileStore.cc @@ -731,10 +731,10 @@ void FileStore::collect_metadata(map *pm) (*pm)["backend_filestore_dev_node"] = string(dev_node); devname = dev_node; } - if (rc == 0 && vdo_fd >= 0) { - (*pm)["vdo"] = "true"; - (*pm)["vdo_physical_size"] = - stringify(4096 * get_vdo_stat(vdo_fd, "physical_blocks")); + // if compression device detected, collect meta data for device + // VDO specific meta data has moved into VDO plugin + if (rc == 0 && ebd_impl) { + ebd_impl->collect_metadata("", pm); } if (journal) { journal->collect_metadata(pm); @@ -778,12 +778,19 @@ int FileStore::statfs(struct store_statfs_t *buf0, osd_alert_list_t* alerts) buf0->omap_allocated += object_map->get_db()->get_estimated_size(kv_usage); } - uint64_t 
thin_total, thin_avail; - if (get_vdo_utilization(vdo_fd, &thin_total, &thin_avail)) { - buf0->total = thin_total; - bfree = std::min(bfree, thin_avail); - buf0->allocated = thin_total - thin_avail; - buf0->data_stored = bfree; + if (ebd_impl) { + ExtBlkDevState state; + int rc = ebd_impl->get_state(state); + if (rc == 0){ + buf0->total = state.get_physical_total(); + bfree = std::min(bfree, state.get_physical_avail()); + buf0->allocated = state.get_physical_total() - state.get_physical_avail(); + buf0->data_stored = bfree; + } else { + buf0->total = buf.f_blocks * buf.f_bsize; + buf0->allocated = bfree; + buf0->data_stored = bfree; + } } else { buf0->total = buf.f_blocks * buf.f_bsize; buf0->allocated = bfree; @@ -1287,16 +1294,11 @@ int FileStore::_detect_fs() return r; } - // vdo - { - char dev_node[PATH_MAX]; - if (int rc = BlkDev{fsid_fd}.wholedisk(dev_node, PATH_MAX); rc == 0) { - vdo_fd = get_vdo_stats_handle(dev_node, &vdo_name); - if (vdo_fd >= 0) { - dout(0) << __func__ << " VDO volume " << vdo_name << " for " << dev_node - << dendl; - } - } + // check if any extended block device plugin recognizes this device + // detect_vdo has moved into the VDO plugin + int rc = extblkdev::detect_device(cct, devname, ebd_impl); + if (rc != 0) { + dout(20) << __func__ << " no plugin volume maps to " << devname << dendl; } // test xattrs @@ -2092,10 +2094,7 @@ int FileStore::umount() (*it)->stop(); } - if (vdo_fd >= 0) { - VOID_TEMP_FAILURE_RETRY(::close(vdo_fd)); - vdo_fd = -1; - } + extblkdev::release_device(ebd_impl); if (fsid_fd >= 0) { VOID_TEMP_FAILURE_RETRY(::close(fsid_fd)); fsid_fd = -1; diff --git a/src/os/filestore/FileStore.h b/src/os/filestore/FileStore.h index e975abc3d48..67caeaec0a0 100644 --- a/src/os/filestore/FileStore.h +++ b/src/os/filestore/FileStore.h @@ -47,6 +47,7 @@ #include "WBThrottle.h" #include "include/uuid.h" +#include "extblkdev/ExtBlkDevPlugin.h" #if defined(__linux__) # ifndef BTRFS_SUPER_MAGIC @@ -162,8 +163,7 @@ private: std::string 
devname; - int vdo_fd = -1; - std::string vdo_name; + ExtBlkDevInterfaceRef ebd_impl; // structure for retrieving compression state from extended block device deque snaps;