mirror of https://github.com/ceph/ceph
blk/kernel: add plugin system for devices with compression and move VDO support into plugin
The current VDO support implementation is buried inside the common/blkdev.cc with a simple interface used by KernelDevice. It is not easily extendable and can not be easily used for other devices providing similar capabilities. This patch adds a plugin system that is based in its structure on the erasure code plugin system and moves the VDO support code into a VDO plugin. Signed-off-by: Martin Ohmacht <mohmacht@us.ibm.com>
This commit is contained in:
parent
652bf75409
commit
a6658c91bb
|
@ -233,6 +233,7 @@ BuildRequires: libaio-devel
|
|||
BuildRequires: libblkid-devel >= 2.17
|
||||
BuildRequires: cryptsetup-devel
|
||||
BuildRequires: libcurl-devel
|
||||
BuildRequires: libcap-devel
|
||||
BuildRequires: libcap-ng-devel
|
||||
BuildRequires: fmt-devel >= 6.2.1
|
||||
BuildRequires: pkgconfig(libudev)
|
||||
|
@ -1563,6 +1564,8 @@ rm -rf %{_vpath_builddir}
|
|||
%dir %{_libdir}/ceph
|
||||
%dir %{_libdir}/ceph/erasure-code
|
||||
%{_libdir}/ceph/erasure-code/libec_*.so*
|
||||
%dir %{_libdir}/ceph/extblkdev
|
||||
%{_libdir}/ceph/extblkdev/libceph_*.so*
|
||||
%dir %{_libdir}/ceph/compressor
|
||||
%{_libdir}/ceph/compressor/libceph_*.so*
|
||||
%{_unitdir}/ceph-crash.service
|
||||
|
|
|
@ -9,6 +9,7 @@ usr/bin/osdmaptool
|
|||
usr/bin/ceph-kvstore-tool
|
||||
usr/libexec/ceph/ceph_common.sh
|
||||
usr/lib/ceph/erasure-code/*
|
||||
usr/lib/ceph/extblkdev/*
|
||||
usr/lib/rados-classes/*
|
||||
usr/sbin/ceph-create-keys
|
||||
usr/share/doc/ceph/sample.ceph.conf
|
||||
|
|
|
@ -36,6 +36,7 @@ Build-Depends: automake,
|
|||
libcrypto++-dev <pkg.ceph.crimson>,
|
||||
libcryptsetup-dev,
|
||||
libcap-ng-dev,
|
||||
libcap-dev,
|
||||
libcunit1-dev,
|
||||
libcurl4-openssl-dev,
|
||||
libevent-dev,
|
||||
|
|
|
@ -433,6 +433,7 @@ add_subdirectory(common)
|
|||
add_subdirectory(crush)
|
||||
add_subdirectory(msg)
|
||||
add_subdirectory(arch)
|
||||
add_subdirectory(extblkdev)
|
||||
|
||||
set(ceph_common_objs
|
||||
$<TARGET_OBJECTS:common-auth-objs>
|
||||
|
@ -446,7 +447,7 @@ set(ceph_common_objs
|
|||
$<TARGET_OBJECTS:common_mountcephfs_objs>
|
||||
$<TARGET_OBJECTS:crush_objs>)
|
||||
set(ceph_common_deps
|
||||
json_spirit erasure_code arch crc32
|
||||
json_spirit erasure_code extblkdev arch crc32
|
||||
${LIB_RESOLV}
|
||||
Boost::thread
|
||||
Boost::system
|
||||
|
@ -678,7 +679,7 @@ set(ceph_osd_srcs
|
|||
ceph_osd.cc)
|
||||
|
||||
add_executable(ceph-osd ${ceph_osd_srcs})
|
||||
add_dependencies(ceph-osd erasure_code_plugins)
|
||||
add_dependencies(ceph-osd erasure_code_plugins extblkdev_plugins)
|
||||
target_link_libraries(ceph-osd osd os global-static common
|
||||
${ALLOC_LIBS}
|
||||
${BLKID_LIBRARIES})
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include "acconfig.h"
|
||||
#include "common/ceph_mutex.h"
|
||||
#include "include/common_fwd.h"
|
||||
#include "extblkdev/ExtBlkDevInterface.h"
|
||||
|
||||
#if defined(HAVE_LIBAIO) || defined(HAVE_POSIXAIO)
|
||||
#include "aio/aio.h"
|
||||
|
@ -237,8 +238,8 @@ public:
|
|||
uint64_t get_optimal_io_size() const { return optimal_io_size; }
|
||||
|
||||
/// hook to provide utilization of thinly-provisioned device
|
||||
virtual bool get_thin_utilization(uint64_t *total, uint64_t *avail) const {
|
||||
return false;
|
||||
virtual int get_ebd_state(ExtBlkDevState &state) const {
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
virtual int collect_metadata(const std::string& prefix, std::map<std::string,std::string> *pm) const = 0;
|
||||
|
|
|
@ -31,7 +31,7 @@ if(libblk_srcs)
|
|||
endif()
|
||||
|
||||
if(HAVE_LIBAIO)
|
||||
target_link_libraries(blk PUBLIC ${AIO_LIBRARIES})
|
||||
target_link_libraries(blk PUBLIC ${AIO_LIBRARIES} extblkdev)
|
||||
endif(HAVE_LIBAIO)
|
||||
|
||||
if(WITH_SPDK)
|
||||
|
|
|
@ -242,7 +242,12 @@ int KernelDevice::open(const string& p)
|
|||
support_discard = blkdev_buffered.support_discard();
|
||||
optimal_io_size = blkdev_buffered.get_optimal_io_size();
|
||||
this->devname = devname;
|
||||
_detect_vdo();
|
||||
// check if any extended block device plugin recognizes this device
|
||||
// detect_vdo has moved into the VDO plugin
|
||||
int rc = extblkdev::detect_device(cct, devname, ebd_impl);
|
||||
if (rc != 0) {
|
||||
dout(20) << __func__ << " no plugin volume maps to " << devname << dendl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -305,10 +310,7 @@ void KernelDevice::close()
|
|||
_discard_stop();
|
||||
_pre_close();
|
||||
|
||||
if (vdo_fd >= 0) {
|
||||
VOID_TEMP_FAILURE_RETRY(::close(vdo_fd));
|
||||
vdo_fd = -1;
|
||||
}
|
||||
extblkdev::release_device(ebd_impl);
|
||||
|
||||
for (int i = 0; i < WRITE_LIFE_MAX; i++) {
|
||||
assert(fd_directs[i] >= 0);
|
||||
|
@ -335,11 +337,10 @@ int KernelDevice::collect_metadata(const string& prefix, map<string,string> *pm)
|
|||
} else {
|
||||
(*pm)[prefix + "type"] = "ssd";
|
||||
}
|
||||
if (vdo_fd >= 0) {
|
||||
(*pm)[prefix + "vdo"] = "true";
|
||||
uint64_t total, avail;
|
||||
get_vdo_utilization(vdo_fd, &total, &avail);
|
||||
(*pm)[prefix + "vdo_physical_size"] = stringify(total);
|
||||
// if compression device detected, collect meta data for device
|
||||
// VDO specific meta data has moved into VDO plugin
|
||||
if (ebd_impl) {
|
||||
ebd_impl->collect_metadata(prefix, pm);
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -407,24 +408,14 @@ int KernelDevice::collect_metadata(const string& prefix, map<string,string> *pm)
|
|||
return 0;
|
||||
}
|
||||
|
||||
void KernelDevice::_detect_vdo()
|
||||
int KernelDevice::get_ebd_state(ExtBlkDevState &state) const
|
||||
{
|
||||
vdo_fd = get_vdo_stats_handle(devname.c_str(), &vdo_name);
|
||||
if (vdo_fd >= 0) {
|
||||
dout(1) << __func__ << " VDO volume " << vdo_name
|
||||
<< " maps to " << devname << dendl;
|
||||
} else {
|
||||
dout(20) << __func__ << " no VDO volume maps to " << devname << dendl;
|
||||
// use compression driver plugin to determine physical size and availability
|
||||
// VDO specific get_thin_utilization has moved into VDO plugin
|
||||
if (ebd_impl) {
|
||||
return ebd_impl->get_state(state);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
bool KernelDevice::get_thin_utilization(uint64_t *total, uint64_t *avail) const
|
||||
{
|
||||
if (vdo_fd < 0) {
|
||||
return false;
|
||||
}
|
||||
return get_vdo_utilization(vdo_fd, total, avail);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
int KernelDevice::choose_fd(bool buffered, int write_hint) const
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
#include "aio/aio.h"
|
||||
#include "BlockDevice.h"
|
||||
#include "extblkdev/ExtBlkDevPlugin.h"
|
||||
|
||||
#define RW_IO_MAX (INT_MAX & CEPH_PAGE_MASK)
|
||||
|
||||
|
@ -35,8 +36,7 @@ private:
|
|||
bool enable_wrt = true;
|
||||
bool aio, dio;
|
||||
|
||||
int vdo_fd = -1; ///< fd for vdo sysfs directory
|
||||
std::string vdo_name;
|
||||
ExtBlkDevInterfaceRef ebd_impl; // structure for retrieving compression state from extended block device
|
||||
|
||||
std::string devname; ///< kernel dev name (/sys/block/$devname), if any
|
||||
|
||||
|
@ -109,7 +109,6 @@ private:
|
|||
void debug_aio_link(aio_t& aio);
|
||||
void debug_aio_unlink(aio_t& aio);
|
||||
|
||||
void _detect_vdo();
|
||||
int choose_fd(bool buffered, int write_hint) const;
|
||||
|
||||
ceph::unique_leakable_ptr<buffer::raw> create_custom_aligned(size_t len, IOContext* ioc) const;
|
||||
|
@ -130,7 +129,7 @@ public:
|
|||
}
|
||||
int get_devices(std::set<std::string> *ls) const override;
|
||||
|
||||
bool get_thin_utilization(uint64_t *total, uint64_t *avail) const override;
|
||||
int get_ebd_state(ExtBlkDevState &state) const override;
|
||||
|
||||
int read(uint64_t off, uint64_t len, ceph::buffer::list *pbl,
|
||||
IOContext *ioc,
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include "mon/MonClient.h"
|
||||
#include "include/ceph_features.h"
|
||||
#include "common/config.h"
|
||||
#include "extblkdev/ExtBlkDevPlugin.h"
|
||||
|
||||
#include "mon/MonMap.h"
|
||||
|
||||
|
@ -472,6 +473,14 @@ flushjournal_out:
|
|||
forker.exit(0);
|
||||
}
|
||||
|
||||
{
|
||||
int r = extblkdev::preload(g_ceph_context);
|
||||
if (r < 0) {
|
||||
derr << "Failed preloading extblkdev plugins, error code: " << r << dendl;
|
||||
forker.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
string magic;
|
||||
uuid_d cluster_fsid, osd_fsid;
|
||||
ceph_release_t require_osd_release = ceph_release_t::unknown;
|
||||
|
|
|
@ -338,95 +338,6 @@ void get_raw_devices(const std::string& in,
|
|||
}
|
||||
}
|
||||
|
||||
// Look for a /dev/mapper entry that is a symlink to "../<devname>" and, if
// one is found, open the matching /sys/kvdo/<entry>/statistics path.
//
// @param devname   raw device name (e.g., dm-4)
// @param vdo_name  set to the /dev/mapper entry name when the open succeeds
// @return an open fd on success, -1 otherwise
int _get_vdo_stats_handle(const char *devname, std::string *vdo_name)
{
  // symlink target we expect a matching /dev/mapper entry to have
  const std::string wanted_target = std::string("../") + devname;

  DIR *mapper = ::opendir("/dev/mapper");
  if (mapper == nullptr) {
    return -1;
  }

  int stats_fd = -1;
  for (struct dirent *entry = ::readdir(mapper);
       entry != nullptr;
       entry = ::readdir(mapper)) {
    if (entry->d_name[0] == '.') {
      continue;
    }

    char path[4096];
    char link_target[4096];
    snprintf(path, sizeof(path), "/dev/mapper/%s", entry->d_name);
    const int len = readlink(path, link_target, sizeof(link_target));
    if (len < 0 || len >= static_cast<int>(sizeof(link_target))) {
      continue;
    }
    link_target[len] = 0;
    if (wanted_target != link_target) {
      continue;
    }

    snprintf(path, sizeof(path), "/sys/kvdo/%s/statistics", entry->d_name);
    stats_fd = ::open(path, O_RDONLY|O_CLOEXEC); //DIRECTORY);
    if (stats_fd >= 0) {
      *vdo_name = entry->d_name;
      break;
    }
  }
  closedir(mapper);
  return stats_fd;
}
|
||||
|
||||
int get_vdo_stats_handle(const char *devname, std::string *vdo_name)
|
||||
{
|
||||
std::set<std::string> devs = { devname };
|
||||
while (!devs.empty()) {
|
||||
std::string dev = *devs.begin();
|
||||
devs.erase(devs.begin());
|
||||
int fd = _get_vdo_stats_handle(dev.c_str(), vdo_name);
|
||||
if (fd >= 0) {
|
||||
// yay, it's vdo
|
||||
return fd;
|
||||
}
|
||||
// ok, see if there are constituent devices
|
||||
if (dev.find("dm-") == 0) {
|
||||
get_dm_parents(dev, &devs);
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Read one numeric property relative to the directory fd vdo_fd.
//
// @param vdo_fd    directory fd the property name is resolved against
// @param property  file name to read
// @return the value parsed with atoll(), or 0 if the file cannot be opened
//         or nothing could be read
int64_t get_vdo_stat(int vdo_fd, const char *property)
{
  const int prop_fd = ::openat(vdo_fd, property, O_RDONLY|O_CLOEXEC);
  if (prop_fd < 0) {
    return 0;
  }

  int64_t value = 0;
  char text[1024];
  // leave room for the terminating NUL
  const int nread = ::read(prop_fd, text, sizeof(text) - 1);
  if (nread > 0) {
    text[nread] = 0;
    value = atoll(text);
  }
  TEMP_FAILURE_RETRY(::close(prop_fd));
  return value;
}
|
||||
|
||||
bool get_vdo_utilization(int fd, uint64_t *total, uint64_t *avail)
|
||||
{
|
||||
int64_t block_size = get_vdo_stat(fd, "block_size");
|
||||
int64_t physical_blocks = get_vdo_stat(fd, "physical_blocks");
|
||||
int64_t overhead_blocks_used = get_vdo_stat(fd, "overhead_blocks_used");
|
||||
int64_t data_blocks_used = get_vdo_stat(fd, "data_blocks_used");
|
||||
if (!block_size
|
||||
|| !physical_blocks
|
||||
|| !overhead_blocks_used
|
||||
|| !data_blocks_used) {
|
||||
return false;
|
||||
}
|
||||
int64_t avail_blocks =
|
||||
physical_blocks - overhead_blocks_used - data_blocks_used;
|
||||
*total = block_size * physical_blocks;
|
||||
*avail = block_size * avail_blocks;
|
||||
return true;
|
||||
}
|
||||
|
||||
std::string _decode_model_enc(const std::string& in)
|
||||
{
|
||||
auto v = boost::replace_all_copy(in, "\\x20", " ");
|
||||
|
@ -908,21 +819,6 @@ void get_raw_devices(const std::string& in,
|
|||
{
|
||||
}
|
||||
|
||||
int get_vdo_stats_handle(const char *devname, std::string *vdo_name)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
int64_t get_vdo_stat(int fd, const char *property)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool get_vdo_utilization(int fd, uint64_t *total, uint64_t *avail)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string get_device_id(const std::string& devname,
|
||||
std::string *err)
|
||||
{
|
||||
|
@ -1083,21 +979,6 @@ void get_raw_devices(const std::string& in,
|
|||
{
|
||||
}
|
||||
|
||||
int get_vdo_stats_handle(const char *devname, std::string *vdo_name)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
int64_t get_vdo_stat(int fd, const char *property)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool get_vdo_utilization(int fd, uint64_t *total, uint64_t *avail)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string get_device_id(const std::string& devname,
|
||||
std::string *err)
|
||||
{
|
||||
|
@ -1237,21 +1118,6 @@ void get_raw_devices(const std::string& in,
|
|||
{
|
||||
}
|
||||
|
||||
int get_vdo_stats_handle(const char *devname, std::string *vdo_name)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
int64_t get_vdo_stat(int fd, const char *property)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool get_vdo_utilization(int fd, uint64_t *total, uint64_t *avail)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string get_device_id(const std::string& devname,
|
||||
std::string *err)
|
||||
{
|
||||
|
|
|
@ -36,12 +36,6 @@ extern int block_device_get_metrics(const std::string& devname, int timeout,
|
|||
extern void get_raw_devices(const std::string& in,
|
||||
std::set<std::string> *ls);
|
||||
|
||||
// for VDO
|
||||
/// return an op fd for the sysfs stats dir, if this is a VDO device
|
||||
extern int get_vdo_stats_handle(const char *devname, std::string *vdo_name);
|
||||
extern int64_t get_vdo_stat(int fd, const char *property);
|
||||
extern bool get_vdo_utilization(int fd, uint64_t *total, uint64_t *avail);
|
||||
|
||||
class BlkDev {
|
||||
public:
|
||||
BlkDev(int fd);
|
||||
|
|
|
@ -27,6 +27,16 @@ options:
|
|||
- osd_numa_auto_affinity
|
||||
flags:
|
||||
- startup
|
||||
- name: set_keepcaps
|
||||
type: bool
|
||||
level: advanced
|
||||
desc: set the keepcaps flag before changing UID, preserving the permitted capability set
|
||||
long_desc: When ceph switches from root to the ceph uid, all capabilities in all sets are erased. If
|
||||
a component that is capability aware needs a specific capability, the keepcaps flag maintains
|
||||
the permitted capability set, allowing the capabilities in the effective set to be activated as needed.
|
||||
default: false
|
||||
flags:
|
||||
- startup
|
||||
- name: osd_smart_report_timeout
|
||||
type: uint
|
||||
level: advanced
|
||||
|
@ -1267,6 +1277,13 @@ options:
|
|||
default: 512
|
||||
fmt_desc: The maximum number of objects per backfill scan.
|
||||
with_legacy: true
|
||||
- name: osd_extblkdev_plugins
|
||||
type: str
|
||||
level: advanced
|
||||
desc: extended block device plugins to load, provide compression feedback at runtime
|
||||
default: vdo
|
||||
flags:
|
||||
- startup
|
||||
# minimum number of peers
|
||||
- name: osd_heartbeat_min_peers
|
||||
type: int
|
||||
|
|
|
@ -94,21 +94,6 @@ void get_raw_devices(const std::string& in,
|
|||
{
|
||||
}
|
||||
|
||||
int get_vdo_stats_handle(const char *devname, std::string *vdo_name)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
int64_t get_vdo_stat(int fd, const char *property)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool get_vdo_utilization(int fd, uint64_t *total, uint64_t *avail)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string get_device_id(const std::string& devname,
|
||||
std::string *err)
|
||||
{
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
## extended block device plugins
|
||||
|
||||
set(extblkdev_plugin_dir ${CEPH_INSTALL_PKGLIBDIR}/extblkdev)
|
||||
|
||||
add_subdirectory(vdo)
|
||||
|
||||
add_library(extblkdev STATIC ExtBlkDevPlugin.cc)
|
||||
|
||||
if(NOT WIN32)
|
||||
target_link_libraries(extblkdev cap)
|
||||
endif()
|
||||
|
||||
add_custom_target(extblkdev_plugins DEPENDS
|
||||
ceph_ebd_vdo)
|
|
@ -0,0 +1,141 @@
|
|||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||||
// vim: ts=8 sw=2 smarttab
|
||||
/*
|
||||
* Ceph distributed storage system
|
||||
*
|
||||
* (C) Copyright IBM Corporation 2022
|
||||
* Author: Martin Ohmacht <mohmacht@us.ibm.com>
|
||||
*
|
||||
* Based on the file ceph/src/erasure-code/ErasureCodeInterface.h
|
||||
* Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com>
|
||||
* Author: Loic Dachary <loic@dachary.org>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef CEPH_EXT_BLK_DEV_INTERFACE_H
|
||||
#define CEPH_EXT_BLK_DEV_INTERFACE_H
|
||||
|
||||
/*! @file ExtBlkDevInterface.h
|
||||
@brief Interface provided by extended block device plugins
|
||||
|
||||
Block devices with vendor-specific capabilities rely on plugins implementing
|
||||
**ExtBlkDevInterface** to provide access to their capabilities.
|
||||
|
||||
Methods returning an **int** return **0** on success and a
|
||||
negative value on error.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <ostream>
|
||||
#include <memory>
|
||||
#ifdef __linux__
|
||||
#include <sys/capability.h>
|
||||
#else
|
||||
typedef void *cap_t;
|
||||
#endif
|
||||
|
||||
#include "common/PluginRegistry.h"
|
||||
|
||||
namespace ceph {
|
||||
/**
 * Value holder for the four capacity figures an extended block device
 * plugin reports: logical and physical, total and available.
 *
 * All figures start at 0. The class does not interpret the values; units
 * are whatever the reporting plugin stores (presumably bytes -- confirm
 * against the plugin implementation).
 */
class ExtBlkDevState {
  uint64_t logical_total = 0;
  uint64_t logical_avail = 0;
  uint64_t physical_total = 0;
  uint64_t physical_avail = 0;
public:
  // Accessors are const so the state can be read through a const
  // reference or from a const object.
  uint64_t get_logical_total() const { return logical_total; }
  uint64_t get_logical_avail() const { return logical_avail; }
  uint64_t get_physical_total() const { return physical_total; }
  uint64_t get_physical_avail() const { return physical_avail; }

  void set_logical_total(uint64_t alogical_total) { logical_total = alogical_total; }
  void set_logical_avail(uint64_t alogical_avail) { logical_avail = alogical_avail; }
  void set_physical_total(uint64_t aphysical_total) { physical_total = aphysical_total; }
  void set_physical_avail(uint64_t aphysical_avail) { physical_avail = aphysical_avail; }
};
|
||||
|
||||
|
||||
class ExtBlkDevInterface {
|
||||
public:
|
||||
virtual ~ExtBlkDevInterface() {}
|
||||
|
||||
/**
|
||||
* Initialize the instance if device logdevname is supported
|
||||
*
|
||||
* Return 0 on success or a negative errno on error
|
||||
*
|
||||
* @param [in] logdevname name of device to check for support by this plugin
|
||||
* @return 0 on success or a negative errno on error.
|
||||
*/
|
||||
virtual int init(const std::string& logdevname) = 0;
|
||||
|
||||
/**
|
||||
* Return the name of the underlying device detected by **init** method
|
||||
*
|
||||
* @return the name of the underlying device
|
||||
*/
|
||||
virtual const std::string& get_devname() const = 0;
|
||||
|
||||
/**
|
||||
* Provide status of underlying physical storage after compression
|
||||
*
|
||||
* Return 0 on success or a negative errno on error.
|
||||
*
|
||||
* @param [out] state current state of the undelying device
|
||||
* @return 0 on success or a negative errno on error.
|
||||
*/
|
||||
virtual int get_state(ExtBlkDevState& state) = 0;
|
||||
|
||||
/**
|
||||
* Populate property map with meta data of device.
|
||||
*
|
||||
* @param [in] prefix prefix to be prepended to all map values by this method
|
||||
* @param [in,out] pm property map of the device, to be extended by attributes detected by this plugin
|
||||
* @return 0 on success or a negative errno on error.
|
||||
*/
|
||||
virtual int collect_metadata(const std::string& prefix, std::map<std::string,std::string> *pm) = 0;
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<ExtBlkDevInterface> ExtBlkDevInterfaceRef;
|
||||
|
||||
class ExtBlkDevPlugin : public Plugin {
|
||||
public:
|
||||
|
||||
explicit ExtBlkDevPlugin(CephContext *cct) : Plugin(cct) {}
|
||||
virtual ~ExtBlkDevPlugin() {}
|
||||
|
||||
/**
|
||||
* Indicate plugin-required capabilities in permitted set
|
||||
* If a plugin requires a capability to be active in the
|
||||
* permitted set when invoked, it must indicate so by setting
|
||||
* the required flags in the cap_t structure passed into this method.
|
||||
* The cap_t structure is empty when passed into the method, and only the
|
||||
* method's modifications to the permitted set are used by ceph.
|
||||
* The plugin must elevate the capabilities into the effective
|
||||
* set at a later point when needed during the invocation of its
|
||||
* other methods, and is responsible to restore the effective set
|
||||
* before returning from the method
|
||||
*
|
||||
* @param [out] caps capability set indicating the necessary capabilities
|
||||
*/
|
||||
virtual int get_required_cap_set(cap_t caps) = 0;
|
||||
|
||||
/**
|
||||
* Factory method, creating ExtBlkDev instances
|
||||
*
|
||||
* @param [in] logdevname name of logic device, may be composed of physical devices
|
||||
* @param [out] ext_blk_dev object created on successful device support detection
|
||||
* @return 0 on success or a negative errno on error.
|
||||
*/
|
||||
virtual int factory(const std::string& logdevname,
|
||||
ExtBlkDevInterfaceRef& ext_blk_dev) = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,268 @@
|
|||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||||
// vim: ts=8 sw=2 smarttab
|
||||
/*
|
||||
* Ceph distributed storage system
|
||||
*
|
||||
* (C) Copyright IBM Corporation 2022
|
||||
* Author: Martin Ohmacht <mohmacht@us.ibm.com>
|
||||
*
|
||||
* Based on the file ceph/src/erasure-code/ErasureCodePlugin.cc
|
||||
* Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com>
|
||||
* Copyright (C) 2014 Red Hat <contact@redhat.com>
|
||||
*
|
||||
* Author: Loic Dachary <loic@dachary.org>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
#include "ceph_ver.h"
|
||||
#include "ExtBlkDevPlugin.h"
|
||||
#include "common/errno.h"
|
||||
#include "include/dlfcn_compat.h"
|
||||
#include "include/str_list.h"
|
||||
#include "include/ceph_assert.h"
|
||||
#include "common/ceph_context.h"
|
||||
#include "common/debug.h"
|
||||
|
||||
#define dout_subsys ceph_subsys_bdev
|
||||
#define dout_context cct
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ceph {
|
||||
|
||||
namespace extblkdev {
|
||||
|
||||
|
||||
#ifdef __linux__
|
||||
// iterate across plugins and determine each capability's reqirement
|
||||
// merge requirements into merge_caps set
|
||||
int get_required_caps(CephContext *cct, cap_t &merge_caps)
|
||||
{
|
||||
cap_t plugin_caps = nullptr;
|
||||
auto close_caps_on_return = make_scope_guard([&] {
|
||||
if (plugin_caps != nullptr) {
|
||||
cap_free(plugin_caps);
|
||||
}
|
||||
});
|
||||
|
||||
// plugin-private cap set to populate by a plugin
|
||||
plugin_caps = cap_init();
|
||||
if (plugin_caps == nullptr) {
|
||||
return -errno;
|
||||
}
|
||||
auto registry = cct->get_plugin_registry();
|
||||
std::lock_guard l(registry->lock);
|
||||
// did we preload any extblkdev type plugins?
|
||||
auto ptype = registry->plugins.find("extblkdev");
|
||||
if (ptype != registry->plugins.end()) {
|
||||
// iterate over all extblkdev plugins
|
||||
for (auto& it : ptype->second) {
|
||||
// clear cap set before passing to plugin
|
||||
if (cap_clear(plugin_caps) < 0) {
|
||||
return -errno;
|
||||
}
|
||||
// let plugin populate set with required caps
|
||||
auto ebdplugin = dynamic_cast<ExtBlkDevPlugin*>(it.second);
|
||||
if (ebdplugin == nullptr) {
|
||||
derr << __func__ << " Is not an extblkdev plugin: " << it.first << dendl;
|
||||
return -ENOENT;
|
||||
}
|
||||
int rc = ebdplugin->get_required_cap_set(plugin_caps);
|
||||
if (rc != 0)
|
||||
return rc;
|
||||
// iterate over capabilities and check for active bits
|
||||
for (int i = 0; i <= CAP_LAST_CAP; ++i) {
|
||||
cap_flag_value_t val;
|
||||
if (cap_get_flag(plugin_caps, i, CAP_PERMITTED, &val) < 0) {
|
||||
return -errno;
|
||||
}
|
||||
if (val != CAP_CLEAR) {
|
||||
cap_value_t arr[1];
|
||||
arr[0] = i;
|
||||
// set capability in merged set
|
||||
if (cap_set_flag(merge_caps, CAP_PERMITTED, 1, arr, CAP_SET) < 0) {
|
||||
return -errno;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// trim away all capabilities of this process that are not explicitly set in merge_set
|
||||
int trim_caps(CephContext *cct, cap_t &merge_caps)
|
||||
{
|
||||
cap_t proc_caps = nullptr;
|
||||
auto close_caps_on_return = make_scope_guard([&] {
|
||||
if (proc_caps != nullptr) {
|
||||
cap_free(proc_caps);
|
||||
}
|
||||
});
|
||||
bool changed = false;
|
||||
// get process capability set
|
||||
proc_caps = cap_get_proc();
|
||||
if (proc_caps == nullptr) {
|
||||
dout(1) << " cap_get_proc failed with errno: " << errno << dendl;
|
||||
return -errno;
|
||||
}
|
||||
{
|
||||
char *cap_str = cap_to_text(proc_caps, 0);
|
||||
if (cap_str != nullptr){
|
||||
dout(10) << " cap_get_proc yields: " << cap_str << dendl;
|
||||
cap_free(cap_str);
|
||||
}
|
||||
}
|
||||
// iterate over capabilities
|
||||
for (int i = 0; i <= CAP_LAST_CAP; ++i) {
|
||||
cap_flag_value_t val;
|
||||
if (cap_get_flag(merge_caps, i, CAP_PERMITTED, &val) < 0) {
|
||||
return -errno;
|
||||
}
|
||||
if (val == CAP_CLEAR) {
|
||||
if (cap_get_flag(proc_caps, i, CAP_PERMITTED, &val) < 0) {
|
||||
return -errno;
|
||||
}
|
||||
if (val != CAP_CLEAR) {
|
||||
// if bit clear in merged set, but set in process set, clear in process set
|
||||
changed = true;
|
||||
cap_value_t arr[1];
|
||||
arr[0] = i;
|
||||
if (cap_set_flag(proc_caps, CAP_PERMITTED, 1, arr, CAP_CLEAR) < 0) {
|
||||
return -errno;
|
||||
}
|
||||
if (cap_set_flag(proc_caps, CAP_EFFECTIVE, 1, arr, CAP_CLEAR) < 0) {
|
||||
return -errno;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// apply reduced capability set to process
|
||||
if (changed) {
|
||||
char *cap_str = cap_to_text(proc_caps, 0);
|
||||
if (cap_str != nullptr){
|
||||
dout(10) << " new caps for cap_set_proc: " << cap_str << dendl;
|
||||
cap_free(cap_str);
|
||||
}
|
||||
if (cap_set_proc(proc_caps) < 0) {
|
||||
dout(1) << " cap_set_proc failed with errno: " << errno << dendl;
|
||||
return -errno;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Restrict the process capabilities to those the preloaded extblkdev
// plugins asked for: collect the requirements with get_required_caps() and
// hand the merged set to trim_caps().
//
// @param cct context providing the plugin registry and logging
// @return 0 on success, the negative errno of cap_init(), or the error
//         returned by get_required_caps() / trim_caps()
int limit_caps(CephContext *cct)
{
  // collect required caps in this set
  cap_t required = cap_init();
  if (required == nullptr) {
    return -errno;
  }
  auto free_required_on_return = make_scope_guard([&] {
    cap_free(required);
  });

  const int rc = get_required_caps(cct, required);
  return rc != 0 ? rc : trim_caps(cct, required);
}
|
||||
#endif
|
||||
|
||||
// preload set of extblkdev plugins defined in config
|
||||
int preload(CephContext *cct)
|
||||
{
|
||||
const auto& conf = cct->_conf;
|
||||
string plugins = conf.get_val<std::string>("osd_extblkdev_plugins");
|
||||
dout(10) << "starting preload of extblkdev plugins: " << plugins << dendl;
|
||||
|
||||
list<string> plugins_list;
|
||||
get_str_list(plugins, plugins_list);
|
||||
|
||||
auto registry = cct->get_plugin_registry();
|
||||
{
|
||||
std::lock_guard l(registry->lock);
|
||||
for (auto& plg : plugins_list) {
|
||||
dout(10) << "starting load of extblkdev plugin: " << plg << dendl;
|
||||
int rc = registry->load("extblkdev", std::string("ebd_") + plg);
|
||||
if (rc) {
|
||||
derr << __func__ << " failed preloading extblkdev plugin: " << plg << dendl;
|
||||
return rc;
|
||||
}else{
|
||||
dout(10) << "successful load of extblkdev plugin: " << plg << dendl;
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef __linux__
|
||||
// if we are still running as root, we do not need to trim capabilities
|
||||
// as we are intended to use the privileges
|
||||
if (geteuid() == 0) {
|
||||
return 0;
|
||||
}
|
||||
return limit_caps(cct);
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// scan extblkdev plugins for support of this device
|
||||
int detect_device(CephContext *cct,
|
||||
const std::string &logdevname,
|
||||
ExtBlkDevInterfaceRef& ebd_impl)
|
||||
{
|
||||
int rc = -ENOENT;
|
||||
std::string plg_name;
|
||||
auto registry = cct->get_plugin_registry();
|
||||
std::lock_guard l(registry->lock);
|
||||
auto ptype = registry->plugins.find("extblkdev");
|
||||
if (ptype == registry->plugins.end()) {
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
for (auto& it : ptype->second) {
|
||||
|
||||
dout(10) << __func__ << " Trying to detect block device " << logdevname
|
||||
<< " using plugin " << it.first << dendl;
|
||||
auto ebdplugin = dynamic_cast<ExtBlkDevPlugin*>(it.second);
|
||||
if (ebdplugin == nullptr) {
|
||||
derr << __func__ << " Is not an extblkdev plugin: " << it.first << dendl;
|
||||
return -ENOENT;
|
||||
}
|
||||
rc = ebdplugin->factory(logdevname, ebd_impl);
|
||||
if (rc == 0) {
|
||||
plg_name = it.first;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (rc == 0) {
|
||||
dout(1) << __func__ << " using plugin " << plg_name << ", " << "volume " << ebd_impl->get_devname()
|
||||
<< " maps to " << logdevname << dendl;
|
||||
} else {
|
||||
dout(10) << __func__ << " no plugin volume maps to " << logdevname << dendl;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
// release device object
|
||||
int release_device(ExtBlkDevInterfaceRef& ebd_impl)
|
||||
{
|
||||
if (ebd_impl) {
|
||||
ebd_impl.reset();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||||
// vim: ts=8 sw=2 smarttab
|
||||
/*
|
||||
* Ceph distributed storage system
|
||||
*
|
||||
* (C) Copyright IBM Corporation 2022
|
||||
* Author: Martin Ohmacht <mohmacht@us.ibm.com>
|
||||
*
|
||||
* Based on the file ceph/src/erasure-code/ErasureCodePlugin.h
|
||||
* Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com>
|
||||
* Copyright (C) 2014 Red Hat <contact@redhat.com>
|
||||
*
|
||||
* Author: Loic Dachary <loic@dachary.org>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef CEPH_EXT_BLK_DEV_PLUGIN_H
|
||||
#define CEPH_EXT_BLK_DEV_PLUGIN_H
|
||||
|
||||
#include "ExtBlkDevInterface.h"
|
||||
|
||||
namespace ceph {
|
||||
|
||||
namespace extblkdev {
|
||||
int preload(CephContext *cct);
|
||||
int detect_device(CephContext *cct,
|
||||
const std::string &logdevname,
|
||||
ExtBlkDevInterfaceRef& ebd_impl);
|
||||
int release_device(ExtBlkDevInterfaceRef& ebd_impl);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,9 @@
|
|||
# vdo plugin
|
||||
|
||||
set(vdo_srcs
|
||||
ExtBlkDevPluginVdo.cc
|
||||
ExtBlkDevVdo.cc
|
||||
)
|
||||
|
||||
add_library(ceph_ebd_vdo SHARED ${vdo_srcs})
|
||||
install(TARGETS ceph_ebd_vdo DESTINATION ${extblkdev_plugin_dir})
|
|
@ -0,0 +1,59 @@
|
|||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||||
// vim: ts=8 sw=2 smarttab
|
||||
/*
|
||||
* Ceph - scalable distributed file system
|
||||
*
|
||||
* (C) Copyright IBM Corporation 2022
|
||||
* Author: Martin Ohmacht <mohmacht@us.ibm.com>
|
||||
*
|
||||
* Based on the file src/erasure-code/clay/ErasureCodePluginClay.cc
|
||||
* Copyright (C) 2018 Indian Institute of Science <office.ece@iisc.ac.in>
|
||||
*
|
||||
* Author: Myna Vajha <mynaramana@gmail.com>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "ceph_ver.h"
|
||||
#include "ExtBlkDevPluginVdo.h"
|
||||
#include "common/ceph_context.h"
|
||||
|
||||
|
||||
// This plugin does not require any capabilities to be set
|
||||
int ExtBlkDevPluginVdo::get_required_cap_set(cap_t caps)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int ExtBlkDevPluginVdo::factory(const std::string& logdevname,
|
||||
ceph::ExtBlkDevInterfaceRef& ext_blk_dev)
|
||||
{
|
||||
auto vdo = new ExtBlkDevVdo(cct);
|
||||
int r = vdo->init(logdevname);
|
||||
if (r != 0) {
|
||||
delete vdo;
|
||||
return r;
|
||||
}
|
||||
ext_blk_dev.reset(vdo);
|
||||
return 0;
|
||||
};
|
||||
|
||||
// Plugin version hook: hands back the CEPH_GIT_NICE_VER string taken from
// ceph_ver.h.
const char *__ceph_plugin_version()
{
  return CEPH_GIT_NICE_VER;
}
|
||||
|
||||
/**
 * Plugin entry point: create the VDO plugin object and add it to the plugin
 * registry of @p cct under (@p type, @p name).
 *
 * @param cct   context whose plugin registry receives the plugin
 * @param type  plugin type passed through to the registry
 * @param name  plugin name passed through to the registry
 * @return the result of the registry's add() call. On a non-zero result the
 *         freshly created plugin object is deleted here before returning.
 */
int __ceph_plugin_init(CephContext *cct,
                       const std::string& type,
                       const std::string& name)
{
  // A plain new-expression signals allocation failure by throwing
  // std::bad_alloc; it never yields a null pointer, so a null check on the
  // result would be unreachable code.
  auto plg = new ExtBlkDevPluginVdo(cct);
  int rc = cct->get_plugin_registry()->add(type, name, plg);
  if (rc != 0) {
    delete plg;
  }
  return rc;
}
|
|
@ -0,0 +1,34 @@
|
|||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||||
// vim: ts=8 sw=2 smarttab
|
||||
/*
|
||||
* Ceph distributed storage system
|
||||
*
|
||||
* (C) Copyright IBM Corporation 2022
|
||||
* Author: Martin Ohmacht <mohmacht@us.ibm.com>
|
||||
*
|
||||
* Based on the file src/erasure-code/clay/ErasureCodePluginClay.h
|
||||
* Copyright (C) 2018 Indian Institute of Science <office.ece@iisc.ac.in>
|
||||
*
|
||||
* Author: Myna Vajha <mynaramana@gmail.com>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef CEPH_EXT_BLK_DEV_PLUGIN_VDO_H
|
||||
#define CEPH_EXT_BLK_DEV_PLUGIN_VDO_H
|
||||
|
||||
#include "ExtBlkDevVdo.h"
|
||||
|
||||
class ExtBlkDevPluginVdo : public ceph::ExtBlkDevPlugin {
|
||||
public:
|
||||
explicit ExtBlkDevPluginVdo(CephContext *cct) : ExtBlkDevPlugin(cct) {}
|
||||
int get_required_cap_set(cap_t caps) override;
|
||||
int factory(const std::string& logdevname,
|
||||
ceph::ExtBlkDevInterfaceRef& ext_blk_dev) override;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,156 @@
|
|||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||||
// vim: ts=8 sw=2 smarttab
|
||||
/*
|
||||
* Ceph - scalable distributed file system
|
||||
*
|
||||
* (C) Copyright IBM Corporation 2022
|
||||
* Author: Martin Ohmacht <mohmacht@us.ibm.com>
|
||||
*
|
||||
* Based on the file ceph/src/common/blkdev.cc
|
||||
* Copyright (c) 2015 Hewlett-Packard Development Company, L.P.
|
||||
*
|
||||
* This is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License version 2.1, as published by the Free Software
|
||||
* Foundation. See file COPYING.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "ExtBlkDevVdo.h"
|
||||
#include "common/blkdev.h"
|
||||
#include "include/stringify.h"
|
||||
#include <errno.h>
|
||||
#include "common/debug.h"
|
||||
|
||||
#define dout_subsys ceph_subsys_bdev
|
||||
#define dout_context cct
|
||||
#undef dout_prefix
|
||||
#define dout_prefix *_dout << "vdo(" << this << ") "
|
||||
|
||||
|
||||
int ExtBlkDevVdo::_get_vdo_stats_handle(const std::string& devname)
|
||||
{
|
||||
int rc = -ENOENT;
|
||||
dout(10) << __func__ << " VDO init checking device: " << devname << dendl;
|
||||
|
||||
// we need to go from the raw devname (e.g., dm-4) to the VDO volume name.
|
||||
// currently the best way seems to be to look at /dev/mapper/* ...
|
||||
std::string expect = std::string("../") + devname; // expected symlink target
|
||||
DIR *dir = ::opendir("/dev/mapper");
|
||||
if (!dir) {
|
||||
return -errno;
|
||||
}
|
||||
struct dirent *de = nullptr;
|
||||
while ((de = ::readdir(dir))) {
|
||||
if (de->d_name[0] == '.')
|
||||
continue;
|
||||
char fn[4096], target[4096];
|
||||
snprintf(fn, sizeof(fn), "/dev/mapper/%s", de->d_name);
|
||||
int r = readlink(fn, target, sizeof(target));
|
||||
if (r < 0 || r >= (int)sizeof(target))
|
||||
continue;
|
||||
target[r] = 0;
|
||||
if (expect == target) {
|
||||
snprintf(fn, sizeof(fn), "/sys/kvdo/%s/statistics", de->d_name);
|
||||
int vdo_fd = ::open(fn, O_RDONLY|O_CLOEXEC);
|
||||
if (vdo_fd >= 0) {
|
||||
name = de->d_name;
|
||||
vdo_dir_fd = vdo_fd;
|
||||
rc = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
closedir(dir);
|
||||
return rc;
|
||||
}
|
||||
|
||||
int ExtBlkDevVdo::get_vdo_stats_handle()
|
||||
{
|
||||
std::set<std::string> devs = { logdevname };
|
||||
while (!devs.empty()) {
|
||||
std::string dev = *devs.begin();
|
||||
devs.erase(devs.begin());
|
||||
int rc = _get_vdo_stats_handle(dev);
|
||||
if (rc == 0) {
|
||||
// yay, it's vdo
|
||||
return rc;
|
||||
}
|
||||
// ok, see if there are constituent devices
|
||||
if (dev.find("dm-") == 0) {
|
||||
get_dm_parents(dev, &devs);
|
||||
}
|
||||
}
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
int64_t ExtBlkDevVdo::get_vdo_stat(const char *property)
|
||||
{
|
||||
int64_t ret = 0;
|
||||
int fd = ::openat(vdo_dir_fd, property, O_RDONLY|O_CLOEXEC);
|
||||
if (fd < 0) {
|
||||
return 0;
|
||||
}
|
||||
char buf[1024];
|
||||
int r = ::read(fd, buf, sizeof(buf) - 1);
|
||||
if (r > 0) {
|
||||
buf[r] = 0;
|
||||
ret = atoll(buf);
|
||||
}
|
||||
VOID_TEMP_FAILURE_RETRY(::close(fd));
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
int ExtBlkDevVdo::init(const std::string& alogdevname)
|
||||
{
|
||||
logdevname = alogdevname;
|
||||
// get directory handle for VDO metadata
|
||||
return get_vdo_stats_handle();
|
||||
}
|
||||
|
||||
|
||||
// Fill `state` with the logical and physical total/available sizes (in
// bytes) computed from the VDO block statistics.
//
// All six statistics are read first. If block_size, physical_blocks,
// overhead_blocks_used, data_blocks_used or logical_blocks is zero, the
// values are logged, `state` is left untouched and -1 is returned.
// Otherwise `state` is updated and 0 is returned.
int ExtBlkDevVdo::get_state(ceph::ExtBlkDevState& state)
{
  const int64_t block_size = get_vdo_stat("block_size");
  const int64_t physical_blocks = get_vdo_stat("physical_blocks");
  const int64_t overhead_blocks_used = get_vdo_stat("overhead_blocks_used");
  const int64_t data_blocks_used = get_vdo_stat("data_blocks_used");
  const int64_t logical_blocks = get_vdo_stat("logical_blocks");
  const int64_t logical_blocks_used = get_vdo_stat("logical_blocks_used");

  // logical_blocks_used is deliberately not part of this check
  const bool all_nonzero = block_size
    && physical_blocks
    && overhead_blocks_used
    && data_blocks_used
    && logical_blocks;

  if (!all_nonzero) {
    dout(1) << __func__ << " VDO sysfs provided zero value for at least one statistic: " << dendl;
    dout(1) << __func__ << " VDO block_size: " << block_size << dendl;
    dout(1) << __func__ << " VDO physical_blocks: " << physical_blocks << dendl;
    dout(1) << __func__ << " VDO overhead_blocks_used: " << overhead_blocks_used << dendl;
    dout(1) << __func__ << " VDO data_blocks_used: " << data_blocks_used << dendl;
    dout(1) << __func__ << " VDO logical_blocks: " << logical_blocks << dendl;
    return -1;
  }

  // physical space not taken by overhead or stored data
  const int64_t physical_free_blocks =
    physical_blocks - overhead_blocks_used - data_blocks_used;
  const int64_t logical_free_blocks = logical_blocks - logical_blocks_used;

  state.set_logical_total(block_size * logical_blocks);
  state.set_logical_avail(block_size * logical_free_blocks);
  state.set_physical_total(block_size * physical_blocks);
  state.set_physical_avail(block_size * physical_free_blocks);
  return 0;
}
|
||||
|
||||
int ExtBlkDevVdo::collect_metadata(const std::string& prefix, std::map<std::string,std::string> *pm)
|
||||
{
|
||||
ceph::ExtBlkDevState state;
|
||||
int rc = get_state(state);
|
||||
if(rc != 0){
|
||||
return rc;
|
||||
}
|
||||
(*pm)[prefix + "vdo"] = "true";
|
||||
(*pm)[prefix + "vdo_physical_size"] = stringify(state.get_physical_total());
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||||
// vim: ts=8 sw=2 smarttab
|
||||
/*
|
||||
* Ceph - scalable distributed file system
|
||||
*
|
||||
* (C) Copyright IBM Corporation 2022
|
||||
* Author: Martin Ohmacht <mohmacht@us.ibm.com>
|
||||
*
|
||||
* Based on the file ceph/src/common/blkdev.cc
|
||||
* Copyright (c) 2015 Hewlett-Packard Development Company, L.P.
|
||||
*
|
||||
* And also based on the file src/erasure-code/clay/ErasureCodeClay.h
|
||||
* Copyright (C) 2018 Indian Institute of Science <office.ece@iisc.ac.in>
|
||||
*
|
||||
* Author: Myna Vajha <mynaramana@gmail.com>
|
||||
*
|
||||
*
|
||||
* This is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License version 2.1, as published by the Free Software
|
||||
* Foundation. See file COPYING.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef CEPH_EXT_BLK_DEV_VDO_H
|
||||
#define CEPH_EXT_BLK_DEV_VDO_H
|
||||
|
||||
#include "extblkdev/ExtBlkDevInterface.h"
|
||||
#include "include/compat.h"
|
||||
|
||||
class ExtBlkDevVdo final : public ceph::ExtBlkDevInterface
|
||||
{
|
||||
int vdo_dir_fd = -1; ///< fd for vdo sysfs directory
|
||||
std::string name; // name of the underlying vdo device
|
||||
std::string logdevname; // name of the top level logical device
|
||||
CephContext *cct;
|
||||
public:
|
||||
explicit ExtBlkDevVdo(CephContext *cct) : cct(cct) {}
|
||||
~ExtBlkDevVdo(){
|
||||
if(vdo_dir_fd >= 0)
|
||||
VOID_TEMP_FAILURE_RETRY(::close(vdo_dir_fd));
|
||||
}
|
||||
int _get_vdo_stats_handle(const std::string& devname);
|
||||
int get_vdo_stats_handle();
|
||||
int64_t get_vdo_stat(const char *property);
|
||||
virtual int init(const std::string& logdevname);
|
||||
virtual const std::string& get_devname() const {return name;}
|
||||
virtual int get_state(ceph::ExtBlkDevState& state);
|
||||
virtual int collect_metadata(const std::string& prefix, std::map<std::string,std::string> *pm);
|
||||
};
|
||||
|
||||
#endif
|
|
@ -22,6 +22,7 @@
|
|||
#include "common/signal.h"
|
||||
#include "common/version.h"
|
||||
#include "erasure-code/ErasureCodePlugin.h"
|
||||
#include "extblkdev/ExtBlkDevPlugin.h"
|
||||
#include "global/global_context.h"
|
||||
#include "global/global_init.h"
|
||||
#include "global/pidfile.h"
|
||||
|
@ -317,6 +318,13 @@ global_init(const std::map<std::string,std::string> *defaults,
|
|||
<< std::endl;
|
||||
exit(1);
|
||||
}
|
||||
#if defined(HAVE_SYS_PRCTL_H)
|
||||
if (g_conf().get_val<bool>("set_keepcaps")) {
|
||||
if (prctl(PR_SET_KEEPCAPS, 1) == -1) {
|
||||
cerr << "warning: unable to set keepcaps flag: " << cpp_strerror(errno) << std::endl;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (setuid(uid) != 0) {
|
||||
cerr << "unable to setuid " << uid << ": " << cpp_strerror(errno)
|
||||
<< std::endl;
|
||||
|
|
|
@ -10421,15 +10421,16 @@ void BlueStore::_get_statfs_overall(struct store_statfs_t *buf)
|
|||
- buf->omap_allocated;
|
||||
}
|
||||
|
||||
uint64_t thin_total, thin_avail;
|
||||
if (bdev->get_thin_utilization(&thin_total, &thin_avail)) {
|
||||
buf->total += thin_total;
|
||||
ExtBlkDevState ebd_state;
|
||||
int rc = bdev->get_ebd_state(ebd_state);
|
||||
if (rc == 0) {
|
||||
buf->total += ebd_state.get_physical_total();
|
||||
|
||||
// we are limited by both the size of the virtual device and the
|
||||
// underlying physical device.
|
||||
bfree = std::min(bfree, thin_avail);
|
||||
bfree = std::min(bfree, ebd_state.get_physical_avail());
|
||||
|
||||
buf->allocated = thin_total - thin_avail;
|
||||
buf->allocated = ebd_state.get_physical_total() - ebd_state.get_physical_avail();;
|
||||
} else {
|
||||
buf->total += bdev->get_size();
|
||||
}
|
||||
|
|
|
@ -731,10 +731,10 @@ void FileStore::collect_metadata(map<string,string> *pm)
|
|||
(*pm)["backend_filestore_dev_node"] = string(dev_node);
|
||||
devname = dev_node;
|
||||
}
|
||||
if (rc == 0 && vdo_fd >= 0) {
|
||||
(*pm)["vdo"] = "true";
|
||||
(*pm)["vdo_physical_size"] =
|
||||
stringify(4096 * get_vdo_stat(vdo_fd, "physical_blocks"));
|
||||
// if compression device detected, collect meta data for device
|
||||
// VDO specific meta data has moved into VDO plugin
|
||||
if (rc == 0 && ebd_impl) {
|
||||
ebd_impl->collect_metadata("", pm);
|
||||
}
|
||||
if (journal) {
|
||||
journal->collect_metadata(pm);
|
||||
|
@ -778,12 +778,19 @@ int FileStore::statfs(struct store_statfs_t *buf0, osd_alert_list_t* alerts)
|
|||
buf0->omap_allocated += object_map->get_db()->get_estimated_size(kv_usage);
|
||||
}
|
||||
|
||||
uint64_t thin_total, thin_avail;
|
||||
if (get_vdo_utilization(vdo_fd, &thin_total, &thin_avail)) {
|
||||
buf0->total = thin_total;
|
||||
bfree = std::min(bfree, thin_avail);
|
||||
buf0->allocated = thin_total - thin_avail;
|
||||
buf0->data_stored = bfree;
|
||||
if (ebd_impl) {
|
||||
ExtBlkDevState state;
|
||||
int rc = ebd_impl->get_state(state);
|
||||
if (rc == 0){
|
||||
buf0->total = state.get_physical_total();
|
||||
bfree = std::min(bfree, state.get_physical_avail());
|
||||
buf0->allocated = state.get_physical_total() - state.get_physical_avail();
|
||||
buf0->data_stored = bfree;
|
||||
} else {
|
||||
buf0->total = buf.f_blocks * buf.f_bsize;
|
||||
buf0->allocated = bfree;
|
||||
buf0->data_stored = bfree;
|
||||
}
|
||||
} else {
|
||||
buf0->total = buf.f_blocks * buf.f_bsize;
|
||||
buf0->allocated = bfree;
|
||||
|
@ -1287,16 +1294,11 @@ int FileStore::_detect_fs()
|
|||
return r;
|
||||
}
|
||||
|
||||
// vdo
|
||||
{
|
||||
char dev_node[PATH_MAX];
|
||||
if (int rc = BlkDev{fsid_fd}.wholedisk(dev_node, PATH_MAX); rc == 0) {
|
||||
vdo_fd = get_vdo_stats_handle(dev_node, &vdo_name);
|
||||
if (vdo_fd >= 0) {
|
||||
dout(0) << __func__ << " VDO volume " << vdo_name << " for " << dev_node
|
||||
<< dendl;
|
||||
}
|
||||
}
|
||||
// check if any extended block device plugin recognizes this device
|
||||
// detect_vdo has moved into the VDO plugin
|
||||
int rc = extblkdev::detect_device(cct, devname, ebd_impl);
|
||||
if (rc != 0) {
|
||||
dout(20) << __func__ << " no plugin volume maps to " << devname << dendl;
|
||||
}
|
||||
|
||||
// test xattrs
|
||||
|
@ -2092,10 +2094,7 @@ int FileStore::umount()
|
|||
(*it)->stop();
|
||||
}
|
||||
|
||||
if (vdo_fd >= 0) {
|
||||
VOID_TEMP_FAILURE_RETRY(::close(vdo_fd));
|
||||
vdo_fd = -1;
|
||||
}
|
||||
extblkdev::release_device(ebd_impl);
|
||||
if (fsid_fd >= 0) {
|
||||
VOID_TEMP_FAILURE_RETRY(::close(fsid_fd));
|
||||
fsid_fd = -1;
|
||||
|
|
|
@ -47,6 +47,7 @@
|
|||
#include "WBThrottle.h"
|
||||
|
||||
#include "include/uuid.h"
|
||||
#include "extblkdev/ExtBlkDevPlugin.h"
|
||||
|
||||
#if defined(__linux__)
|
||||
# ifndef BTRFS_SUPER_MAGIC
|
||||
|
@ -162,8 +163,7 @@ private:
|
|||
|
||||
std::string devname;
|
||||
|
||||
int vdo_fd = -1;
|
||||
std::string vdo_name;
|
||||
ExtBlkDevInterfaceRef ebd_impl; // structure for retrieving compression state from extended block device
|
||||
|
||||
deque<uint64_t> snaps;
|
||||
|
||||
|
|
Loading…
Reference in New Issue