From b4dbfcc87988d72b76029bf79647516a3bc0c94f Mon Sep 17 00:00:00 2001 From: Mykola Golub Date: Sun, 14 May 2017 09:00:24 +0000 Subject: [PATCH] rbd-ggate: tool to map images on FreeBSD via GEOM Gate rbd-ggate spawns a process responsible for the creation of ggate device and forwarding I/O requests between the GEOM Gate kernel subsystem and RADOS. On FreeBSD it provides functionality similar to rbd-nbd on Linux. Signed-off-by: Mykola Golub --- doc/man/8/CMakeLists.txt | 7 +- doc/man/8/rbd-ggate.rst | 79 ++++++ qa/workunits/rbd/rbd-ggate.sh | 182 ++++++++++++++ src/test/cli/rbd/help.t | 4 + src/tools/CMakeLists.txt | 3 + src/tools/rbd/CMakeLists.txt | 4 + src/tools/rbd/action/Ggate.cc | 187 ++++++++++++++ src/tools/rbd_ggate/CMakeLists.txt | 9 + src/tools/rbd_ggate/Driver.cc | 166 +++++++++++++ src/tools/rbd_ggate/Driver.h | 49 ++++ src/tools/rbd_ggate/Request.h | 55 +++++ src/tools/rbd_ggate/Server.cc | 270 +++++++++++++++++++++ src/tools/rbd_ggate/Server.h | 88 +++++++ src/tools/rbd_ggate/Watcher.cc | 48 ++++ src/tools/rbd_ggate/Watcher.h | 34 +++ src/tools/rbd_ggate/debug.cc | 55 +++++ src/tools/rbd_ggate/debug.h | 17 ++ src/tools/rbd_ggate/ggate_drv.c | 375 +++++++++++++++++++++++++++++ src/tools/rbd_ggate/ggate_drv.h | 57 +++++ src/tools/rbd_ggate/main.cc | 364 ++++++++++++++++++++++++++++ 20 files changed, 2052 insertions(+), 1 deletion(-) create mode 100644 doc/man/8/rbd-ggate.rst create mode 100755 qa/workunits/rbd/rbd-ggate.sh create mode 100644 src/tools/rbd/action/Ggate.cc create mode 100644 src/tools/rbd_ggate/CMakeLists.txt create mode 100644 src/tools/rbd_ggate/Driver.cc create mode 100644 src/tools/rbd_ggate/Driver.h create mode 100644 src/tools/rbd_ggate/Request.h create mode 100644 src/tools/rbd_ggate/Server.cc create mode 100644 src/tools/rbd_ggate/Server.h create mode 100644 src/tools/rbd_ggate/Watcher.cc create mode 100644 src/tools/rbd_ggate/Watcher.h create mode 100644 src/tools/rbd_ggate/debug.cc create mode 100644 src/tools/rbd_ggate/debug.h 
create mode 100644 src/tools/rbd_ggate/ggate_drv.c create mode 100644 src/tools/rbd_ggate/ggate_drv.h create mode 100644 src/tools/rbd_ggate/main.cc diff --git a/doc/man/8/CMakeLists.txt b/doc/man/8/CMakeLists.txt index 76bab3a9b35..8a2204c7137 100644 --- a/doc/man/8/CMakeLists.txt +++ b/doc/man/8/CMakeLists.txt @@ -54,12 +54,17 @@ if(WITH_RBD) list(APPEND man_srcs ceph-rbdnamer.rst rbd-mirror.rst - rbd-nbd.rst rbd-replay-many.rst rbd-replay-prep.rst rbd-replay.rst rbdmap.rst rbd.rst) + if(LINUX) + list(APPEND man_srcs rbd-nbd.rst) + endif() + if(FREEBSD) + list(APPEND man_srcs rbd-ggate.rst) + endif() endif() foreach(man ${man_srcs}) diff --git a/doc/man/8/rbd-ggate.rst b/doc/man/8/rbd-ggate.rst new file mode 100644 index 00000000000..67d0c81e87e --- /dev/null +++ b/doc/man/8/rbd-ggate.rst @@ -0,0 +1,79 @@ +:orphan: + +================================================== + rbd-ggate -- map rbd images via FreeBSD GEOM Gate +================================================== + +.. program:: rbd-ggate + +Synopsis +======== + +| **rbd-ggate** [--read-only] [--exclusive] [--device *ggate device*] map *image-spec* | *snap-spec* +| **rbd-ggate** unmap *ggate device* +| **rbd-ggate** list + +Description +=========== + +**rbd-ggate** is a client for RADOS block device (rbd) images. It will +map a rbd image to a ggate (FreeBSD GEOM Gate class) device, allowing +access to it as a regular local block device. + +Commands +======== + +map +--- + +Spawn a process responsible for the creation of ggate device and +forwarding I/O requests between the GEOM Gate kernel subsystem and +RADOS. + +unmap +----- + +Destroy ggate device and terminate the process responsible for it. + +list +---- + +List mapped ggate devices. + +Options +======= + +.. option:: --device *ggate device* + + Specify ggate device path. + +.. option:: --read-only + + Map read-only. + +.. option:: --exclusive + + Forbid writes by other clients. 
+ +Image and snap specs +==================== + +| *image-spec* is [*pool-name*]/*image-name* +| *snap-spec* is [*pool-name*]/*image-name*\ @\ *snap-name* + +The default for *pool-name* is "rbd". If an image name contains a slash +character ('/'), *pool-name* is required. + +Availability +============ + +**rbd-ggate** is part of Ceph, a massively scalable, open-source, +distributed storage system. Please refer to the Ceph documentation at +http://ceph.com/docs for more information. + + +See also +======== + +:doc:`rbd <rbd>`\(8) +:doc:`ceph <ceph>`\(8) diff --git a/qa/workunits/rbd/rbd-ggate.sh b/qa/workunits/rbd/rbd-ggate.sh new file mode 100755 index 00000000000..536070a2f96 --- /dev/null +++ b/qa/workunits/rbd/rbd-ggate.sh @@ -0,0 +1,182 @@ +#!/bin/sh -ex + +POOL=testrbdggate$$ +IMAGE=test +SIZE=64 +DATA= +DEV= + +if which xmlstarlet > /dev/null 2>&1; then + XMLSTARLET=xmlstarlet +elif which xml > /dev/null 2>&1; then + XMLSTARLET=xml +else + echo "Missing xmlstarlet binary!" + exit 1 +fi + +_sudo() +{ + local cmd + + if [ `id -u` -eq 0 ] + then + "$@" + return $? + fi + + # Look for the command in the user path. If it fails run it as is, + # supposing it is in sudo path. 
+ cmd=`which $1 2>/dev/null` || cmd=$1 + shift + sudo -nE "${cmd}" "$@" +} + +setup() +{ + if [ -e CMakeCache.txt ]; then + # running under cmake build dir + + CEPH_SRC=$(readlink -f $(dirname $0)/../../../src) + CEPH_ROOT=${PWD} + CEPH_BIN=${CEPH_ROOT}/bin + + export LD_LIBRARY_PATH=${CEPH_ROOT}/lib:${LD_LIBRARY_PATH} + export PYTHONPATH=${PYTHONPATH}:${CEPH_SRC}/pybind + for x in ${CEPH_ROOT}/lib/cython_modules/lib* ; do + PYTHONPATH="${PYTHONPATH}:${x}" + done + PATH=${CEPH_BIN}:${PATH} + fi + + _sudo echo test sudo + + trap cleanup INT TERM EXIT + TEMPDIR=`mktemp -d` + DATA=${TEMPDIR}/data + dd if=/dev/urandom of=${DATA} bs=1M count=${SIZE} + ceph osd pool create ${POOL} 64 64 + rbd --dest-pool ${POOL} --no-progress import ${DATA} ${IMAGE} +} + +cleanup() +{ + set +e + rm -Rf ${TEMPDIR} + if [ -n "${DEV}" ] + then + _sudo rbd-ggate unmap ${DEV} + fi + ceph osd pool delete ${POOL} ${POOL} --yes-i-really-really-mean-it +} + +expect_false() +{ + if "$@"; then return 1; else return 0; fi +} + +# +# main +# + +setup + +# exit status test +expect_false rbd-ggate +expect_false rbd-ggate INVALIDCMD +if [ `id -u` -ne 0 ] +then + expect_false rbd-ggate map ${IMAGE} +fi +expect_false _sudo rbd-ggate map INVALIDIMAGE + +# map test using the first unused device +DEV=`_sudo rbd-ggate map ${POOL}/${IMAGE}` +_sudo rbd-ggate list | grep "^${DEV}$" + +# map test specifying the device +expect_false _sudo rbd-ggate --device ${DEV} map ${POOL}/${IMAGE} +dev1=${DEV} +_sudo rbd-ggate unmap ${DEV} +_sudo rbd-ggate list | expect_false grep "^${DEV}$" +DEV= +# XXX: race possible when the device is reused by other process +DEV=`_sudo rbd-ggate --device ${dev1} map ${POOL}/${IMAGE}` +[ "${DEV}" = "${dev1}" ] +_sudo rbd-ggate list | grep "^${DEV}$" + +# read test +[ "`dd if=${DATA} bs=1M | md5`" = "`_sudo dd if=${DEV} bs=1M | md5`" ] + +# write test +dd if=/dev/urandom of=${DATA} bs=1M count=${SIZE} +_sudo dd if=${DATA} of=${DEV} bs=1M +_sudo sync +[ "`dd if=${DATA} bs=1M | md5`" = "`rbd 
-p ${POOL} --no-progress export ${IMAGE} - | md5`" ] + +# trim test +provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .` +used=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .` +[ "${used}" -eq "${provisioned}" ] +_sudo newfs -E ${DEV} +_sudo sync +provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .` +used=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .` +[ "${used}" -lt "${provisioned}" ] + +# resize test +devname=$(basename ${DEV}) +size=$(geom gate list ${devname} | awk '$1 ~ /Mediasize:/ {print $2}') +test -n "${size}" +rbd resize ${POOL}/${IMAGE} --size $((SIZE * 2))M +rbd info ${POOL}/${IMAGE} +if [ -z "$RBD_GGATE_RESIZE_SUPPORTED" ]; then + # XXX: ggate device resize is not supported by vanilla kernel. + # rbd-ggate should terminate when detecting resize. 
+ _sudo rbd-ggate list | expect_false grep "^${DEV}$" +else + _sudo rbd-ggate list | grep "^${DEV}$" + size2=$(geom gate list ${devname} | awk '$1 ~ /Mediasize:/ {print $2}') + test -n "${size2}" + test ${size2} -eq $((size * 2)) + dd if=/dev/urandom of=${DATA} bs=1M count=$((SIZE * 2)) + _sudo dd if=${DATA} of=${DEV} bs=1M + _sudo sync + [ "`dd if=${DATA} bs=1M | md5`" = "`rbd -p ${POOL} --no-progress export ${IMAGE} - | md5`" ] + rbd resize ${POOL}/${IMAGE} --allow-shrink --size ${SIZE}M + rbd info ${POOL}/${IMAGE} + size2=$(geom gate list ${devname} | awk '$1 ~ /Mediasize:/ {print $2}') + test -n "${size2}" + test ${size2} -eq ${size} + truncate -s ${SIZE}M ${DATA} + [ "`dd if=${DATA} bs=1M | md5`" = "`rbd -p ${POOL} --no-progress export ${IMAGE} - | md5`" ] + _sudo rbd-ggate unmap ${DEV} +fi +DEV= + +# read-only option test +DEV=`_sudo rbd-ggate map --read-only ${POOL}/${IMAGE}` +devname=$(basename ${DEV}) +_sudo rbd-ggate list | grep "^${DEV}$" +access=$(geom gate list ${devname} | awk '$1 == "access:" {print $2}') +test "${access}" = "read-only" +_sudo dd if=${DEV} of=/dev/null bs=1M +expect_false _sudo dd if=${DATA} of=${DEV} bs=1M +_sudo rbd-ggate unmap ${DEV} + +# exclusive option test +DEV=`_sudo rbd-ggate map --exclusive ${POOL}/${IMAGE}` +_sudo rbd-ggate list | grep "^${DEV}$" +_sudo dd if=${DATA} of=${DEV} bs=1M +_sudo sync +expect_false timeout 10 \ + rbd -p ${POOL} bench ${IMAGE} --io-type=write --io-size=1024 --io-total=1024 +_sudo rbd-ggate unmap ${DEV} +DEV= +rbd bench -p ${POOL} ${IMAGE} --io-type=write --io-size=1024 --io-total=1024 + +echo OK diff --git a/src/test/cli/rbd/help.t b/src/test/cli/rbd/help.t index 7c38ae60255..de9ff8a332c 100644 --- a/src/test/cli/rbd/help.t +++ b/src/test/cli/rbd/help.t @@ -1,3 +1,7 @@ +Skip test on FreeBSD as it generates different output there. + + $ test "$(uname)" = "FreeBSD" && exit 80 || true + $ rbd --help usage: rbd ... 
diff --git a/src/tools/CMakeLists.txt b/src/tools/CMakeLists.txt index ea241cceef2..ed19c63bc3c 100644 --- a/src/tools/CMakeLists.txt +++ b/src/tools/CMakeLists.txt @@ -100,4 +100,7 @@ if(WITH_RBD) if(LINUX) add_subdirectory(rbd_nbd) endif() + if(FREEBSD) + add_subdirectory(rbd_ggate) + endif() endif(WITH_RBD) diff --git a/src/tools/rbd/CMakeLists.txt b/src/tools/rbd/CMakeLists.txt index ad4005ebf0a..21250690e6d 100644 --- a/src/tools/rbd/CMakeLists.txt +++ b/src/tools/rbd/CMakeLists.txt @@ -36,6 +36,10 @@ set(rbd_srcs action/Status.cc action/Trash.cc action/Watch.cc) +if(FREEBSD) + list(APPEND rbd_srcs action/Ggate.cc) +endif() + add_executable(rbd ${rbd_srcs} $) set_target_properties(rbd PROPERTIES OUTPUT_NAME rbd) diff --git a/src/tools/rbd/action/Ggate.cc b/src/tools/rbd/action/Ggate.cc new file mode 100644 index 00000000000..a87751bd35b --- /dev/null +++ b/src/tools/rbd/action/Ggate.cc @@ -0,0 +1,187 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include +#include +#include + +#include "include/stringify.h" +#include "common/SubProcess.h" + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" + +#include +#include +#include + +#include + +namespace rbd { +namespace action { +namespace ggate { + +namespace at = argument_types; +namespace po = boost::program_options; + +static int call_ggate_cmd(const po::variables_map &vm, + const std::vector &args) +{ + SubProcess process("rbd-ggate", SubProcess::KEEP, SubProcess::KEEP, + SubProcess::KEEP); + + if (vm.count("conf")) { + process.add_cmd_arg("--conf"); + process.add_cmd_arg(vm["conf"].as().c_str()); + } + if (vm.count("cluster")) { + process.add_cmd_arg("--cluster"); + process.add_cmd_arg(vm["cluster"].as().c_str()); + } + if (vm.count("id")) { + process.add_cmd_arg("--id"); + process.add_cmd_arg(vm["id"].as().c_str()); + } + if (vm.count("name")) { + process.add_cmd_arg("--name"); + 
process.add_cmd_arg(vm["name"].as().c_str()); + } + if (vm.count("mon_host")) { + process.add_cmd_arg("--mon_host"); + process.add_cmd_arg(vm["mon_host"].as().c_str()); + } + if (vm.count("keyfile")) { + process.add_cmd_arg("--keyfile"); + process.add_cmd_arg(vm["keyfile"].as().c_str()); + } + if (vm.count("keyring")) { + process.add_cmd_arg("--keyring"); + process.add_cmd_arg(vm["keyring"].as().c_str()); + } + + for (std::vector::const_iterator p = args.begin(); + p != args.end(); ++p) + process.add_cmd_arg(*p); + + if (process.spawn()) { + std::cerr << "rbd: failed to run rbd-ggate: " << process.err() << std::endl; + return -EINVAL; + } else if (process.join()) { + std::cerr << "rbd: rbd-ggate failed with error: " << process.err() + << std::endl; + return -EINVAL; + } + + return 0; +} + +void get_list_arguments(po::options_description *positional, + po::options_description *options) +{ } + +int execute_list(const po::variables_map &vm) +{ + std::vector args; + + args.push_back("list"); + + return call_ggate_cmd(vm, args); +} + +void get_map_arguments(po::options_description *positional, + po::options_description *options) +{ + at::add_image_or_snap_spec_options(positional, options, + at::ARGUMENT_MODIFIER_NONE); + options->add_options() + ("read-only", po::bool_switch(), "map read-only") + ("exclusive", po::bool_switch(), "forbid writes by other clients") + ("device", po::value(), "specify ggate device"); +} + +int execute_map(const po::variables_map &vm) +{ + size_t arg_index = 0; + std::string pool_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &image_name, + &snap_name, utils::SNAPSHOT_PRESENCE_PERMITTED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::vector args; + + args.push_back("map"); + std::string img; + img.append(pool_name); + img.append("/"); + img.append(image_name); + if (!snap_name.empty()) { + 
img.append("@"); + img.append(snap_name); + } + args.push_back(img.c_str()); + + if (vm["read-only"].as()) + args.push_back("--read-only"); + + if (vm["exclusive"].as()) + args.push_back("--exclusive"); + + if (vm.count("device")) { + args.push_back("--device"); + args.push_back(vm["device"].as().c_str()); + } + + return call_ggate_cmd(vm, args); +} + +void get_unmap_arguments(po::options_description *positional, + po::options_description *options) +{ + positional->add_options() + ("device-spec", "specify ggate device"); +} + +int execute_unmap(const po::variables_map &vm) +{ + std::string device_name = utils::get_positional_argument(vm, 0); + if (!boost::starts_with(device_name, "/dev/")) { + device_name.clear(); + } + + if (device_name.empty()) { + std::cerr << "rbd: ggate unmap requires device path" << std::endl; + return -EINVAL; + } + + std::vector args; + + args.push_back("unmap"); + args.push_back(device_name.c_str()); + + return call_ggate_cmd(vm, args); +} + +Shell::SwitchArguments switched_arguments({"read-only", "exclusive"}); + +Shell::Action action_list( + {"ggate", "list"}, {"ggate", "ls"}, "List mapped ggate devices.", "", + &get_list_arguments, &execute_list); + +Shell::Action action_map( + {"ggate", "map"}, {}, "Map an image to a ggate device.", "", + &get_map_arguments, &execute_map); + +Shell::Action action_unmap( + {"ggate", "unmap"}, {}, "Unmap a ggate device.", "", + &get_unmap_arguments, &execute_unmap); + +} // namespace ggate +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd_ggate/CMakeLists.txt b/src/tools/rbd_ggate/CMakeLists.txt new file mode 100644 index 00000000000..5c5572c48fe --- /dev/null +++ b/src/tools/rbd_ggate/CMakeLists.txt @@ -0,0 +1,9 @@ +add_executable(rbd-ggate + Driver.cc + Server.cc + Watcher.cc + debug.cc + ggate_drv.c + main.cc) +target_link_libraries(rbd-ggate geom librbd librados global) +install(TARGETS rbd-ggate DESTINATION bin) diff --git a/src/tools/rbd_ggate/Driver.cc 
b/src/tools/rbd_ggate/Driver.cc new file mode 100644 index 00000000000..cf63fc66024 --- /dev/null +++ b/src/tools/rbd_ggate/Driver.cc @@ -0,0 +1,166 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include + +#include "common/debug.h" +#include "common/errno.h" +#include "Driver.h" +#include "Request.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "rbd::ggate::Driver: " << this \ + << " " << __func__ << ": " + +namespace rbd { +namespace ggate { + +int Driver::load() { + + return ggate_drv_load(); +} + +int Driver::kill(const std::string &devname) { + + int r = ggate_drv_kill(devname.c_str()); + + return r; +} + +int Driver::list(std::list &devs) { + size_t size = 1024; + char **devs_ = nullptr; + int r; + + while (size <= 1024 * 1024) { + devs_ = static_cast( + realloc(static_cast(devs_), size * sizeof(*devs_))); + r = ggate_drv_list(devs_, &size); + if (r != -ERANGE) { + break; + } + size *= 2; + } + if (r < 0) { + goto free; + } + + devs.clear(); + for (size_t i = 0; i < size; i++) { + devs.push_back(devs_[i]); + } + + ggate_drv_list_free(devs_, size); +free: + free(devs_); + + return r; +} + +Driver::Driver(const std::string &devname, size_t sectorsize, size_t mediasize, + bool readonly, const std::string &info) + : m_devname(devname), m_sectorsize(sectorsize), m_mediasize(mediasize), + m_readonly(readonly), m_info(info) { +} + +int Driver::init() { + dout(20) << dendl; + + char name[PATH_MAX]; + size_t namelen; + + if (m_devname.empty()) { + name[0] = '\0'; + namelen = PATH_MAX; + } else { + namelen = m_devname.size(); + if (namelen >= PATH_MAX) { + return -ENAMETOOLONG; + } + strncpy(name, m_devname.c_str(), namelen + 1); + } + + int r = ggate_drv_create(name, namelen, m_sectorsize, m_mediasize, m_readonly, + m_info.c_str(), &m_drv); + if (r < 0) { + return r; + } + + if (m_devname.empty()) { + m_devname = name; + } + + 
return 0; +} + +std::string Driver::get_devname() const { + dout(30) << m_devname << dendl; + + return m_devname; +} + +void Driver::shut_down() { + dout(20) << dendl; + + ggate_drv_destroy(m_drv); +} + +int Driver::resize(size_t newsize) { + dout(20) << "newsize=" << newsize << dendl; + + int r = ggate_drv_resize(m_drv, newsize); + if (r < 0) { + return r; + } + + m_mediasize = newsize; + return 0; +} + +int Driver::recv(Request **req) { + dout(20) << dendl; + + ggate_drv_req_t req_; + + int r = ggate_drv_recv(m_drv, &req_); + if (r < 0) { + return r; + } + + *req = new Request(req_); + + dout(20) << "req=" << *req << dendl; + + if (ggate_drv_req_cmd(req_) == GGATE_DRV_CMD_WRITE) { + bufferptr ptr(buffer::claim_malloc( + ggate_drv_req_length(req_), + static_cast(ggate_drv_req_release_buf(req_)))); + (*req)->bl.push_back(ptr); + } + + return 0; +} + +int Driver::send(Request *req) { + dout(20) << "req=" << req << dendl; + + if (ggate_drv_req_cmd(req->req) == GGATE_DRV_CMD_READ && + ggate_drv_req_error(req->req) == 0) { + assert(req->bl.length() == ggate_drv_req_length(req->req)); + // TODO: avoid copying? 
+ req->bl.copy(0, ggate_drv_req_length(req->req), + static_cast(ggate_drv_req_buf(req->req))); + dout(20) << "copied resulting " << req->bl.length() << " bytes to " + << ggate_drv_req_buf(req->req) << dendl; + } + + int r = ggate_drv_send(m_drv, req->req); + + delete req; + return r; +} + +} // namespace ggate +} // namespace rbd diff --git a/src/tools/rbd_ggate/Driver.h b/src/tools/rbd_ggate/Driver.h new file mode 100644 index 00000000000..b52b48ab112 --- /dev/null +++ b/src/tools/rbd_ggate/Driver.h @@ -0,0 +1,49 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_GGATE_DRIVER_H +#define CEPH_RBD_GGATE_DRIVER_H + +#include +#include + +#include "ggate_drv.h" + +namespace rbd { +namespace ggate { + +struct Request; + +class Driver { +public: + static int load(); + static int kill(const std::string &devname); + static int list(std::list &devs); + + Driver(const std::string &devname, size_t sectorsize, size_t mediasize, + bool readonly, const std::string &info); + + int init(); + void shut_down(); + + std::string get_devname() const; + + int recv(Request **req); + int send(Request *req); + + int resize(size_t newsize); + +private: + std::string m_devname; + size_t m_sectorsize; + size_t m_mediasize; + bool m_readonly; + std::string m_info; + ggate_drv_t m_drv = 0; +}; + +} // namespace ggate +} // namespace rbd + +#endif // CEPH_RBD_GGATE_DRIVER_H + diff --git a/src/tools/rbd_ggate/Request.h b/src/tools/rbd_ggate/Request.h new file mode 100644 index 00000000000..66f2198589a --- /dev/null +++ b/src/tools/rbd_ggate/Request.h @@ -0,0 +1,55 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_GGATE_REQUEST_H +#define CEPH_RBD_GGATE_REQUEST_H + +#include "ggate_drv.h" + +namespace rbd { +namespace ggate { + +struct Request { + enum Command { + Unknown = 0, + Write = 1, + Read = 2, + Flush = 3, + Discard = 4, + }; + + ggate_drv_req_t 
req; + bufferlist bl; + + Request(ggate_drv_req_t req) : req(req) { + } + + uint64_t get_id() { + return ggate_drv_req_id(req); + } + + Command get_cmd() { + return static_cast(ggate_drv_req_cmd(req)); + } + + size_t get_length() { + return ggate_drv_req_length(req); + } + + uint64_t get_offset() { + return ggate_drv_req_offset(req); + } + + uint64_t get_error() { + return ggate_drv_req_error(req); + } + + void set_error(int error) { + ggate_drv_req_set_error(req, error); + } +}; + +} // namespace ggate +} // namespace rbd + +#endif // CEPH_RBD_GGATE_REQUEST_H diff --git a/src/tools/rbd_ggate/Server.cc b/src/tools/rbd_ggate/Server.cc new file mode 100644 index 00000000000..6fde848dbc2 --- /dev/null +++ b/src/tools/rbd_ggate/Server.cc @@ -0,0 +1,270 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" +#include "Driver.h" +#include "Server.h" +#include "Request.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "rbd::ggate::Server: " << this \ + << " " << __func__ << ": " + +namespace rbd { +namespace ggate { + +Server::Server(Driver *drv, librbd::Image& image) + : m_drv(drv), m_image(image), m_lock("rbd::ggate::Server::m_lock"), + m_reader_thread(this, &Server::reader_entry), + m_writer_thread(this, &Server::writer_entry) { +} + +void Server::run() { + dout(10) << dendl; + + int r = start(); + assert(r == 0); + + dout(20) << "entering run loop" << dendl; + + { + Mutex::Locker locker(m_lock); + while (!m_stopping) { + m_cond.WaitInterval(m_lock, utime_t(1, 0)); + } + } + + dout(20) << "exiting run loop" << dendl; + + stop(); +} + +int Server::start() { + dout(10) << dendl; + + m_reader_thread.create("rbd_reader"); + m_writer_thread.create("rbd_writer"); + return 0; +} + +void Server::stop() { + dout(10) << dendl; + + { + Mutex::Locker locker(m_lock); + assert(m_stopping); + } + + 
m_reader_thread.join(); + m_writer_thread.join(); + + wait_clean(); +} + +void Server::io_start(IOContext *ctx) { + dout(20) << ctx << dendl; + + Mutex::Locker locker(m_lock); + m_io_pending.push_back(&ctx->item); +} + +void Server::io_finish(IOContext *ctx) { + dout(20) << ctx << dendl; + + Mutex::Locker locker(m_lock); + assert(ctx->item.is_on_list()); + + ctx->item.remove_myself(); + m_io_finished.push_back(&ctx->item); + m_cond.Signal(); +} + +Server::IOContext *Server::wait_io_finish() { + dout(20) << dendl; + + Mutex::Locker locker(m_lock); + + while (m_io_finished.empty() && !m_stopping) { + m_cond.Wait(m_lock); + } + + if (m_io_finished.empty()) { + return nullptr; + } + + IOContext *ret = m_io_finished.front(); + m_io_finished.pop_front(); + + return ret; +} + +void Server::wait_clean() { + dout(20) << dendl; + + assert(!m_reader_thread.is_started()); + + Mutex::Locker locker(m_lock); + + while (!m_io_pending.empty()) { + m_cond.Wait(m_lock); + } + + while (!m_io_finished.empty()) { + ceph::unique_ptr free_ctx(m_io_finished.front()); + m_io_finished.pop_front(); + } +} + +void Server::aio_callback(librbd::completion_t cb, void *arg) { + librbd::RBD::AioCompletion *aio_completion = + reinterpret_cast(cb); + + IOContext *ctx = reinterpret_cast(arg); + int r = aio_completion->get_return_value(); + + ctx->server->handle_aio(ctx, r); + aio_completion->release(); +} + +void Server::handle_aio(IOContext *ctx, int r) { + dout(20) << ctx << ": r=" << r << dendl; + + if (r == -EINVAL) { + // if shrinking an image, a pagecache writeback might reference + // extents outside of the range of the new image extents + dout(5) << "masking IO out-of-bounds error" << dendl; + ctx->req->bl.clear(); + r = 0; + } + + if (r < 0) { + ctx->req->set_error(-r); + } else if ((ctx->req->get_cmd() == Request::Read) && + r != static_cast(ctx->req->get_length())) { + int pad_byte_count = static_cast (ctx->req->get_length()) - r; + ctx->req->bl.append_zero(pad_byte_count); + dout(20) << 
ctx << ": pad byte count: " << pad_byte_count << dendl; + ctx->req->set_error(0); + } else { + ctx->req->set_error(0); + } + io_finish(ctx); +} + +void Server::reader_entry() { + dout(20) << dendl; + + while (!m_stopping) { + ceph::unique_ptr ctx(new IOContext(this)); + + dout(20) << "waiting for ggate request" << dendl; + + int r = m_drv->recv(&ctx->req); + if (r < 0) { + if (r != -ECANCELED) { + derr << "recv: " << cpp_strerror(r) << dendl; + } + Mutex::Locker locker(m_lock); + m_stopping = true; + m_cond.Signal(); + return; + } + + IOContext *pctx = ctx.release(); + + dout(20) << pctx << ": start: " << *pctx << dendl; + + io_start(pctx); + librbd::RBD::AioCompletion *c = + new librbd::RBD::AioCompletion(pctx, aio_callback); + switch (pctx->req->get_cmd()) + { + case rbd::ggate::Request::Write: + m_image.aio_write(pctx->req->get_offset(), pctx->req->get_length(), + pctx->req->bl, c); + break; + case rbd::ggate::Request::Read: + m_image.aio_read(pctx->req->get_offset(), pctx->req->get_length(), + pctx->req->bl, c); + break; + case rbd::ggate::Request::Flush: + m_image.aio_flush(c); + break; + case rbd::ggate::Request::Discard: + m_image.aio_discard(pctx->req->get_offset(), pctx->req->get_length(), c); + break; + default: + derr << pctx << ": invalid request command: " << pctx->req->get_cmd() + << dendl; + c->release(); + Mutex::Locker locker(m_lock); + m_stopping = true; + m_cond.Signal(); + return; + } + } + dout(20) << "terminated" << dendl; +} + +void Server::writer_entry() { + dout(20) << dendl; + + while (!m_stopping) { + dout(20) << "waiting for io request" << dendl; + + ceph::unique_ptr ctx(wait_io_finish()); + if (!ctx) { + dout(20) << "no io requests, terminating" << dendl; + return; + } + + dout(20) << ctx.get() << ": got: " << *ctx << dendl; + + int r = m_drv->send(ctx->req); + if (r < 0) { + derr << ctx.get() << ": send: " << cpp_strerror(r) << dendl; + Mutex::Locker locker(m_lock); + m_stopping = true; + m_cond.Signal(); + return; + } + dout(20) << 
ctx.get() << " finish" << dendl; + } + dout(20) << "terminated" << dendl; +} + +std::ostream &operator<<(std::ostream &os, const Server::IOContext &ctx) { + + os << "[" << ctx.req->get_id(); + + switch (ctx.req->get_cmd()) + { + case rbd::ggate::Request::Write: + os << " Write "; + break; + case rbd::ggate::Request::Read: + os << " Read "; + break; + case rbd::ggate::Request::Flush: + os << " Flush "; + break; + case rbd::ggate::Request::Discard: + os << " Discard "; + break; + default: + os << " Unknow(" << ctx.req->get_cmd() << ") "; + break; + } + + os << ctx.req->get_offset() << "~" << ctx.req->get_length() << " " + << ctx.req->get_error() << "]"; + + return os; +} + +} // namespace ggate +} // namespace rbd + diff --git a/src/tools/rbd_ggate/Server.h b/src/tools/rbd_ggate/Server.h new file mode 100644 index 00000000000..8ed4f512010 --- /dev/null +++ b/src/tools/rbd_ggate/Server.h @@ -0,0 +1,88 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_GGATE_SERVER_H +#define CEPH_RBD_GGATE_SERVER_H + +#include "include/rbd/librbd.hpp" +#include "include/xlist.h" +#include "common/Cond.h" +#include "common/Mutex.h" +#include "common/Thread.h" + +namespace rbd { +namespace ggate { + +class Driver; +struct Request; + +class Server { +public: + Server(Driver *drv, librbd::Image& image); + + void run(); + +private: + struct IOContext { + xlist::item item; + Server *server; + Request *req = nullptr; + + IOContext(Server *server) : item(this), server(server) { + } + }; + + class ThreadHelper : public Thread { + public: + typedef void (Server::*entry_func)(); + + ThreadHelper(Server *server, entry_func func) + : server(server), func(func) { + } + + protected: + virtual void* entry() { + (server->*func)(); + return nullptr; + } + + private: + Server *server; + entry_func func; + }; + + friend std::ostream &operator<<(std::ostream &os, const IOContext &ctx); + + Driver *m_drv; + librbd::Image &m_image; + + 
mutable Mutex m_lock; + Cond m_cond; + bool m_stopping = false; + ThreadHelper m_reader_thread, m_writer_thread; + xlist m_io_pending; + xlist m_io_finished; + + static void aio_callback(librbd::completion_t cb, void *arg); + + int start(); + void stop(); + + void reader_entry(); + void writer_entry(); + + void io_start(IOContext *ctx); + void io_finish(IOContext *ctx); + + IOContext *wait_io_finish(); + void wait_clean(); + + void handle_aio(IOContext *ctx, int r); +}; + +std::ostream &operator<<(std::ostream &os, const Server::IOContext &ctx); + +} // namespace ggate +} // namespace rbd + +#endif // CEPH_RBD_GGATE_SERVER_H diff --git a/src/tools/rbd_ggate/Watcher.cc b/src/tools/rbd_ggate/Watcher.cc new file mode 100644 index 00000000000..57b3f960e45 --- /dev/null +++ b/src/tools/rbd_ggate/Watcher.cc @@ -0,0 +1,48 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" +#include "Driver.h" +#include "Watcher.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "rbd::ggate::Watcher: " << this \ + << " " << __func__ << ": " + +namespace rbd { +namespace ggate { + +Watcher::Watcher(Driver *drv, librados::IoCtx &ioctx, librbd::Image &image, + size_t size) + : m_drv(drv), m_ioctx(ioctx), m_image(image), m_size(size) { +} + +void Watcher::handle_notify() { + dout(20) << dendl; + + librbd::image_info_t info; + + if (m_image.stat(info, sizeof(info)) == 0) { + size_t new_size = info.size; + + if (new_size != m_size) { + int r = m_drv->resize(new_size); + if (r < 0) { + derr << "resize failed: " << cpp_strerror(r) << dendl; + m_drv->shut_down(); + } + r = m_image.invalidate_cache(); + if (r < 0) { + derr << "invalidate rbd cache failed: " << cpp_strerror(r) << dendl; + m_drv->shut_down(); + } + m_size = new_size; + } + } +} + +} // namespace ggate +} // namespace rbd diff --git 
a/src/tools/rbd_ggate/Watcher.h b/src/tools/rbd_ggate/Watcher.h new file mode 100644 index 00000000000..8f524b43fc3 --- /dev/null +++ b/src/tools/rbd_ggate/Watcher.h @@ -0,0 +1,34 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_GGATE_WATCHER_H +#define CEPH_RBD_GGATE_WATCHER_H + +#include "include/rbd/librbd.hpp" + +namespace rbd { +namespace ggate { + +class Driver; + +class Watcher : public librbd::UpdateWatchCtx +{ +public: + Watcher(Driver *m_drv, librados::IoCtx &ioctx, librbd::Image &image, + size_t size); + + void handle_notify() override; + +private: + Driver *m_drv; + librados::IoCtx &m_ioctx; + librbd::Image &m_image; + size_t m_size; +}; + + +} // namespace ggate +} // namespace rbd + +#endif // CEPH_RBD_GGATE_WATCHER_H + diff --git a/src/tools/rbd_ggate/debug.cc b/src/tools/rbd_ggate/debug.cc new file mode 100644 index 00000000000..8cf912c3ece --- /dev/null +++ b/src/tools/rbd_ggate/debug.cc @@ -0,0 +1,55 @@ +#include "common/debug.h" +#include "common/errno.h" +#include "debug.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "rbd::ggate: " + +extern "C" void debugv(int level, const char *fmt, va_list ap) { + char *msg; + int saved_errno = errno; + + if (g_ceph_context == nullptr) { + return; + } + + vasprintf(&msg, fmt, ap); + + dout(level) << msg << dendl; + + free(msg); + errno = saved_errno; +} + +extern "C" void debug(int level, const char *fmt, ...) { + va_list ap; + + va_start(ap, fmt); + debugv(level, fmt, ap); + va_end(ap); +} + +extern "C" void errx(const char *fmt, ...) { + va_list ap; + + va_start(ap, fmt); + debugv(-1, fmt, ap); + va_end(ap); +} + +extern "C" void err(const char *fmt, ...) 
{ + va_list ap; + char *msg; + int saved_errno = errno; + + va_start(ap, fmt); + vasprintf(&msg, fmt, ap); + va_end(ap); + errno = saved_errno; + + errx("%s: %s", msg, cpp_strerror(errno).c_str()); + + free(msg); +} diff --git a/src/tools/rbd_ggate/debug.h b/src/tools/rbd_ggate/debug.h new file mode 100644 index 00000000000..da9b46a381d --- /dev/null +++ b/src/tools/rbd_ggate/debug.h @@ -0,0 +1,17 @@ +#ifndef CEPH_RBD_GGATE_DEBUG_H +#define CEPH_RBD_GGATE_DEBUG_H + +#ifdef __cplusplus +extern "C" { +#endif + +void debug(int level, const char *fmt, ...) __printflike(2, 3); +void debugv(int level, const char *fmt, va_list ap) __printflike(2, 0); +void err(const char *fmt, ...) __printflike(1, 2); +void errx(const char *fmt, ...) __printflike(1, 2); + +#ifdef __cplusplus +} +#endif + +#endif // CEPH_RBD_GGATE_DEBUG_H diff --git a/src/tools/rbd_ggate/ggate_drv.c b/src/tools/rbd_ggate/ggate_drv.c new file mode 100644 index 00000000000..8b02b1bef42 --- /dev/null +++ b/src/tools/rbd_ggate/ggate_drv.c @@ -0,0 +1,375 @@ +// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "debug.h" +#include "ggate_drv.h" + +uint64_t ggate_drv_req_id(ggate_drv_req_t req) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + return ggio->gctl_seq; +} + +int ggate_drv_req_cmd(ggate_drv_req_t req) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + switch (ggio->gctl_cmd) { + case BIO_WRITE: + return GGATE_DRV_CMD_WRITE; + case BIO_READ: + return GGATE_DRV_CMD_READ; + case BIO_FLUSH: + return GGATE_DRV_CMD_FLUSH; + case BIO_DELETE: + return GGATE_DRV_CMD_DISCARD; + default: + return GGATE_DRV_CMD_UNKNOWN; + } +} + +uint64_t ggate_drv_req_offset(ggate_drv_req_t req) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + return 
ggio->gctl_offset; +} + +size_t ggate_drv_req_length(ggate_drv_req_t req) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + return ggio->gctl_length; +} + +void *ggate_drv_req_buf(ggate_drv_req_t req) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + return ggio->gctl_data; +} + +int ggate_drv_req_error(ggate_drv_req_t req) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + return ggio->gctl_error; +} + +void ggate_drv_req_set_error(ggate_drv_req_t req, int error) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + ggio->gctl_error = error; +} + +void *ggate_drv_req_release_buf(ggate_drv_req_t req) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + void *data = ggio->gctl_data; + ggio->gctl_data = NULL; + + return data; +} + +struct ggate_drv { + int fd; + int unit; +}; + +int ggate_drv_load() { + if (modfind("g_gate") != -1) { + /* Present in kernel. */ + return 0; + } + + if (kldload("geom_gate") == -1 || modfind("g_gate") == -1) { + if (errno != EEXIST) { + err("failed to load geom_gate module"); + return -errno; + } + } + return 0; +} + +int ggate_drv_create(char *name, size_t namelen, size_t sectorsize, + size_t mediasize, bool readonly, const char *info, ggate_drv_t *drv_) { + struct ggate_drv *drv; + struct g_gate_ctl_create ggiocreate; + + debug(20, "%s: name=%s, sectorsize=%zd, mediasize=%zd, readonly=%d, info=%s", + __func__, name, sectorsize, mediasize, (int)readonly, info); + + if (*name != '\0') { + if (namelen > sizeof(ggiocreate.gctl_name) - 1) { + return -ENAMETOOLONG; + } + } + + /* + * We communicate with ggate via /dev/ggctl. Open it. + */ + int fd = open("/dev/" G_GATE_CTL_NAME, O_RDWR); + if (fd == -1) { + err("failed to open /dev/" G_GATE_CTL_NAME); + return -errno; + } + + drv = calloc(1, sizeof(*drv)); + if (drv == NULL) { + errno = -ENOMEM; + goto fail_close; + } + + /* + * Create provider. 
+ */ + memset(&ggiocreate, 0, sizeof(ggiocreate)); + ggiocreate.gctl_version = G_GATE_VERSION; + ggiocreate.gctl_mediasize = mediasize; + ggiocreate.gctl_sectorsize = sectorsize; + ggiocreate.gctl_flags = readonly ? G_GATE_FLAG_READONLY : 0; + ggiocreate.gctl_maxcount = 0; + ggiocreate.gctl_timeout = 0; + if (*name != '\0') { + ggiocreate.gctl_unit = G_GATE_NAME_GIVEN; + strlcpy(ggiocreate.gctl_name, name, sizeof(ggiocreate.gctl_name)); + } else { + ggiocreate.gctl_unit = G_GATE_UNIT_AUTO; + } + strlcpy(ggiocreate.gctl_info, info, sizeof(ggiocreate.gctl_info)); + if (ioctl(fd, G_GATE_CMD_CREATE, &ggiocreate) == -1) { + err("failed to create " G_GATE_PROVIDER_NAME " device"); + goto fail; + } + + debug(20, "%s: created, unit: %d, name: %s", __func__, ggiocreate.gctl_unit, + ggiocreate.gctl_name); + + drv->fd = fd; + drv->unit = ggiocreate.gctl_unit; + *drv_ = drv; + + if (*name == '\0') { + snprintf(name, namelen, "%s%d", G_GATE_PROVIDER_NAME, drv->unit); + } + + return 0; + +fail: + free(drv); +fail_close: + close(fd); + return -errno; +} + +void ggate_drv_destroy(ggate_drv_t drv_) { + struct ggate_drv *drv = (struct ggate_drv *)drv_; + struct g_gate_ctl_destroy ggiodestroy; + + debug(20, "%s %p", __func__, drv); + + memset(&ggiodestroy, 0, sizeof(ggiodestroy)); + ggiodestroy.gctl_version = G_GATE_VERSION; + ggiodestroy.gctl_unit = drv->unit; + ggiodestroy.gctl_force = 1; + + // Remember errno. + int rerrno = errno; + + int r = ioctl(drv->fd, G_GATE_CMD_DESTROY, &ggiodestroy); + if (r == -1) { + err("failed to destroy /dev/%s%d device", G_GATE_PROVIDER_NAME, + drv->unit); + } + // Restore errno. 
+ errno = rerrno; + + free(drv); +} + +int ggate_drv_resize(ggate_drv_t drv_, size_t newsize) { + struct ggate_drv *drv = (struct ggate_drv *)drv_; + + debug(20, "%s %p: newsize=%zd", __func__, drv, newsize); + + struct g_gate_ctl_modify ggiomodify; + + memset(&ggiomodify, 0, sizeof(ggiomodify)); + ggiomodify.gctl_version = G_GATE_VERSION; + ggiomodify.gctl_unit = drv->unit; + ggiomodify.gctl_modify = GG_MODIFY_MEDIASIZE; + ggiomodify.gctl_mediasize = newsize; + + int r = ioctl(drv->fd, G_GATE_CMD_MODIFY, &ggiomodify); + if (r == -1) { + r = -errno; + err("failed to resize /dev/%s%d device", G_GATE_PROVIDER_NAME, drv->unit); + } + return r; +} + +int ggate_drv_kill(const char *devname) { + debug(20, "%s %s", __func__, devname); + + int fd = open("/dev/" G_GATE_CTL_NAME, O_RDWR); + if (fd == -1) { + err("failed to open /dev/" G_GATE_CTL_NAME); + return -errno; + } + + struct g_gate_ctl_destroy ggiodestroy; + memset(&ggiodestroy, 0, sizeof(ggiodestroy)); + ggiodestroy.gctl_version = G_GATE_VERSION; + ggiodestroy.gctl_unit = G_GATE_NAME_GIVEN; + ggiodestroy.gctl_force = 1; + + strlcpy(ggiodestroy.gctl_name, devname, sizeof(ggiodestroy.gctl_name)); + + int r = ioctl(fd, G_GATE_CMD_DESTROY, &ggiodestroy); + if (r == -1) { + r = -errno; + err("failed to destroy %s device", devname); + } + + close(fd); + return r; +} + +int ggate_drv_recv(ggate_drv_t drv_, ggate_drv_req_t *req) { + struct ggate_drv *drv = (struct ggate_drv *)drv_; + struct g_gate_ctl_io *ggio; + int error, r; + + debug(20, "%s", __func__); + + ggio = calloc(1, sizeof(*ggio)); + if (ggio == NULL) { + return -ENOMEM; + } + + ggio->gctl_version = G_GATE_VERSION; + ggio->gctl_unit = drv->unit; + ggio->gctl_data = malloc(MAXPHYS); + ggio->gctl_length = MAXPHYS; + + debug(20, "%s: waiting for request from kernel", __func__); + if (ioctl(drv->fd, G_GATE_CMD_START, ggio) == -1) { + err("%s: G_GATE_CMD_START failed", __func__); + return -errno; + } + + debug(20, "%s: got request from kernel: " + "unit=%d, seq=%ju, 
cmd=%u, offset=%ju, length=%ju, error=%d, data=%p", + __func__, ggio->gctl_unit, (uintmax_t)ggio->gctl_seq, ggio->gctl_cmd, + (uintmax_t)ggio->gctl_offset, (uintmax_t)ggio->gctl_length, + ggio->gctl_error, ggio->gctl_data); + + error = ggio->gctl_error; + switch (error) { + case 0: + break; + case ECANCELED: + debug(10, "%s: canceled: exit gracefully", __func__); + r = -error; + goto fail; + case ENOMEM: + /* + * Buffer too small? Impossible, we allocate MAXPHYS + * bytes - request can't be bigger than that. + */ + /* FALLTHROUGH */ + case ENXIO: + default: + errno = error; + err("%s: G_GATE_CMD_START failed", __func__); + r = -error; + goto fail; + } + + *req = ggio; + return 0; + +fail: + free(ggio->gctl_data); + free(ggio); + return r; +} + +int ggate_drv_send(ggate_drv_t drv_, ggate_drv_req_t req) { + struct ggate_drv *drv = (struct ggate_drv *)drv_; + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + int r = 0; + + debug(20, "%s: send request to kernel: " + "unit=%d, seq=%ju, cmd=%u, offset=%ju, length=%ju, error=%d, data=%p", + __func__, ggio->gctl_unit, (uintmax_t)ggio->gctl_seq, ggio->gctl_cmd, + (uintmax_t)ggio->gctl_offset, (uintmax_t)ggio->gctl_length, + ggio->gctl_error, ggio->gctl_data); + + if (ioctl(drv->fd, G_GATE_CMD_DONE, ggio) == -1) { + err("%s: G_GATE_CMD_DONE failed", __func__); + r = -errno; + } + + free(ggio->gctl_data); + free(ggio); + return r; +} + +int ggate_drv_list(char **devs, size_t *size) { + struct gmesh mesh; + struct gclass *class; + struct ggeom *gp; + int r; + size_t max_size; + + r = geom_gettree(&mesh); + if (r != 0) { + return -errno; + } + + max_size = *size; + *size = 0; + + LIST_FOREACH(class, &mesh.lg_class, lg_class) { + if (strcmp(class->lg_name, G_GATE_CLASS_NAME) == 0) { + LIST_FOREACH(gp, &class->lg_geom, lg_geom) { + (*size)++; + } + if (*size > max_size) { + r = -ERANGE; + goto done; + } + LIST_FOREACH(gp, &class->lg_geom, lg_geom) { + *devs = strdup(gp->lg_name); + devs++; + } + } + } + +done: + 
geom_deletetree(&mesh); + return r; +} + +void ggate_drv_list_free(char **devs, size_t size) { + size_t i; + + for (i = 0; i < size; i++) { + free(devs[i]); + } +} diff --git a/src/tools/rbd_ggate/ggate_drv.h b/src/tools/rbd_ggate/ggate_drv.h new file mode 100644 index 00000000000..5ea5f32b93e --- /dev/null +++ b/src/tools/rbd_ggate/ggate_drv.h @@ -0,0 +1,57 @@ +// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_GGATE_GGATE_DRV_H +#define CEPH_RBD_GGATE_GGATE_DRV_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +typedef void *ggate_drv_t; +typedef void *ggate_drv_req_t; + +/* + * GGATE driver commands. They are mapped to GgateReq::Command. + */ +enum { + GGATE_DRV_CMD_UNKNOWN = 0, + GGATE_DRV_CMD_WRITE = 1, + GGATE_DRV_CMD_READ = 2, + GGATE_DRV_CMD_FLUSH = 3, + GGATE_DRV_CMD_DISCARD = 4, +}; + +uint64_t ggate_drv_req_id(ggate_drv_req_t req); +int ggate_drv_req_cmd(ggate_drv_req_t req); +void *ggate_drv_req_buf(ggate_drv_req_t req); +size_t ggate_drv_req_length(ggate_drv_req_t req); +uint64_t ggate_drv_req_offset(ggate_drv_req_t req); +int ggate_drv_req_error(ggate_drv_req_t req); + +void ggate_drv_req_set_error(ggate_drv_req_t req, int error); +void *ggate_drv_req_release_buf(ggate_drv_req_t req); + +int ggate_drv_load(); + +int ggate_drv_create(char *name, size_t namelen, size_t sectorsize, + size_t mediasize, bool readonly, const char *info, ggate_drv_t *drv); +void ggate_drv_destroy(ggate_drv_t drv); + +int ggate_drv_recv(ggate_drv_t drv, ggate_drv_req_t *req); +int ggate_drv_send(ggate_drv_t drv, ggate_drv_req_t req); + +int ggate_drv_resize(ggate_drv_t drv, size_t newsize); + +int ggate_drv_kill(const char *devname); +int ggate_drv_list(char **devs, size_t *size); +void ggate_drv_list_free(char **devs, size_t size); + +#ifdef __cplusplus +} +#endif + +#endif // CEPH_RBD_GGATE_GGATE_DRV_H diff --git a/src/tools/rbd_ggate/main.cc b/src/tools/rbd_ggate/main.cc new file mode 
// Signal callback: logs the received signal and asks the ggate driver to
// shut down.
static void handle_signal(int signum)
{
  derr << "*** Got signal " << sig_str(signum) << " ***" << dendl;

  // This handler is only expected for SIGINT/SIGTERM, and only once the
  // file-level `drv` has been set.
  assert(signum == SIGINT || signum == SIGTERM);
  assert(drv);

  drv->shut_down();
}
CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); + g_ceph_context->_conf->set_val_or_die("pid_file", ""); + + if (global_init_prefork(g_ceph_context) >= 0) { + std::string err; + r = forker.prefork(err); + if (r < 0) { + cerr << err << std::endl; + return r; + } + + if (forker.is_parent()) { + global_init_postfork_start(g_ceph_context); + if (forker.parent_wait(err) != 0) { + return -ENXIO; + } + return 0; + } + } + + common_init_finish(g_ceph_context); + global_init_chdir(g_ceph_context); + + std::string devname = (devpath.compare(0, 5, "/dev/") == 0) ? + devpath.substr(5) : devpath; + std::unique_ptr watcher; + uint64_t handle; + + r = rados.init_with_context(g_ceph_context); + if (r < 0) { + goto done; + } + + r = rados.connect(); + if (r < 0) { + goto done; + } + + r = rados.ioctx_create(poolname.c_str(), io_ctx); + if (r < 0) { + goto done; + } + + r = rbd.open(io_ctx, image, imgname.c_str()); + if (r < 0) { + goto done; + } + + if (exclusive) { + r = image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE); + if (r < 0) { + cerr << "rbd-ggate: failed to acquire exclusive lock: " << cpp_strerror(r) + << std::endl; + goto done; + } + } + + desc = "RBD " + poolname + "/" + imgname; + + if (!snapname.empty()) { + r = image.snap_set(snapname.c_str()); + if (r < 0) { + goto done; + } + readonly = true; + desc += "@" + snapname; + } + + r = image.stat(info, sizeof(info)); + if (r < 0) { + goto done; + } + + rbd::ggate::Driver::load(); + drv.reset(new rbd::ggate::Driver(devname, 512, info.size, readonly, desc)); + r = drv->init(); + if (r < 0) { + r = -errno; + goto done; + } + + watcher.reset(new rbd::ggate::Watcher(drv.get(), io_ctx, image, info.size)); + r = image.update_watch(watcher.get(), &handle); + if (r < 0) { + drv->shut_down(); + goto done; + } + + std::cout << "/dev/" << drv->get_devname() << std::endl; + + if (g_conf->daemonize) { + forker.daemonize(); + global_init_postfork_start(g_ceph_context); + global_init_postfork_finish(g_ceph_context); + } + + 
init_async_signal_handler(); + register_async_signal_handler(SIGHUP, sighup_handler); + register_async_signal_handler_oneshot(SIGINT, handle_signal); + register_async_signal_handler_oneshot(SIGTERM, handle_signal); + + rbd::ggate::Server(drv.get(), image).run(); + + unregister_async_signal_handler(SIGHUP, sighup_handler); + unregister_async_signal_handler(SIGINT, handle_signal); + unregister_async_signal_handler(SIGTERM, handle_signal); + shutdown_async_signal_handler(); + + r = image.update_unwatch(handle); + assert(r == 0); + +done: + image.close(); + io_ctx.close(); + rados.shutdown(); + + forker.exit(r < 0 ? EXIT_FAILURE : 0); + // Unreachable; + return r; +} + +static int do_unmap() +{ + std::string devname = (devpath.compare(0, 5, "/dev/") == 0) ? + devpath.substr(5) : devpath; + + int r = rbd::ggate::Driver::kill(devname); + if (r < 0) { + cerr << "rbd-ggate: failed to destroy " << devname << ": " + << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +static int parse_imgpath(const std::string &imgpath) +{ + boost::regex pattern("^(?:([^/@]+)/)?([^/@]+)(?:@([^/@]+))?$"); + boost::smatch match; + if (!boost::regex_match(imgpath, match, pattern)) { + std::cerr << "rbd-ggate: invalid spec '" << imgpath << "'" << std::endl; + return -EINVAL; + } + + if (match[1].matched) { + poolname = match[1]; + } + + imgname = match[2]; + + if (match[3].matched) { + snapname = match[3]; + } + + return 0; +} + +static int do_list() +{ + rbd::ggate::Driver::load(); + + std::list devs; + int r = rbd::ggate::Driver::list(devs); + if (r < 0) { + return -r; + } + + for (auto &devname : devs) { + cout << "/dev/" << devname << std::endl; + } + return 0; +} + +int main(int argc, const char *argv[]) { + int r; + enum { + None, + Connect, + Disconnect, + List + } cmd = None; + + vector args; + + argv_to_vec(argc, argv, args); + md_config_t().parse_argv(args); + + std::vector::iterator i; + + for (i = args.begin(); i != args.end(); ) { + if (ceph_argparse_flag(args, i, 
"-h", "--help", (char*)NULL)) { + usage(); + return 0; + } else if (ceph_argparse_witharg(args, i, &devpath, "--device", + (char *)NULL)) { + } else if (ceph_argparse_flag(args, i, "--read-only", (char *)NULL)) { + readonly = true; + } else if (ceph_argparse_flag(args, i, "--exclusive", (char *)NULL)) { + exclusive = true; + } else { + ++i; + } + } + + if (args.begin() != args.end()) { + if (strcmp(*args.begin(), "map") == 0) { + cmd = Connect; + } else if (strcmp(*args.begin(), "unmap") == 0) { + cmd = Disconnect; + } else if (strcmp(*args.begin(), "list") == 0) { + cmd = List; + } else { + cerr << "rbd-ggate: unknown command: " << *args.begin() << std::endl; + return EXIT_FAILURE; + } + args.erase(args.begin()); + } + + if (cmd == None) { + cerr << "rbd-ggate: must specify command" << std::endl; + return EXIT_FAILURE; + } + + switch (cmd) { + case Connect: + if (args.begin() == args.end()) { + cerr << "rbd-ggate: must specify image-or-snap-spec" << std::endl; + return EXIT_FAILURE; + } + if (parse_imgpath(string(*args.begin())) < 0) + return EXIT_FAILURE; + args.erase(args.begin()); + break; + case Disconnect: + if (args.begin() == args.end()) { + cerr << "rbd-ggate: must specify ggate device path" << std::endl; + return EXIT_FAILURE; + } + devpath = *args.begin(); + args.erase(args.begin()); + break; + default: + break; + } + + if (args.begin() != args.end()) { + cerr << "rbd-ggate: unknown args: " << *args.begin() << std::endl; + return EXIT_FAILURE; + } + + switch (cmd) { + case Connect: + if (imgname.empty()) { + cerr << "rbd-ggate: image name was not specified" << std::endl; + return EXIT_FAILURE; + } + + r = do_map(argc, argv); + if (r < 0) + return EXIT_FAILURE; + break; + case Disconnect: + r = do_unmap(); + if (r < 0) + return EXIT_FAILURE; + break; + case List: + r = do_list(); + if (r < 0) + return EXIT_FAILURE; + break; + default: + usage(); + return EXIT_FAILURE; + } + + return 0; +}