mirror of
https://github.com/ceph/ceph
synced 2025-03-11 02:39:05 +00:00
Merge pull request #16025 from liewegas/wip-19964
mon: do crushtool test with fork and timeout, but w/o exec of crushtool Reviewed-by: Mykola Golub <mgolub@mirantis.com>
This commit is contained in:
commit
a0ba660afc
@ -2242,65 +2242,6 @@ function test_mon_tell()
|
||||
ceph_watch_wait 'mon.b \[DBG\] from.*cmd=\[{"prefix": "version"}\]: dispatch'
|
||||
}
|
||||
|
||||
function test_mon_crushmap_validation()
|
||||
{
|
||||
local map=$TEMP_DIR/map
|
||||
ceph osd getcrushmap -o $map
|
||||
|
||||
local crushtool_path="${TEMP_DIR}/crushtool"
|
||||
touch "${crushtool_path}"
|
||||
chmod +x "${crushtool_path}"
|
||||
local crushtool_path_old=`ceph-conf --show-config-value crushtool`
|
||||
ceph tell mon.\* injectargs --crushtool "${crushtool_path}"
|
||||
|
||||
printf "%s\n" \
|
||||
"#!/bin/sh
|
||||
cat > /dev/null
|
||||
exit 0" > "${crushtool_path}"
|
||||
|
||||
ceph osd setcrushmap -i $map
|
||||
|
||||
printf "%s\n" \
|
||||
"#!/bin/sh
|
||||
cat > /dev/null
|
||||
exit 1" > "${crushtool_path}"
|
||||
|
||||
expect_false ceph osd setcrushmap -i $map
|
||||
|
||||
printf "%s\n" \
|
||||
"#!/bin/sh
|
||||
cat > /dev/null
|
||||
echo 'TEST FAIL' >&2
|
||||
exit 1" > "${crushtool_path}"
|
||||
|
||||
expect_false ceph osd setcrushmap -i $map 2> $TMPFILE
|
||||
check_response "Error EINVAL: Failed crushmap test: TEST FAIL"
|
||||
|
||||
local mon_lease=`ceph-conf --show-config-value mon_lease`
|
||||
mon_lease=`echo ${mon_lease} | awk '{ printf $1 + 0 }'`
|
||||
|
||||
test "${mon_lease}" -gt 0
|
||||
|
||||
printf "%s\n" \
|
||||
"#!/bin/sh
|
||||
cat > /dev/null
|
||||
sleep $((mon_lease - 1))" > "${crushtool_path}"
|
||||
|
||||
ceph osd setcrushmap -i $map
|
||||
|
||||
printf "%s\n" \
|
||||
"#!/bin/sh
|
||||
cat > /dev/null
|
||||
sleep $((mon_lease + 1))" > "${crushtool_path}"
|
||||
|
||||
expect_false ceph osd setcrushmap -i $map 2> $TMPFILE
|
||||
check_response "Error EINVAL: Failed crushmap test: ${crushtool_path}: timed out (${mon_lease} sec)"
|
||||
|
||||
ceph tell mon.\* injectargs --crushtool "${crushtool_path_old}"
|
||||
|
||||
rm -f "${crushtool_path}"
|
||||
}
|
||||
|
||||
function test_mon_ping()
|
||||
{
|
||||
ceph ping mon.a
|
||||
@ -2446,7 +2387,6 @@ MON_TESTS+=" mon_osd_erasure_code"
|
||||
MON_TESTS+=" mon_osd_misc"
|
||||
MON_TESTS+=" mon_heap_profiler"
|
||||
MON_TESTS+=" mon_tell"
|
||||
MON_TESTS+=" mon_crushmap_validation"
|
||||
MON_TESTS+=" mon_ping"
|
||||
MON_TESTS+=" mon_deprecated_commands"
|
||||
MON_TESTS+=" mon_caps"
|
||||
|
@ -28,7 +28,6 @@ OPTION(lockdep_force_backtrace, OPT_BOOL, false) // always gather current backtr
|
||||
OPTION(run_dir, OPT_STR, "/var/run/ceph") // the "/var/run/ceph" dir, created on daemon startup
|
||||
OPTION(admin_socket, OPT_STR, "$run_dir/$cluster-$name.asok") // default changed by common_preinit()
|
||||
OPTION(admin_socket_mode, OPT_STR, "") // permission bits to set for admin socket file, e.g., "0775", "0755"
|
||||
OPTION(crushtool, OPT_STR, "crushtool") // crushtool utility path
|
||||
|
||||
OPTION(daemonize, OPT_BOOL, false) // default changed by common_preinit()
|
||||
OPTION(setuser, OPT_STR, "") // uid or user name
|
||||
|
161
src/common/fork_function.h
Normal file
161
src/common/fork_function.h
Normal file
@ -0,0 +1,161 @@
|
||||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||||
// vim: ts=8 sw=2 smarttab
|
||||
|
||||
// Run a function in a forked child, with a timeout.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <signal.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/types.h>
|
||||
#include <common/errno.h>
|
||||
#include <ostream>
|
||||
#include "common/errno.h"
|
||||
|
||||
static void _fork_function_dummy_sighandler(int sig) {}
|
||||
|
||||
// Run a function post-fork, with a timeout. Function can return
|
||||
// int8_t only due to unix exit code limitations. Returns -ETIMEDOUT
|
||||
// if timeout is reached.
|
||||
|
||||
static inline int fork_function(
|
||||
int timeout,
|
||||
std::ostream& errstr,
|
||||
std::function<int8_t(void)> f)
|
||||
{
|
||||
// first fork the forker.
|
||||
pid_t forker_pid = fork();
|
||||
if (forker_pid) {
|
||||
// just wait
|
||||
int status;
|
||||
while (waitpid(forker_pid, &status, 0) == -1) {
|
||||
assert(errno == EINTR);
|
||||
}
|
||||
if (WIFSIGNALED(status)) {
|
||||
errstr << ": got signal: " << WTERMSIG(status) << "\n";
|
||||
return 128 + WTERMSIG(status);
|
||||
}
|
||||
if (WIFEXITED(status)) {
|
||||
int8_t r = WEXITSTATUS(status);
|
||||
errstr << ": exit status: " << (int)r << "\n";
|
||||
return r;
|
||||
}
|
||||
errstr << ": waitpid: unknown status returned\n";
|
||||
return -1;
|
||||
}
|
||||
|
||||
// we are forker (first child)
|
||||
|
||||
// close all fds
|
||||
int maxfd = sysconf(_SC_OPEN_MAX);
|
||||
if (maxfd == -1)
|
||||
maxfd = 16384;
|
||||
for (int fd = 0; fd <= maxfd; fd++) {
|
||||
if (fd == STDIN_FILENO)
|
||||
continue;
|
||||
if (fd == STDOUT_FILENO)
|
||||
continue;
|
||||
if (fd == STDERR_FILENO)
|
||||
continue;
|
||||
::close(fd);
|
||||
}
|
||||
|
||||
sigset_t mask, oldmask;
|
||||
int pid;
|
||||
|
||||
// Restore default action for SIGTERM in case the parent process decided
|
||||
// to ignore it.
|
||||
if (signal(SIGTERM, SIG_DFL) == SIG_ERR) {
|
||||
std::cerr << ": signal failed: " << cpp_strerror(errno) << "\n";
|
||||
goto fail_exit;
|
||||
}
|
||||
// Because SIGCHLD is ignored by default, setup dummy handler for it,
|
||||
// so we can mask it.
|
||||
if (signal(SIGCHLD, _fork_function_dummy_sighandler) == SIG_ERR) {
|
||||
std::cerr << ": signal failed: " << cpp_strerror(errno) << "\n";
|
||||
goto fail_exit;
|
||||
}
|
||||
// Setup timeout handler.
|
||||
if (signal(SIGALRM, timeout_sighandler) == SIG_ERR) {
|
||||
std::cerr << ": signal failed: " << cpp_strerror(errno) << "\n";
|
||||
goto fail_exit;
|
||||
}
|
||||
// Block interesting signals.
|
||||
sigemptyset(&mask);
|
||||
sigaddset(&mask, SIGINT);
|
||||
sigaddset(&mask, SIGTERM);
|
||||
sigaddset(&mask, SIGCHLD);
|
||||
sigaddset(&mask, SIGALRM);
|
||||
if (sigprocmask(SIG_SETMASK, &mask, &oldmask) == -1) {
|
||||
std::cerr << ": sigprocmask failed: "
|
||||
<< cpp_strerror(errno) << "\n";
|
||||
goto fail_exit;
|
||||
}
|
||||
|
||||
pid = fork();
|
||||
|
||||
if (pid == -1) {
|
||||
std::cerr << ": fork failed: " << cpp_strerror(errno) << "\n";
|
||||
goto fail_exit;
|
||||
}
|
||||
|
||||
if (pid == 0) { // we are second child
|
||||
// Restore old sigmask.
|
||||
if (sigprocmask(SIG_SETMASK, &oldmask, NULL) == -1) {
|
||||
std::cerr << ": sigprocmask failed: "
|
||||
<< cpp_strerror(errno) << "\n";
|
||||
goto fail_exit;
|
||||
}
|
||||
(void)setpgid(0, 0); // Become process group leader.
|
||||
int8_t r = f();
|
||||
_exit((uint8_t)r);
|
||||
}
|
||||
|
||||
// Parent
|
||||
(void)alarm(timeout);
|
||||
|
||||
for (;;) {
|
||||
int signo;
|
||||
if (sigwait(&mask, &signo) == -1) {
|
||||
std::cerr << ": sigwait failed: " << cpp_strerror(errno) << "\n";
|
||||
goto fail_exit;
|
||||
}
|
||||
switch (signo) {
|
||||
case SIGCHLD:
|
||||
int status;
|
||||
if (waitpid(pid, &status, WNOHANG) == -1) {
|
||||
std::cerr << ": waitpid failed: " << cpp_strerror(errno) << "\n";
|
||||
goto fail_exit;
|
||||
}
|
||||
if (WIFEXITED(status))
|
||||
_exit(WEXITSTATUS(status));
|
||||
if (WIFSIGNALED(status))
|
||||
_exit(128 + WTERMSIG(status));
|
||||
std::cerr << ": unknown status returned\n";
|
||||
goto fail_exit;
|
||||
case SIGINT:
|
||||
case SIGTERM:
|
||||
// Pass SIGINT and SIGTERM, which are usually used to terminate
|
||||
// a process, to the child.
|
||||
if (::kill(pid, signo) == -1) {
|
||||
std::cerr << ": kill failed: " << cpp_strerror(errno) << "\n";
|
||||
goto fail_exit;
|
||||
}
|
||||
continue;
|
||||
case SIGALRM:
|
||||
std::cerr << ": timed out (" << timeout << " sec)\n";
|
||||
if (::killpg(pid, SIGKILL) == -1) {
|
||||
std::cerr << ": kill failed: " << cpp_strerror(errno) << "\n";
|
||||
goto fail_exit;
|
||||
}
|
||||
_exit(-ETIMEDOUT);
|
||||
default:
|
||||
std::cerr << ": sigwait: invalid signal: " << signo << "\n";
|
||||
goto fail_exit;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
fail_exit:
|
||||
_exit(EXIT_FAILURE);
|
||||
}
|
@ -19,6 +19,7 @@
|
||||
#include <boost/icl/interval_map.hpp>
|
||||
#include <boost/algorithm/string/join.hpp>
|
||||
#include "common/SubProcess.h"
|
||||
#include "common/fork_function.h"
|
||||
|
||||
void CrushTester::set_device_weight(int dev, float f)
|
||||
{
|
||||
@ -359,47 +360,16 @@ void CrushTester::write_integer_indexed_scalar_data_string(vector<string> &dst,
|
||||
dst.push_back( data_buffer.str() );
|
||||
}
|
||||
|
||||
int CrushTester::test_with_crushtool(const char *crushtool_cmd,
|
||||
int max_id, int timeout,
|
||||
int ruleset)
|
||||
int CrushTester::test_with_fork(int timeout)
|
||||
{
|
||||
SubProcessTimed crushtool(crushtool_cmd, SubProcess::PIPE, SubProcess::CLOSE, SubProcess::PIPE, timeout);
|
||||
string opt_max_id = boost::lexical_cast<string>(max_id);
|
||||
crushtool.add_cmd_args(
|
||||
"-i", "-",
|
||||
"--test", "--check", opt_max_id.c_str(),
|
||||
"--min-x", "1",
|
||||
"--max-x", "50",
|
||||
NULL);
|
||||
if (ruleset >= 0) {
|
||||
crushtool.add_cmd_args(
|
||||
"--ruleset",
|
||||
stringify(ruleset).c_str(),
|
||||
NULL);
|
||||
ostringstream sink;
|
||||
int r = fork_function(timeout, sink, [&]() {
|
||||
return test();
|
||||
});
|
||||
if (r == -ETIMEDOUT) {
|
||||
err << "timed out during smoke test (" << timeout << " seconds)";
|
||||
}
|
||||
int ret = crushtool.spawn();
|
||||
if (ret != 0) {
|
||||
err << "failed run crushtool: " << crushtool.err();
|
||||
return ret;
|
||||
}
|
||||
|
||||
bufferlist bl;
|
||||
::encode(crush, bl, CEPH_FEATURES_SUPPORTED_DEFAULT);
|
||||
bl.write_fd(crushtool.get_stdin());
|
||||
crushtool.close_stdin();
|
||||
bl.clear();
|
||||
ret = bl.read_fd(crushtool.get_stderr(), 100 * 1024);
|
||||
if (ret < 0) {
|
||||
err << "failed read from crushtool: " << cpp_strerror(-ret);
|
||||
return ret;
|
||||
}
|
||||
bl.write_stream(err);
|
||||
if (crushtool.join() != 0) {
|
||||
err << crushtool.err();
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return r;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
@ -358,10 +358,7 @@ public:
|
||||
*/
|
||||
void check_overlapped_rules() const;
|
||||
int test();
|
||||
int test_with_crushtool(const char *crushtool_cmd = "crushtool",
|
||||
int max_id = -1,
|
||||
int timeout = 0,
|
||||
int ruleset = -1);
|
||||
int test_with_fork(int timeout);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -5629,19 +5629,13 @@ int OSDMonitor::prepare_new_pool(string& name, uint64_t auid,
|
||||
_get_pending_crush(newcrush);
|
||||
ostringstream err;
|
||||
CrushTester tester(newcrush, err);
|
||||
// use the internal crush tester if crushtool config is empty
|
||||
if (g_conf->crushtool.empty()) {
|
||||
r = tester.test();
|
||||
} else {
|
||||
r = tester.test_with_crushtool(g_conf->crushtool.c_str(),
|
||||
osdmap.get_max_osd(),
|
||||
g_conf->mon_lease,
|
||||
crush_rule);
|
||||
}
|
||||
if (r) {
|
||||
dout(10) << " tester.test_with_crushtool returns " << r
|
||||
tester.set_max_x(50);
|
||||
tester.set_rule(crush_rule);
|
||||
r = tester.test_with_fork(g_conf->mon_lease);
|
||||
if (r < 0) {
|
||||
dout(10) << " tester.test_with_fork returns " << r
|
||||
<< ": " << err.str() << dendl;
|
||||
*ss << "crushtool check failed with " << r << ": " << err.str();
|
||||
*ss << "crush test failed with " << r << ": " << err.str();
|
||||
return r;
|
||||
}
|
||||
unsigned size, min_size;
|
||||
@ -7044,16 +7038,12 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
|
||||
dout(10) << " testing map" << dendl;
|
||||
stringstream ess;
|
||||
CrushTester tester(crush, ess);
|
||||
// XXX: Use mon_lease as a timeout value for crushtool.
|
||||
// If the crushtool consistently takes longer than 'mon_lease' seconds,
|
||||
// then we would consistently trigger an election before the command
|
||||
// finishes, having a flapping monitor unable to hold quorum.
|
||||
int r = tester.test_with_crushtool(g_conf->crushtool.c_str(),
|
||||
osdmap.get_max_osd(),
|
||||
g_conf->mon_lease);
|
||||
tester.set_max_x(50);
|
||||
int r = tester.test_with_fork(g_conf->mon_lease);
|
||||
if (r < 0) {
|
||||
derr << "error on crush map: " << ess.str() << dendl;
|
||||
ss << "Failed crushmap test: " << ess.str();
|
||||
dout(10) << " tester.test_with_fork returns " << r
|
||||
<< ": " << ess.str() << dendl;
|
||||
ss << "crush smoke test failed with " << r << ": " << ess.str();
|
||||
err = r;
|
||||
goto reply;
|
||||
}
|
||||
|
@ -119,15 +119,6 @@ function TEST_crush_rule_create_erasure() {
|
||||
ceph osd erasure-code-profile ls | grep default || return 1
|
||||
ceph osd crush rule rm $ruleset || return 1
|
||||
! ceph osd crush rule ls | grep $ruleset || return 1
|
||||
#
|
||||
# verify that if the crushmap contains a bugous ruleset,
|
||||
# it will prevent the creation of a pool.
|
||||
#
|
||||
local crushtool_path_old=`ceph-conf --show-config-value crushtool`
|
||||
ceph tell mon.\* injectargs --crushtool "false"
|
||||
|
||||
expect_failure $dir "Error EINVAL" \
|
||||
ceph osd pool create mypool 1 1 erasure || return 1
|
||||
}
|
||||
|
||||
function check_ruleset_id_match_rule_id() {
|
||||
@ -239,69 +230,6 @@ function TEST_crush_tree() {
|
||||
$XMLSTARLET val -e -r $CEPH_ROOT/src/test/mon/osd-crush-tree.rng - || return 1
|
||||
}
|
||||
|
||||
# NB: disable me if i am too time consuming
|
||||
function TEST_crush_repair_faulty_crushmap() {
|
||||
local dir=$1
|
||||
fsid=$(uuidgen)
|
||||
MONA=127.0.0.1:7113 # git grep '\<7113\>' : there must be only one
|
||||
MONB=127.0.0.1:7114 # git grep '\<7114\>' : there must be only one
|
||||
MONC=127.0.0.1:7115 # git grep '\<7115\>' : there must be only one
|
||||
CEPH_ARGS_orig=$CEPH_ARGS
|
||||
CEPH_ARGS="--fsid=$fsid --auth-supported=none "
|
||||
CEPH_ARGS+="--mon-initial-members=a,b,c "
|
||||
CEPH_ARGS+="--mon-host=$MONA,$MONB,$MONC "
|
||||
run_mon $dir a --public-addr $MONA || return 1
|
||||
run_mon $dir b --public-addr $MONB || return 1
|
||||
run_mon $dir c --public-addr $MONC || return 1
|
||||
|
||||
ceph osd pool create rbd 8
|
||||
|
||||
local empty_map=$dir/empty_map
|
||||
:> $empty_map.txt
|
||||
crushtool -c $empty_map.txt -o $empty_map.map || return 1
|
||||
|
||||
local crushtool_path_old=`ceph-conf --show-config-value crushtool`
|
||||
ceph tell mon.\* injectargs --crushtool "true"
|
||||
|
||||
|
||||
#import empty crushmap should failture.because the default pool rbd use the rule
|
||||
ceph osd setcrushmap -i $empty_map.map 2>&1|grep "Error EINVAL: the crush rule no"|| return 1
|
||||
|
||||
#remove the default pool rbd
|
||||
ceph osd pool delete rbd rbd --yes-i-really-really-mean-it || return 1
|
||||
|
||||
#now it can be successful to set the empty crush map
|
||||
ceph osd setcrushmap -i $empty_map.map || return 1
|
||||
|
||||
# should be an empty crush map without any buckets
|
||||
success=false
|
||||
for delay in 1 2 4 8 16 32 64 128 256 ; do
|
||||
if test $(ceph osd crush dump --format=json | \
|
||||
jq '.buckets | length == 0') == true ; then
|
||||
success=true
|
||||
break
|
||||
fi
|
||||
sleep $delay
|
||||
done
|
||||
if ! $success ; then
|
||||
ceph osd crush dump --format=json-pretty
|
||||
return 1
|
||||
fi
|
||||
# bring them down, the "ceph" commands will try to hunt for other monitor in
|
||||
# vain, after mon.a is offline
|
||||
kill_daemons $dir || return 1
|
||||
# rewrite the monstore with the good crush map,
|
||||
$CEPH_ROOT/src/tools/ceph-monstore-update-crush.sh --rewrite $dir/a || return 1
|
||||
|
||||
run_mon $dir a --public-addr $MONA || return 1
|
||||
run_mon $dir b --public-addr $MONB || return 1
|
||||
run_mon $dir c --public-addr $MONC || return 1
|
||||
# the buckets are back
|
||||
test $(ceph osd crush dump --format=json | \
|
||||
jq '.buckets | length > 0') == true || return 1
|
||||
CEPH_ARGS=$CEPH_ARGS_orig
|
||||
}
|
||||
|
||||
main osd-crush "$@"
|
||||
|
||||
# Local Variables:
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "common/SubProcess.h"
|
||||
#include "common/safe_io.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "common/fork_function.h"
|
||||
|
||||
bool read_from_fd(int fd, std::string &out) {
|
||||
out.clear();
|
||||
@ -266,3 +267,27 @@ TEST(SubProcessTimed, SubshellTimedout)
|
||||
std::cerr << "err: " << sh.err() << std::endl;
|
||||
ASSERT_FALSE(sh.err().c_str()[0] == '\0');
|
||||
}
|
||||
|
||||
TEST(fork_function, normal)
|
||||
{
|
||||
ASSERT_EQ(0, fork_function(10, std::cerr, [&]() { return 0; }));
|
||||
ASSERT_EQ(1, fork_function(10, std::cerr, [&]() { return 1; }));
|
||||
ASSERT_EQ(13, fork_function(10, std::cerr, [&]() { return 13; }));
|
||||
ASSERT_EQ(-1, fork_function(10, std::cerr, [&]() { return -1; }));
|
||||
ASSERT_EQ(-13, fork_function(10, std::cerr, [&]() { return -13; }));
|
||||
ASSERT_EQ(-ETIMEDOUT,
|
||||
fork_function(10, std::cerr, [&]() { return -ETIMEDOUT; }));
|
||||
}
|
||||
|
||||
TEST(fork_function, timeout)
|
||||
{
|
||||
ASSERT_EQ(-ETIMEDOUT, fork_function(2, std::cerr, [&]() {
|
||||
sleep(60);
|
||||
return 0; }));
|
||||
ASSERT_EQ(-ETIMEDOUT, fork_function(2, std::cerr, [&]() {
|
||||
sleep(60);
|
||||
return 1; }));
|
||||
ASSERT_EQ(-ETIMEDOUT, fork_function(2, std::cerr, [&]() {
|
||||
sleep(60);
|
||||
return -111; }));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user