Merge pull request #7075 from shun-s/shun-fix

global: do not start two daemons with a single pid-file

Reviewed-by: Kefu Chai <kchai@redhat.com>
This commit is contained in:
Sage Weil 2016-01-29 08:28:30 -05:00
commit 71501a3ab4
5 changed files with 229 additions and 64 deletions

View File

@ -261,14 +261,19 @@ int global_init_prefork(CephContext *cct)
{
if (g_code_env != CODE_ENVIRONMENT_DAEMON)
return -1;
const md_config_t *conf = cct->_conf;
if (!conf->daemonize) {
if (pidfile_open(g_conf) < 0) {
exit(1);
}
if (atexit(pidfile_remove_void)) {
derr << "global_init_daemonize: failed to set pidfile_remove function "
<< "to run at exit." << dendl;
}
pidfile_write(g_conf);
pidfile_write();
return -1;
}
@ -292,7 +297,7 @@ void global_init_daemonize(CephContext *cct)
<< cpp_strerror(ret) << dendl;
exit(1);
}
global_init_postfork_start(cct);
global_init_postfork_finish(cct);
#else
@ -305,6 +310,10 @@ void global_init_postfork_start(CephContext *cct)
// restart log thread
g_ceph_context->_log->start();
if (pidfile_open(g_conf) < 0) {
exit(1);
}
if (atexit(pidfile_remove_void)) {
derr << "global_init_daemonize: failed to set pidfile_remove function "
<< "to run at exit." << dendl;
@ -333,7 +342,7 @@ void global_init_postfork_start(CephContext *cct)
exit(1);
}
pidfile_write(g_conf);
pidfile_write();
}
void global_init_postfork_finish(CephContext *cct)

View File

@ -31,68 +31,144 @@
#define dout_prefix *_dout
static char pid_file[PATH_MAX] = "";
int pidfile_write(const md_config_t *conf)
{
int ret, fd;
if (conf->pid_file.empty()) {
return pidfile_remove();
struct pidfh {
int pf_fd;
char pf_path[PATH_MAX + 1];
dev_t pf_dev;
ino_t pf_ino;
pidfh() : pf_fd(-1), pf_dev(0), pf_ino(0) {
memset(pf_path, 0, sizeof(pf_path));
}
snprintf(pid_file, PATH_MAX, "%s", conf->pid_file.c_str());
void close() {
pf_fd = -1;
pf_path[0] = '\0';
pf_dev = 0;
pf_ino = 0;
}
};
static pidfh pfh;
fd = TEMP_FAILURE_RETRY(::open(pid_file,
O_CREAT|O_TRUNC|O_WRONLY, 0644));
static int pidfile_verify() {
struct stat st;
if (pfh.pf_fd == -1) {
return -EINVAL;
}
/*
* Check remembered descriptor
*/
if (fstat(pfh.pf_fd, &st) == -1) {
return -errno;
}
if (st.st_dev != pfh.pf_dev || st.st_ino != pfh.pf_ino) {
return -ESTALE;
}
return 0;
}
int pidfile_write()
{
if (!pfh.pf_path[0]) {
return 0;
}
int ret;
if ((ret = pidfile_verify()) < 0) {
return ret;
}
char buf[32];
int len = snprintf(buf, sizeof(buf), "%d\n", getpid());
ret = safe_write(pfh.pf_fd, buf, len);
if (ret < 0) {
derr << "write_pid_file: failed to write to pid file '"
<< pfh.pf_path << "': " << cpp_strerror(ret) << dendl;
pfh.close();
return ret;
}
return 0;
}
int pidfile_remove()
{
if (!pfh.pf_path[0]) {
return 0;
}
int ret;
if ((ret = pidfile_verify()) < 0) {
if (pfh.pf_fd != -1) {
::close(pfh.pf_fd);
}
return ret;
}
// only remove it if it has OUR pid in it!
char buf[32];
memset(buf, 0, sizeof(buf));
ssize_t res = safe_read(pfh.pf_fd, buf, sizeof(buf));
if (pfh.pf_fd != -1) {
::close(pfh.pf_fd);
}
if (res < 0) {
return res;
}
int a = atoi(buf);
if (a != getpid()) {
return -EDOM;
}
res = ::unlink(pfh.pf_path);
if (res) {
return res;
}
pfh.close();
return 0;
}
int pidfile_open(const md_config_t *conf)
{
if (conf->pid_file.empty()) {
return 0;
}
int len = snprintf(pfh.pf_path, sizeof(pfh.pf_path),
"%s", conf->pid_file.c_str());
if (len >= (int)sizeof(pfh.pf_path)) {
return -ENAMETOOLONG;
}
int fd;
fd = ::open(pfh.pf_path, O_CREAT|O_WRONLY, 0644);
if (fd < 0) {
int err = errno;
derr << "write_pid_file: failed to open pid file '"
<< pid_file << "': " << cpp_strerror(err) << dendl;
return err;
}
char buf[20];
int len = snprintf(buf, sizeof(buf), "%d\n", getpid());
ret = safe_write(fd, buf, len);
if (ret < 0) {
derr << "write_pid_file: failed to write to pid file '"
<< pid_file << "': " << cpp_strerror(ret) << dendl;
VOID_TEMP_FAILURE_RETRY(::close(fd));
return ret;
}
if (TEMP_FAILURE_RETRY(::close(fd))) {
ret = errno;
derr << "SimpleMessenger::write_pid_file: failed to close to pid file '"
<< pid_file << "': " << cpp_strerror(ret) << dendl;
return -ret;
}
return 0;
}
int pidfile_remove(void)
{
if (!pid_file[0])
return 0;
// only remove it if it has OUR pid in it!
int fd = TEMP_FAILURE_RETRY(::open(pid_file, O_RDONLY));
if (fd < 0)
<< pfh.pf_path << "': " << cpp_strerror(err) << dendl;
pfh.close();
return -errno;
char buf[32];
memset(buf, 0, sizeof(buf));
ssize_t res = safe_read(fd, buf, sizeof(buf));
VOID_TEMP_FAILURE_RETRY(::close(fd));
if (res < 0)
return res;
int a = atoi(buf);
if (a != getpid())
return -EDOM;
res = ::unlink(pid_file);
if (res)
return res;
pid_file[0] = '\0';
}
struct stat st;
if (fstat(fd, &st) == -1) {
close(fd);
pfh.close();
return -errno;
}
pfh.pf_fd = fd;
pfh.pf_dev = st.st_dev;
pfh.pf_ino = st.st_ino;
struct flock l = { F_WRLCK, SEEK_SET, 0, 0, 0 };
int r = ::fcntl(pfh.pf_fd, F_SETLK, &l);
if (r < 0) {
derr << "failed to lock pidfile - " << pfh.pf_path << ". is there another process in use?" << dendl;
close(pfh.pf_fd);
pfh.close();
return -errno;
}
return 0;
}

View File

@ -19,10 +19,13 @@ struct md_config_t;
// Write a pidfile with the current pid, using the configuration in the
// provided conf structure.
int pidfile_write(const md_config_t *conf);
int pidfile_write();
// Remove the pid file that was previously written by pidfile_write.
// This is safe to call in a signal handler context.
int pidfile_remove(void);
int pidfile_remove();
// test whether the pid_file is being used by another process
int pidfile_open(const md_config_t *conf);
#endif

View File

@ -89,7 +89,8 @@ check_SCRIPTS += \
test/osd/osd-markdown.sh \
test/mon/mon-handle-forward.sh \
test/libradosstriper/rados-striper.sh \
test/test_objectstore_memstore.sh
test/test_objectstore_memstore.sh \
test/test_pidfile.sh
check_SCRIPTS += test/ceph-disk.sh

76
src/test/test_pidfile.sh Executable file
View File

@ -0,0 +1,76 @@
#!/bin/bash
#
# test pidfile here
#
# Includes
source ../qa/workunits/ceph-helpers.sh
function run() {
local dir=$1
shift
export CEPH_MON="127.0.0.1:17108"
export CEPH_ARGS
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
CEPH_ARGS+="--mon-host=$CEPH_MON "
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
for func in $funcs ; do
$func $dir || return 1
done
}
function TEST_without_pidfile() {
local dir=$1
setup $dir
local RUNID=`uuidgen`
run_mon $dir a --pid-file= --daemonize=$RUNID || { teardown_unexist_pidfile $dir; return 1; }
run_osd $dir 0 --pid-file= --daemonize=$RUNID || { teardown_unexist_pidfile $dir; return 1; }
teardown_unexist_pidfile $dir $RUNID || return 1
}
function teardown_unexist_pidfile() {
local dir=$1
shift
local RUNID=$1
shift
local delays=${4:-0 0 1 1 1 2 3 5 5 5 10 10 20 60 60 60 120}
local pids=$(ps aux|awk "/cep[h].*$RUNID.*/ {print \$2}")
local status=0
for i in $pids ; do
local kill_complete=false
for try in $delays ; do
if kill $i 2> /dev/null ; then
kill_complete=false
sleep $try
else
kill_complete=true
break
fi
done
if ! $kill_complete ; then
status=1
fi
done
if [ $(stat -f -c '%T' .) == "btrfs" ]; then
__teardown_btrfs $dir
fi
rm -fr $dir
return $status
}
function TEST_contend_pidfile() {
local dir=$1
setup $dir
run_mon $dir a
run_mon $dir a 2>&1 | grep "failed to lock pidfile" || return 1
run_osd $dir 0
run_osd $dir 0 2>&1 | grep "failed to lock pidfile" || return 1
teardown $dir || return 1
}
main pidfile