Merge pull request #7208 from dillaman/wip-14163-jewel

helgrind: fix real (and imaginary) race conditions

Reviewed-by: Josh Durgin <jdurgin@redhat.com>
This commit is contained in:
Josh Durgin 2016-01-13 16:56:29 -08:00
commit 0108b9a477
22 changed files with 124 additions and 23 deletions

View File

@ -133,6 +133,7 @@ BuildRequires: python-requests
BuildRequires: python-virtualenv
BuildRequires: snappy-devel
BuildRequires: util-linux
BuildRequires: valgrind-devel
BuildRequires: xfsprogs
BuildRequires: xfsprogs-devel
BuildRequires: xmlstarlet

1
debian/control vendored
View File

@ -57,6 +57,7 @@ Build-Depends: autoconf,
python-virtualenv,
sdparm | hdparm,
uuid-runtime,
valgrind,
xfslibs-dev,
xfsprogs,
xmlstarlet,

View File

@ -21,6 +21,7 @@
#include "HeartbeatMap.h"
#include "ceph_context.h"
#include "common/errno.h"
#include "common/valgrind.h"
#include "debug.h"
#define dout_subsys ceph_subsys_heartbeatmap
@ -48,6 +49,10 @@ heartbeat_handle_d *HeartbeatMap::add_worker(const string& name)
m_rwlock.get_write();
ldout(m_cct, 10) << "add_worker '" << name << "'" << dendl;
heartbeat_handle_d *h = new heartbeat_handle_d(name);
ANNOTATE_BENIGN_RACE_SIZED(&h->timeout, sizeof(h->timeout),
"heartbeat_handle_d timeout");
ANNOTATE_BENIGN_RACE_SIZED(&h->suicide_timeout, sizeof(h->suicide_timeout),
"heartbeat_handle_d suicide_timeout");
m_workers.push_front(h);
h->list_item = m_workers.begin();
m_rwlock.put_write();

View File

@ -20,6 +20,7 @@
#include "include/stringify.h"
#include "include/utime.h"
#include "common/Clock.h"
#include "common/valgrind.h"
Mutex::Mutex(const std::string &n, bool r, bool ld,
bool bt,
@ -27,6 +28,9 @@ Mutex::Mutex(const std::string &n, bool r, bool ld,
name(n), id(-1), recursive(r), lockdep(ld), backtrace(bt), nlock(0),
locked_by(0), cct(cct), logger(0)
{
ANNOTATE_BENIGN_RACE_SIZED(&id, sizeof(id), "Mutex lockdep id");
ANNOTATE_BENIGN_RACE_SIZED(&nlock, sizeof(nlock), "Mutex nlock");
ANNOTATE_BENIGN_RACE_SIZED(&locked_by, sizeof(locked_by), "Mutex locked_by");
if (cct) {
PerfCountersBuilder b(cct, string("mutex-") + name,
l_mutex_first, l_mutex_last);
@ -71,7 +75,11 @@ Mutex::Mutex(const std::string &n, bool r, bool ld,
Mutex::~Mutex() {
assert(nlock == 0);
// helgrind gets confused by condition wakeups leading to mutex destruction
ANNOTATE_BENIGN_RACE_SIZED(&_m, sizeof(_m), "Mutex primitive");
pthread_mutex_destroy(&_m);
if (cct && logger) {
cct->get_perfcounters_collection()->remove(logger);
delete logger;

View File

@ -22,6 +22,7 @@
#include <include/assert.h>
#include "lockdep.h"
#include "include/atomic.h"
#include "common/valgrind.h"
class RWLock
{
@ -39,6 +40,9 @@ public:
RWLock(const std::string &n, bool track_lock=true) : name(n), id(-1), nrlock(0), nwlock(0), track(track_lock) {
pthread_rwlock_init(&L, NULL);
ANNOTATE_BENIGN_RACE_SIZED(&id, sizeof(id), "RWLock lockdep id");
ANNOTATE_BENIGN_RACE_SIZED(&nrlock, sizeof(nrlock), "RWlock nrlock");
ANNOTATE_BENIGN_RACE_SIZED(&nwlock, sizeof(nwlock), "RWlock nwlock");
if (g_lockdep) id = lockdep_register(name.c_str());
}

View File

@ -19,6 +19,7 @@
#include "common/Cond.h"
#include "include/atomic.h"
#include "common/ceph_context.h"
#include "common/valgrind.h"
struct RefCountedObject {
private:
@ -41,8 +42,13 @@ public:
void put() {
CephContext *local_cct = cct;
int v = nref.dec();
if (v == 0)
if (v == 0) {
ANNOTATE_HAPPENS_AFTER(&nref);
ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&nref);
delete this;
} else {
ANNOTATE_HAPPENS_BEFORE(&nref);
}
if (local_cct)
lsubdout(local_cct, refs, 1) << "RefCountedObject::put " << this << " "
<< (v + 1) << " -> " << v

View File

@ -353,10 +353,6 @@ public:
template<typename T>
class PointerWQ : public WorkQueue_ {
public:
PointerWQ(string n, time_t ti, time_t sti, ThreadPool* p)
: WorkQueue_(n, ti, sti), m_pool(p), m_processing(0) {
m_pool->add_work_queue(this);
}
~PointerWQ() {
m_pool->remove_work_queue(this);
assert(m_processing == 0);
@ -382,6 +378,9 @@ public:
return _empty();
}
protected:
PointerWQ(string n, time_t ti, time_t sti, ThreadPool* p)
: WorkQueue_(n, ti, sti), m_pool(p), m_processing(0) {
}
virtual void _clear() {
assert(m_pool->_lock.is_locked());
m_items.clear();
@ -579,6 +578,7 @@ public:
ContextWQ(const string &name, time_t ti, ThreadPool *tp)
: ThreadPool::PointerWQ<Context>(name, ti, 0, tp),
m_lock("ContextWQ::m_lock") {
tp->add_work_queue(this);
}
void queue(Context *ctx, int result = 0) {

View File

@ -20,6 +20,7 @@
#include "common/simple_spin.h"
#include "common/strtol.h"
#include "common/likely.h"
#include "common/valgrind.h"
#include "include/atomic.h"
#include "common/RWLock.h"
#include "include/types.h"
@ -746,7 +747,11 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
_raw = tr->clone();
_raw->nref.set(1);
if (unlikely(tr->nref.dec() == 0)) {
ANNOTATE_HAPPENS_AFTER(&tr->nref);
ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&tr->nref);
delete tr;
} else {
ANNOTATE_HAPPENS_BEFORE(&tr->nref);
}
}
return *this;
@ -771,7 +776,11 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
bdout << "ptr " << this << " release " << _raw << bendl;
if (_raw->nref.dec() == 0) {
//cout << "hosing raw " << (void*)_raw << " len " << _raw->len << std::endl;
ANNOTATE_HAPPENS_AFTER(&_raw->nref);
ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&_raw->nref);
delete _raw; // dealloc old (if any)
} else {
ANNOTATE_HAPPENS_BEFORE(&_raw->nref);
}
_raw = 0;
}

View File

@ -33,6 +33,7 @@
#include "common/Mutex.h"
#include "common/Cond.h"
#include "common/PluginRegistry.h"
#include "common/valgrind.h"
#include <iostream>
#include <pthread.h>
@ -533,6 +534,16 @@ CephContext::~CephContext()
ceph::crypto::shutdown();
}
void CephContext::put() {
if (nref.dec() == 0) {
ANNOTATE_HAPPENS_AFTER(&nref);
ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&nref);
delete this;
} else {
ANNOTATE_HAPPENS_BEFORE(&nref);
}
}
void CephContext::init_crypto()
{
ceph::crypto::init(this);

View File

@ -67,10 +67,7 @@ public:
nref.inc();
return this;
}
void put() {
if (nref.dec() == 0)
delete this;
}
void put();
md_config_t *_conf;
ceph::log::Log *_log;

View File

@ -21,6 +21,7 @@
#include "common/dout.h"
#include "common/errno.h"
#include "common/safe_io.h"
#include "common/valgrind.h"
#include "common/version.h"
#include "include/color.h"
@ -36,6 +37,7 @@ CephContext *common_preinit(const CephInitParameters &iparams,
enum code_environment_t code_env, int flags)
{
// set code environment
ANNOTATE_BENIGN_RACE_SIZED(&g_code_env, sizeof(g_code_env), "g_code_env");
g_code_env = code_env;
// Create a configuration object

View File

@ -616,17 +616,21 @@ void md_config_t::_apply_changes(std::ostream *oss)
void md_config_t::call_all_observers()
{
Mutex::Locker l(lock);
expand_all_meta();
std::map<md_config_obs_t*,std::set<std::string> > obs;
for (obs_map_t::iterator r = observers.begin(); r != observers.end(); ++r)
obs[r->second].insert(r->first);
{
Mutex::Locker l(lock);
expand_all_meta();
for (obs_map_t::iterator r = observers.begin(); r != observers.end(); ++r) {
obs[r->second].insert(r->first);
}
}
for (std::map<md_config_obs_t*,std::set<std::string> >::iterator p = obs.begin();
p != obs.end();
++p)
++p) {
p->first->handle_conf_change(this, p->second);
}
}
int md_config_t::injectargs(const std::string& s, std::ostream *oss)

View File

@ -15,6 +15,7 @@
#include "Clock.h"
#include "common/dout.h"
#include "common/environment.h"
#include "common/valgrind.h"
#include "include/types.h"
#include "lockdep.h"
@ -67,6 +68,10 @@ void lockdep_register_ceph_context(CephContext *cct)
{
pthread_mutex_lock(&lockdep_mutex);
if (g_lockdep_ceph_ctx == NULL) {
ANNOTATE_BENIGN_RACE_SIZED(&g_lockdep_ceph_ctx, sizeof(g_lockdep_ceph_ctx),
"lockdep cct");
ANNOTATE_BENIGN_RACE_SIZED(&g_lockdep, sizeof(g_lockdep),
"lockdep enabled");
g_lockdep = true;
g_lockdep_ceph_ctx = cct;
lockdep_dout(0) << "lockdep start" << dendl;

View File

@ -18,6 +18,7 @@
#include "common/dout.h"
#include "common/errno.h"
#include "common/Formatter.h"
#include "common/valgrind.h"
#include <errno.h>
#include <map>
@ -180,6 +181,9 @@ void PerfCounters::set(int idx, uint64_t amt)
perf_counter_data_any_d& data(m_data[idx - m_lower_bound - 1]);
if (!(data.type & PERFCOUNTER_U64))
return;
ANNOTATE_BENIGN_RACE_SIZED(&data.u64, sizeof(data.u64),
"perf counter atomic");
if (data.type & PERFCOUNTER_LONGRUNAVG) {
data.avgcount.inc();
data.u64.set(amt);

View File

@ -4,12 +4,16 @@
#ifndef CEPH_VALGRIND_H
#define CEPH_VALGRIND_H
#include "acconfig.h"
#ifdef HAVE_VALGRIND_HELGRIND_H
#include <valgrind/helgrind.h>
#else
#define ANNOTATE_HAPPENS_AFTER(x) do {} while (0)
#define ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(x) ANNOTATE_HAPPENS_AFTER(x)
#define ANNOTATE_HAPPENS_BEFORE(x) ANNOTATE_HAPPENS_AFTER(x)
#define ANNOTATE_HAPPENS_AFTER(x) (void)0
#define ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(x) (void)0
#define ANNOTATE_HAPPENS_BEFORE(x) (void)0
#define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) (void)0
#endif
#endif // CEPH_VALGRIND_H

View File

@ -409,6 +409,7 @@ bool librados::RadosClient::ms_dispatch(Message *m)
{
bool ret;
Mutex::Locker l(lock);
if (state == DISCONNECTED) {
ldout(cct, 10) << "disconnected, discarding " << *m << dendl;
m->put();
@ -435,12 +436,11 @@ void librados::RadosClient::ms_handle_remote_reset(Connection *con)
bool librados::RadosClient::_dispatch(Message *m)
{
assert(lock.is_locked());
switch (m->get_type()) {
// OSD
case CEPH_MSG_OSD_MAP:
lock.Lock();
cond.Signal();
lock.Unlock();
m->put();
break;
@ -873,7 +873,7 @@ int librados::RadosClient::monitor_log(const string& level, rados_log_callback_t
void librados::RadosClient::handle_log(MLog *m)
{
Mutex::Locker l(lock);
assert(lock.is_locked());
ldout(cct, 10) << __func__ << " version " << m->version << dendl;
if (log_last_version < m->version) {

View File

@ -27,6 +27,7 @@ AioImageRequestWQ::AioImageRequestWQ(ImageCtx *image_ctx, const string &name,
m_shutdown(false), m_on_shutdown(nullptr) {
CephContext *cct = m_image_ctx.cct;
ldout(cct, 5) << this << " " << ": ictx=" << image_ctx << dendl;
tp->add_work_queue(this);
}
ssize_t AioImageRequestWQ::read(uint64_t off, uint64_t len, char *buf,

View File

@ -12,6 +12,7 @@
#include "common/errno.h"
#include "common/safe_io.h"
#include "common/Clock.h"
#include "common/valgrind.h"
#include "include/assert.h"
#include "include/compat.h"
#include "include/on_exit.h"
@ -189,6 +190,8 @@ Entry *Log::create_entry(int level, int subsys)
Entry *Log::create_entry(int level, int subsys, size_t* expected_size)
{
if (true) {
ANNOTATE_BENIGN_RACE_SIZED(expected_size, sizeof(*expected_size),
"Log hint");
size_t size = __atomic_load_n(expected_size, __ATOMIC_RELAXED);
void *ptr = ::operator new(sizeof(Entry) + size);
return new(ptr) Entry(ceph_clock_now(NULL),

View File

@ -27,6 +27,7 @@
#include "common/debug.h"
#include "common/errno.h"
#include "common/valgrind.h"
// Below included to get encode_encrypt(); That probably should be in Crypto.h, instead
@ -83,6 +84,9 @@ Pipe::Pipe(SimpleMessenger *r, int st, PipeConnection *con)
send_keepalive_ack(false),
connect_seq(0), peer_global_seq(0),
out_seq(0), in_seq(0), in_seq_acked(0) {
ANNOTATE_BENIGN_RACE_SIZED(&state, sizeof(state), "Pipe state");
ANNOTATE_BENIGN_RACE_SIZED(&recv_len, sizeof(recv_len), "Pipe recv_len");
ANNOTATE_BENIGN_RACE_SIZED(&recv_ofs, sizeof(recv_ofs), "Pipe recv_ofs");
if (con) {
connection_state = con;
connection_state->reset_pipe(this);

View File

@ -22,6 +22,7 @@
#include "common/config.h"
#include "common/Timer.h"
#include "common/errno.h"
#include "common/valgrind.h"
#include "auth/Crypto.h"
#include "include/Spinlock.h"
@ -53,6 +54,8 @@ SimpleMessenger::SimpleMessenger(CephContext *cct, entity_name_t name,
timeout(0),
local_connection(new PipeConnection(cct, this))
{
ANNOTATE_BENIGN_RACE_SIZED(&timeout, sizeof(timeout),
"SimpleMessenger read timeout");
ceph_spin_init(&global_seq_lock);
local_features = features;
init_local_connection();
@ -67,6 +70,7 @@ SimpleMessenger::~SimpleMessenger()
assert(!did_bind); // either we didn't bind or we shut down the Accepter
assert(rank_pipe.empty()); // we don't have any running Pipes.
assert(!reaper_started); // the reaper thread is stopped
ceph_spin_destroy(&global_seq_lock);
}
void SimpleMessenger::ready()
@ -701,6 +705,8 @@ void SimpleMessenger::learned_addr(const entity_addr_t &peer_addr_for_me)
if (need_addr) {
entity_addr_t t = peer_addr_for_me;
t.set_port(my_inst.addr.get_port());
ANNOTATE_BENIGN_RACE_SIZED(&my_inst.addr.addr, sizeof(my_inst.addr.addr),
"SimpleMessenger learned addr");
my_inst.addr.addr = t.addr;
ldout(cct,1) << "learned my addr " << my_inst.addr << dendl;
need_addr = false;

View File

@ -168,6 +168,7 @@ void Objecter::handle_conf_change(const struct md_config_t *conf,
void Objecter::update_crush_location()
{
RWLock::WLocker rwlocker(rwlock);
crush_location.clear();
vector<string> lvec;
get_str_vec(cct->_conf->crush_location, ";, \t", lvec);
@ -1925,7 +1926,7 @@ void Objecter::tick()
for (map<int,OSDSession*>::iterator siter = osd_sessions.begin(); siter != osd_sessions.end(); ++siter) {
OSDSession *s = siter->second;
RWLock::RLocker l(s->lock);
RWLock::WLocker l(s->lock);
bool found = false;
for (map<ceph_tid_t,Op*>::iterator p = s->ops.begin();
p != s->ops.end();

View File

@ -198,3 +198,28 @@
fun:_dl_init
obj:*
}
# PK11_CreateContextBySymKey
{
<insert_a_suppression_name_here>
Helgrind:Race
obj:/usr/*lib*/libfreebl*3.so
...
obj:/usr/*lib*/libsoftokn3.so
...
obj:/usr/*lib*/libnss3.so
...
fun:PK11_CreateContextBySymKey
...
}
# _dl_allocate_tls_init
{
<insert_a_suppression_name_here>
Helgrind:Race
fun:mempcpy
fun:_dl_allocate_tls_init
...
fun:pthread_create@*
...
}