
Merge PR #24787 into master

* refs/pull/24787/head:
	Merge PR  into nautilus
	osd: fix heartbeat_reset unlock
	Merge PR  into nautilus
	Merge PR  into nautilus
	Merge PR  into nautilus
	osd: fix race between op_wq and context_queue
	test: Make sure kill_daemons failure will be easy to find
	test: Add flush_pg_stats to make test more deterministic
Sage Weil 2018-10-29 08:36:34 -05:00
commit c40685ebdd
3 changed files with 15 additions and 11 deletions


@@ -321,6 +321,7 @@ function TEST_rep_read_unfound() {
 sleep 5
+flush_pg_stats
 ceph --format=json pg dump pgs | jq '.'
 if ! ceph --format=json pg dump pgs | jq '.pg_stats | .[0].state' | grep -q recovery_unfound


@@ -28,7 +28,7 @@ function TEST_import_after_merge_and_gap() {
 wait_for_clean || return 1
 rados -p foo bench 3 write -b 1024 --no-cleanup || return 1
-kill_daemons $dir TERM osd.0
+kill_daemons $dir TERM osd.0 || return 1
 ceph-objectstore-tool --data-path $dir/0 --op export --pgid 1.1 --file $dir/1.1 --force || return 1
 ceph-objectstore-tool --data-path $dir/0 --op export --pgid 1.0 --file $dir/1.0 --force || return 1
 activate_osd $dir 0 || return 1
@@ -39,7 +39,7 @@ function TEST_import_after_merge_and_gap() {
 wait_for_clean || return 1
 #
-kill_daemons $dir TERM osd.0
+kill_daemons $dir TERM osd.0 || return 1
 ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force || return 1
 # this will import both halves the original pg
 ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 || return 1
@@ -58,7 +58,7 @@ function TEST_import_after_merge_and_gap() {
 sleep 3
 wait_for_clean || return 1
-kill_daemons $dir TERM osd.0
+kill_daemons $dir TERM osd.0 || return 1
 # this should fail.. 1.1 still doesn't exist
 ! ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 || return 1
@@ -85,7 +85,7 @@ function TEST_import_after_split() {
 wait_for_clean || return 1
 rados -p foo bench 3 write -b 1024 --no-cleanup || return 1
-kill_daemons $dir TERM osd.0
+kill_daemons $dir TERM osd.0 || return 1
 ceph-objectstore-tool --data-path $dir/0 --op export --pgid 1.0 --file $dir/1.0 --force || return 1
 activate_osd $dir 0 || return 1
@@ -94,7 +94,7 @@ function TEST_import_after_split() {
 while ceph daemon osd.0 perf dump | jq '.osd.numpg' | grep 1 ; do sleep 1 ; done
 wait_for_clean || return 1
-kill_daemons $dir TERM osd.0
+kill_daemons $dir TERM osd.0 || return 1
 ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force || return 1


@@ -4961,7 +4961,6 @@ bool OSD::heartbeat_reset(Connection *con)
 auto s = con->get_priv();
 if (s) {
 if (is_stopping()) {
-heartbeat_lock.Unlock();
 return true;
 }
 auto heartbeat_session = static_cast<HeartbeatSession*>(s.get());
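
The hunk above removes an explicit heartbeat_lock.Unlock() on the early-return path of OSD::heartbeat_reset. Assuming the function already holds heartbeat_lock through a scoped locker for its whole body (the locker itself is outside this hunk), the manual unlock would release the mutex once by hand and then a second time in the locker's destructor. A minimal standalone sketch of that hazard, using std::mutex and std::lock_guard rather than Ceph's lock types:

#include <mutex>

// Sketch only, not Ceph code: stand-ins for heartbeat_lock and is_stopping().
std::mutex heartbeat_lock_sketch;
bool stopping_sketch = false;

bool heartbeat_reset_sketch()
{
  std::lock_guard<std::mutex> l(heartbeat_lock_sketch);  // locker owns the mutex
  if (stopping_sketch) {
    // What the removed line did: an explicit unlock here would be followed by
    // the locker's destructor unlocking again at the closing brace, i.e.
    // unlocking a mutex this thread no longer owns.
    // heartbeat_lock_sketch.unlock();   // the bug -- just return instead
    return true;                         // locker releases the lock exactly once
  }
  // ... normal work proceeds with the lock held ...
  return false;
}
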
@@ -10195,17 +10194,21 @@ void OSD::ShardedOpWQ::_process(uint32_t thread_index, heartbeat_handle_d *hb)
 auto& sdata = osd->shards[shard_index];
 ceph_assert(sdata);
-// If all threads of shards do oncommits, there is a out-of-order problem.
-// So we choose the thread which has the smallest thread_index(thread_index < num_shards) of shard
-// to do oncommit callback.
+// If all threads of shards do oncommits, there is a out-of-order
+// problem. So we choose the thread which has the smallest
+// thread_index(thread_index < num_shards) of shard to do oncommit
+// callback.
 bool is_smallest_thread_index = thread_index < osd->num_shards;
 // peek at spg_t
 sdata->shard_lock.Lock();
 if (sdata->pqueue->empty() &&
-!(is_smallest_thread_index && !sdata->context_queue.empty())) {
+(!is_smallest_thread_index || sdata->context_queue.empty())) {
 sdata->sdata_wait_lock.Lock();
-if (!sdata->stop_waiting) {
+if (is_smallest_thread_index && !sdata->context_queue.empty()) {
+// we raced with a context_queue addition, don't wait
+sdata->sdata_wait_lock.Unlock();
+} else if (!sdata->stop_waiting) {
 dout(20) << __func__ << " empty q, waiting" << dendl;
 osd->cct->get_heartbeat_map()->clear_timeout(hb);
 sdata->shard_lock.Unlock();
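
The rewritten condition in this hunk is just De Morgan's law (!(A && !B) is the same as (!A || B)); the behavioural fix is the new re-check after sdata_wait_lock is taken. Assuming the context_queue is guarded by its own internal lock rather than by shard_lock (which is what the race named in the commit subject implies), another thread can add to it and signal the wakeup after the shard thread has judged both queues empty under shard_lock but before it has acquired sdata_wait_lock; without the re-check the thread would go to sleep with work pending. A generic sketch of that check-then-wait race and the re-check fix, using std::mutex/std::condition_variable and illustrative names rather than Ceph's shard types:

#include <condition_variable>
#include <deque>
#include <mutex>

// Illustrative names, not Ceph's types. The "context queue" carries its own
// lock, so it can grow at any moment, even while the consumer holds the
// shard lock; its emptiness therefore has to be re-checked after taking the
// wait lock and before sleeping.
struct ContextQueueSketch {
  std::mutex m;
  std::deque<int> q;
  bool empty() { std::lock_guard<std::mutex> l(m); return q.empty(); }
  void push(int v) { std::lock_guard<std::mutex> l(m); q.push_back(v); }
};

struct ShardSketch {
  std::mutex shard_lock;             // cf. sdata->shard_lock
  std::deque<int> pqueue;            // cf. sdata->pqueue
  ContextQueueSketch context_queue;  // cf. sdata->context_queue
  std::mutex wait_lock;              // cf. sdata->sdata_wait_lock
  std::condition_variable cond;      // cf. the shard's wakeup condition
  bool stop_waiting = false;
};

void consume_step(ShardSketch& s)
{
  s.shard_lock.lock();
  if (s.pqueue.empty() && s.context_queue.empty()) {
    std::unique_lock<std::mutex> wl(s.wait_lock);
    if (!s.context_queue.empty()) {
      // raced with a context_queue addition between the check above and
      // taking wait_lock: do not sleep, fall through and service it
    } else if (!s.stop_waiting) {
      s.shard_lock.unlock();
      s.cond.wait(wl);               // producer pushes, then notifies under wait_lock
      s.shard_lock.lock();
    }
  }
  s.shard_lock.unlock();
  // ... drain pqueue / context_queue here ...
}
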