Merge PR #24787 into master
* refs/pull/24787/head:
  Merge PR #24796 into nautilus
  osd: fix heartbeat_reset unlock
  Merge PR #24780 into nautilus
  Merge PR #24761 into nautilus
  Merge PR #24651 into nautilus
  osd: fix race between op_wq and context_queue
  test: Make sure kill_daemons failure will be easy to find
  test: Add flush_pg_stats to make test more deterministic
commit c40685ebdd
@@ -321,6 +321,7 @@ function TEST_rep_read_unfound() {

     sleep 5

+    flush_pg_stats
     ceph --format=json pg dump pgs | jq '.'

     if ! ceph --format=json pg dump pgs | jq '.pg_stats | .[0].state' | grep -q recovery_unfound
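This is the hunk behind "test: Add flush_pg_stats to make test more deterministic": flush_pg_stats is the standalone-test helper that tells the OSDs to publish their PG stats before the dump is taken, so the recovery_unfound check reads fresh state instead of relying on the preceding sleep 5 to happen to cover the stats reporting interval.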
@@ -28,7 +28,7 @@ function TEST_import_after_merge_and_gap() {
     wait_for_clean || return 1
     rados -p foo bench 3 write -b 1024 --no-cleanup || return 1

-    kill_daemons $dir TERM osd.0
+    kill_daemons $dir TERM osd.0 || return 1
     ceph-objectstore-tool --data-path $dir/0 --op export --pgid 1.1 --file $dir/1.1 --force || return 1
     ceph-objectstore-tool --data-path $dir/0 --op export --pgid 1.0 --file $dir/1.0 --force || return 1
     activate_osd $dir 0 || return 1
@@ -39,7 +39,7 @@ function TEST_import_after_merge_and_gap() {
     wait_for_clean || return 1

     #
-    kill_daemons $dir TERM osd.0
+    kill_daemons $dir TERM osd.0 || return 1
     ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force || return 1
     # this will import both halves the original pg
     ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 || return 1
@@ -58,7 +58,7 @@ function TEST_import_after_merge_and_gap() {
     sleep 3
     wait_for_clean || return 1

-    kill_daemons $dir TERM osd.0
+    kill_daemons $dir TERM osd.0 || return 1

     # this should fail.. 1.1 still doesn't exist
     ! ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 || return 1
@@ -85,7 +85,7 @@ function TEST_import_after_split() {
     wait_for_clean || return 1
     rados -p foo bench 3 write -b 1024 --no-cleanup || return 1

-    kill_daemons $dir TERM osd.0
+    kill_daemons $dir TERM osd.0 || return 1
     ceph-objectstore-tool --data-path $dir/0 --op export --pgid 1.0 --file $dir/1.0 --force || return 1
     activate_osd $dir 0 || return 1

@@ -94,7 +94,7 @@ function TEST_import_after_split() {
     while ceph daemon osd.0 perf dump | jq '.osd.numpg' | grep 1 ; do sleep 1 ; done
     wait_for_clean || return 1

-    kill_daemons $dir TERM osd.0
+    kill_daemons $dir TERM osd.0 || return 1

     ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force || return 1

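All five kill_daemons hunks above are the same one-line fix, per "test: Make sure kill_daemons failure will be easy to find": a bare kill_daemons call discards its exit status, so a daemon that fails to stop lets the test keep running and fail later at some unrelated step; appending || return 1 aborts the test function at the line that actually failed.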
@@ -4961,7 +4961,6 @@ bool OSD::heartbeat_reset(Connection *con)
   auto s = con->get_priv();
   if (s) {
     if (is_stopping()) {
-      heartbeat_lock.Unlock();
       return true;
     }
     auto heartbeat_session = static_cast<HeartbeatSession*>(s.get());
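This is the "osd: fix heartbeat_reset unlock" change: it deletes the manual heartbeat_lock.Unlock() on the is_stopping() early return. The lock acquisition is not visible in the hunk, but the removal only makes sense if the lock is released by a scoped guard on every return path, which would make the explicit unlock a double release. A minimal sketch of that hazard, using std::mutex/std::lock_guard as stand-ins for Ceph's own lock types (heartbeat_reset_sketch and the globals below are hypothetical):

#include <mutex>

std::mutex heartbeat_lock;   // hypothetical stand-in for OSD::heartbeat_lock
bool stopping = false;       // hypothetical stand-in for OSD::is_stopping()

bool heartbeat_reset_sketch()
{
  std::lock_guard<std::mutex> l(heartbeat_lock);  // held for the whole scope
  if (stopping) {
    // heartbeat_lock.unlock();  // the removed line: with a scoped guard
    //                           // this would release twice, which is
    //                           // undefined behavior for std::mutex
    return true;                 // the guard unlocks here instead
  }
  // ... session lookup and teardown would go here ...
  return true;                   // and here
}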
@@ -10195,17 +10194,21 @@ void OSD::ShardedOpWQ::_process(uint32_t thread_index, heartbeat_handle_d *hb)
   auto& sdata = osd->shards[shard_index];
   ceph_assert(sdata);

-  // If all threads of shards do oncommits, there is a out-of-order problem.
-  // So we choose the thread which has the smallest thread_index(thread_index < num_shards) of shard
-  // to do oncommit callback.
+  // If all threads of shards do oncommits, there is a out-of-order
+  // problem. So we choose the thread which has the smallest
+  // thread_index(thread_index < num_shards) of shard to do oncommit
+  // callback.
   bool is_smallest_thread_index = thread_index < osd->num_shards;

   // peek at spg_t
   sdata->shard_lock.Lock();
   if (sdata->pqueue->empty() &&
-      !(is_smallest_thread_index && !sdata->context_queue.empty())) {
+      (!is_smallest_thread_index || sdata->context_queue.empty())) {
     sdata->sdata_wait_lock.Lock();
-    if (!sdata->stop_waiting) {
+    if (is_smallest_thread_index && !sdata->context_queue.empty()) {
+      // we raced with a context_queue addition, don't wait
+      sdata->sdata_wait_lock.Unlock();
+    } else if (!sdata->stop_waiting) {
       dout(20) << __func__ << " empty q, waiting" << dendl;
       osd->cct->get_heartbeat_map()->clear_timeout(hb);
       sdata->shard_lock.Unlock();
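In this "osd: fix race between op_wq and context_queue" hunk, the rewritten empty-check is just the De Morgan form of the old condition; the substantive fix is the new branch that re-checks context_queue after acquiring sdata_wait_lock. Without it, an item queued (and notified) between the first emptiness check and the lock acquisition would leave the worker sleeping on a wakeup that already fired. A minimal sketch of the pattern with std::condition_variable stand-ins rather than the real ShardData/ContextQueue types (ShardSketch, queue_context, and worker_maybe_wait are hypothetical names):

#include <condition_variable>
#include <deque>
#include <functional>
#include <mutex>

struct ShardSketch {
  std::mutex queue_lock;                  // the context queue's own lock
  std::deque<std::function<void()>> context_queue;
  std::mutex sdata_wait_lock;             // serializes sleeping vs. notifying
  std::condition_variable sdata_cond;
  bool stop_waiting = false;

  bool queue_empty() {
    std::lock_guard<std::mutex> q(queue_lock);
    return context_queue.empty();
  }

  // Producer side: enqueue, then notify under sdata_wait_lock.
  void queue_context(std::function<void()> fn) {
    {
      std::lock_guard<std::mutex> q(queue_lock);
      context_queue.push_back(std::move(fn));
    }
    std::lock_guard<std::mutex> w(sdata_wait_lock);
    sdata_cond.notify_one();  // without the re-check below, this could
                              // fire before the worker ever sleeps
  }

  // Worker side: the buggy version slept whenever the first check saw an
  // empty queue; the fixed version re-checks under sdata_wait_lock.
  void worker_maybe_wait() {
    if (!queue_empty())
      return;                             // work available, don't sleep
    std::unique_lock<std::mutex> w(sdata_wait_lock);
    if (!queue_empty()) {
      // we raced with a queue_context() between the check above and
      // taking sdata_wait_lock -- the case the fix handles: don't wait
      return;
    }
    if (!stop_waiting)
      sdata_cond.wait(w);                 // atomically releases w while asleep
  }
};

Once the worker holds sdata_wait_lock and still sees an empty queue, it is safe to sleep: any later producer blocks on sdata_wait_lock until wait() has released it, so its notify_one() can no longer be lost.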