From 136dfdeb7014f391a89c982536ae28f885e5a037 Mon Sep 17 00:00:00 2001 From: Colin Patrick McCabe Date: Tue, 23 Nov 2010 14:30:06 -0800 Subject: [PATCH] osd: don't mark objs as lost unless we're active We don't have enough information to mark objects as lost until we activate the PG. might_have_unfound isn't even built until PG::activate. Signed-off-by: Colin McCabe --- src/osd/OSD.cc | 97 +++++++++++++++++++++++------------------ src/test/test_common.sh | 2 +- src/test/test_lost.sh | 6 ++- 3 files changed, 61 insertions(+), 44 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 90af2959d6f..744e8a75e41 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1880,49 +1880,62 @@ void OSD::handle_command(MMonCommand *m) ss << g_conf.name << " stopped profiler"; logclient.log(LOG_INFO, ss); } - else if (m->cmd.size() == 2 && m->cmd[0] == "dump_missing") { - const string &file_name(m->cmd[1]); - std::ofstream fout(file_name.c_str()); - if (!fout.is_open()) { - stringstream ss; - ss << "failed to open file '" << file_name << "'"; - logclient.log(LOG_INFO, ss); - goto done; - } - - std::set keys; - for (hash_map::const_iterator pg_map_e = pg_map.begin(); - pg_map_e != pg_map.end(); ++pg_map_e) { - keys.insert(pg_map_e->first); - } - - fout << "*** osd " << whoami << ": dump_missing ***" << std::endl; - for (std::set ::iterator p = keys.begin(); - p != keys.end(); ++p) { - hash_map::iterator q = pg_map.find(*p); - assert(q != pg_map.end()); - PG *pg = q->second; - pg->lock(); - - fout << *pg << std::endl; - std::map::iterator mend = pg->missing.missing.end(); - std::map::iterator m = pg->missing.missing.begin(); - for (; m != mend; ++m) { - fout << m->first << " -> " << m->second << std::endl; - map >::const_iterator mli = - pg->missing_loc.find(m->first); - if (mli == pg->missing_loc.end()) - continue; - const set &mls(mli->second); - if (mls.empty()) - continue; - fout << "missing_loc: " << mls << std::endl; + else if (m->cmd.size() > 1 && m->cmd[0] == "debug") { + if (m->cmd.size() == 3 && m->cmd[1] == "dump_missing") { + const string &file_name(m->cmd[2]); + std::ofstream fout(file_name.c_str()); + if (!fout.is_open()) { + stringstream ss; + ss << "failed to open file '" << file_name << "'"; + logclient.log(LOG_INFO, ss); + goto done; } - pg->unlock(); - fout << std::endl; - } - fout.close(); + std::set keys; + for (hash_map::const_iterator pg_map_e = pg_map.begin(); + pg_map_e != pg_map.end(); ++pg_map_e) { + keys.insert(pg_map_e->first); + } + + fout << "*** osd " << whoami << ": dump_missing ***" << std::endl; + for (std::set ::iterator p = keys.begin(); + p != keys.end(); ++p) { + hash_map::iterator q = pg_map.find(*p); + assert(q != pg_map.end()); + PG *pg = q->second; + pg->lock(); + + fout << *pg << std::endl; + std::map::iterator mend = pg->missing.missing.end(); + std::map::iterator m = pg->missing.missing.begin(); + for (; m != mend; ++m) { + fout << m->first << " -> " << m->second << std::endl; + map >::const_iterator mli = + pg->missing_loc.find(m->first); + if (mli == pg->missing_loc.end()) + continue; + const set &mls(mli->second); + if (mls.empty()) + continue; + fout << "missing_loc: " << mls << std::endl; + } + pg->unlock(); + fout << std::endl; + } + + fout.close(); + } + else if (m->cmd.size() == 3 && m->cmd[1] == "kick_recovery_wq") { + g_conf.osd_recovery_delay_start = atoi(m->cmd[2].c_str()); + stringstream ss; + ss << "kicking recovery queue. set osd_recovery_delay_start to " + << g_conf.osd_recovery_delay_start; + logclient.log(LOG_INFO, ss); + + defer_recovery_until = g_clock.now(); + defer_recovery_until += g_conf.osd_recovery_delay_start; + recovery_wq._kick(); + } } else dout(0) << "unrecognized command! " << m->cmd << dendl; @@ -3079,7 +3092,7 @@ void OSD::activate_map(ObjectStore::Transaction& t, list& tfin) if (g_conf.osd_check_for_log_corruption) pg->check_log_for_corruption(store); - if (pg->is_primary() && + if (pg->is_active() && pg->is_primary() && (pg->missing.num_missing() > pg->missing_loc.size())) { if (pg->all_unfound_are_lost(osdmap)) { pg->mark_all_unfound_as_lost(); diff --git a/src/test/test_common.sh b/src/test/test_common.sh index 0c437dc9e02..f34fda5ffd3 100755 --- a/src/test/test_common.sh +++ b/src/test/test_common.sh @@ -130,7 +130,7 @@ start_recovery() { CEPH_NUM_OSD=$1 osd=0 while [ $osd -lt $CEPH_NUM_OSD ]; do - ./ceph osd tell $osd injectargs 'osd recovery delay start = 0' + ./ceph osd tell $osd debug kick_recovery_wq 0 osd=$((osd+1)) done } diff --git a/src/test/test_lost.sh b/src/test/test_lost.sh index 6279dcc7977..c93f1a656a5 100755 --- a/src/test/test_lost.sh +++ b/src/test/test_lost.sh @@ -9,7 +9,7 @@ source "`dirname $0`/test_common.sh" # Functions my_write_objects() { - write_objects $1 $2 10 1000000 + write_objects $1 $2 200 4000 } setup() { @@ -49,6 +49,10 @@ recovery1_impl() { [ $? -eq 1 ] || die "Failed to see unfound objects." echo "Got unfound objects." + restart_osd 0 + sleep 20 + start_recovery 2 + # Turn on recovery and wait for it to complete. poll_cmd "./ceph pg debug unfound_objects_exist" FALSE 3 120 [ $? -eq 1 ] || die "Failed to recover unfound objects."