osd: don't mark objs as lost unless we're active

We don't have enough information to mark objects as lost until we
activate the PG; might_have_unfound isn't even built until PG::activate().

Signed-off-by: Colin McCabe <colinm@hq.newdream.net>
This commit is contained in:
Colin Patrick McCabe 2010-11-23 14:30:06 -08:00
parent c0e60afea5
commit 136dfdeb70
3 changed files with 61 additions and 44 deletions

View File

@ -1880,49 +1880,62 @@ void OSD::handle_command(MMonCommand *m)
ss << g_conf.name << " stopped profiler";
logclient.log(LOG_INFO, ss);
}
else if (m->cmd.size() == 2 && m->cmd[0] == "dump_missing") {
const string &file_name(m->cmd[1]);
std::ofstream fout(file_name.c_str());
if (!fout.is_open()) {
stringstream ss;
ss << "failed to open file '" << file_name << "'";
logclient.log(LOG_INFO, ss);
goto done;
}
std::set <pg_t> keys;
for (hash_map<pg_t, PG*>::const_iterator pg_map_e = pg_map.begin();
pg_map_e != pg_map.end(); ++pg_map_e) {
keys.insert(pg_map_e->first);
}
fout << "*** osd " << whoami << ": dump_missing ***" << std::endl;
for (std::set <pg_t>::iterator p = keys.begin();
p != keys.end(); ++p) {
hash_map<pg_t, PG*>::iterator q = pg_map.find(*p);
assert(q != pg_map.end());
PG *pg = q->second;
pg->lock();
fout << *pg << std::endl;
std::map<sobject_t, PG::Missing::item>::iterator mend = pg->missing.missing.end();
std::map<sobject_t, PG::Missing::item>::iterator m = pg->missing.missing.begin();
for (; m != mend; ++m) {
fout << m->first << " -> " << m->second << std::endl;
map<sobject_t, set<int> >::const_iterator mli =
pg->missing_loc.find(m->first);
if (mli == pg->missing_loc.end())
continue;
const set<int> &mls(mli->second);
if (mls.empty())
continue;
fout << "missing_loc: " << mls << std::endl;
else if (m->cmd.size() > 1 && m->cmd[0] == "debug") {
if (m->cmd.size() == 3 && m->cmd[1] == "dump_missing") {
const string &file_name(m->cmd[2]);
std::ofstream fout(file_name.c_str());
if (!fout.is_open()) {
stringstream ss;
ss << "failed to open file '" << file_name << "'";
logclient.log(LOG_INFO, ss);
goto done;
}
pg->unlock();
fout << std::endl;
}
fout.close();
std::set <pg_t> keys;
for (hash_map<pg_t, PG*>::const_iterator pg_map_e = pg_map.begin();
pg_map_e != pg_map.end(); ++pg_map_e) {
keys.insert(pg_map_e->first);
}
fout << "*** osd " << whoami << ": dump_missing ***" << std::endl;
for (std::set <pg_t>::iterator p = keys.begin();
p != keys.end(); ++p) {
hash_map<pg_t, PG*>::iterator q = pg_map.find(*p);
assert(q != pg_map.end());
PG *pg = q->second;
pg->lock();
fout << *pg << std::endl;
std::map<sobject_t, PG::Missing::item>::iterator mend = pg->missing.missing.end();
std::map<sobject_t, PG::Missing::item>::iterator m = pg->missing.missing.begin();
for (; m != mend; ++m) {
fout << m->first << " -> " << m->second << std::endl;
map<sobject_t, set<int> >::const_iterator mli =
pg->missing_loc.find(m->first);
if (mli == pg->missing_loc.end())
continue;
const set<int> &mls(mli->second);
if (mls.empty())
continue;
fout << "missing_loc: " << mls << std::endl;
}
pg->unlock();
fout << std::endl;
}
fout.close();
}
else if (m->cmd.size() == 3 && m->cmd[1] == "kick_recovery_wq") {
g_conf.osd_recovery_delay_start = atoi(m->cmd[2].c_str());
stringstream ss;
ss << "kicking recovery queue. set osd_recovery_delay_start to "
<< g_conf.osd_recovery_delay_start;
logclient.log(LOG_INFO, ss);
defer_recovery_until = g_clock.now();
defer_recovery_until += g_conf.osd_recovery_delay_start;
recovery_wq._kick();
}
}
else dout(0) << "unrecognized command! " << m->cmd << dendl;
@ -3079,7 +3092,7 @@ void OSD::activate_map(ObjectStore::Transaction& t, list<Context*>& tfin)
if (g_conf.osd_check_for_log_corruption)
pg->check_log_for_corruption(store);
if (pg->is_primary() &&
if (pg->is_active() && pg->is_primary() &&
(pg->missing.num_missing() > pg->missing_loc.size())) {
if (pg->all_unfound_are_lost(osdmap)) {
pg->mark_all_unfound_as_lost();

View File

@ -130,7 +130,7 @@ start_recovery() {
CEPH_NUM_OSD=$1
osd=0
while [ $osd -lt $CEPH_NUM_OSD ]; do
./ceph osd tell $osd injectargs 'osd recovery delay start = 0'
./ceph osd tell $osd debug kick_recovery_wq 0
osd=$((osd+1))
done
}

View File

@ -9,7 +9,7 @@ source "`dirname $0`/test_common.sh"
# Functions
my_write_objects() {
write_objects $1 $2 10 1000000
write_objects $1 $2 200 4000
}
setup() {
@ -49,6 +49,10 @@ recovery1_impl() {
[ $? -eq 1 ] || die "Failed to see unfound objects."
echo "Got unfound objects."
restart_osd 0
sleep 20
start_recovery 2
# Turn on recovery and wait for it to complete.
poll_cmd "./ceph pg debug unfound_objects_exist" FALSE 3 120
[ $? -eq 1 ] || die "Failed to recover unfound objects."