2006-10-05 05:39:29 +00:00
|
|
|
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
2007-06-01 19:51:31 +00:00
|
|
|
// vim: ts=8 sw=2 smarttab
|
2006-10-05 05:39:29 +00:00
|
|
|
/*
|
|
|
|
* Ceph - scalable distributed file system
|
|
|
|
*
|
|
|
|
* Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
|
|
|
|
*
|
|
|
|
* This is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License version 2.1, as published by the Free Software
|
|
|
|
* Foundation. See file COPYING.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2008-01-16 14:06:02 +00:00
|
|
|
/* Object Store Device (OSD) Monitor
|
|
|
|
*/
|
2006-10-05 05:39:29 +00:00
|
|
|
|
2010-06-12 13:04:11 +00:00
|
|
|
#ifndef CEPH_OSDMONITOR_H
|
|
|
|
#define CEPH_OSDMONITOR_H
|
2006-10-05 05:39:29 +00:00
|
|
|
|
|
|
|
#include <map>
|
|
|
|
#include <set>
|
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
#include "include/types.h"
|
|
|
|
#include "msg/Messenger.h"
|
|
|
|
|
|
|
|
#include "osd/OSDMap.h"
|
|
|
|
|
2007-07-01 14:43:27 +00:00
|
|
|
#include "PaxosService.h"
|
2009-09-21 21:30:19 +00:00
|
|
|
#include "Session.h"
|
2006-10-05 05:39:29 +00:00
|
|
|
|
2007-07-01 14:43:27 +00:00
|
|
|
class Monitor;
|
|
|
|
class MOSDBoot;
|
2008-02-28 18:21:48 +00:00
|
|
|
class MMonCommand;
|
2009-06-16 21:25:02 +00:00
|
|
|
class MPoolSnap;
|
2009-10-10 05:27:38 +00:00
|
|
|
class MOSDMap;
|
2012-09-04 20:04:58 +00:00
|
|
|
class MOSDFailure;
|
2006-10-05 05:39:29 +00:00
|
|
|
|
2012-09-18 21:38:47 +00:00
|
|
|
/// information about a particular peer's failure reports for one osd
|
|
|
|
struct failure_reporter_t {
|
|
|
|
int num_reports; ///< reports from this reporter
|
|
|
|
utime_t failed_since; ///< when they think it failed
|
2012-09-04 20:04:58 +00:00
|
|
|
MOSDFailure *msg; ///< most recent failure message
|
2012-09-18 21:38:47 +00:00
|
|
|
|
2012-09-04 20:04:58 +00:00
|
|
|
failure_reporter_t() : num_reports(0), msg(NULL) {}
|
|
|
|
failure_reporter_t(utime_t s) : num_reports(1), failed_since(s), msg(NULL) {}
|
2012-09-18 21:38:47 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/// information about all failure reports for one osd
|
|
|
|
struct failure_info_t {
|
|
|
|
map<int, failure_reporter_t> reporters; ///< reporter -> # reports
|
|
|
|
utime_t max_failed_since; ///< most recent failed_since
|
|
|
|
int num_reports;
|
|
|
|
|
|
|
|
failure_info_t() : num_reports(0) {}
|
|
|
|
|
2012-09-04 18:50:30 +00:00
|
|
|
utime_t get_failed_since() {
|
|
|
|
if (max_failed_since == utime_t() && reporters.size()) {
|
|
|
|
// the old max must have canceled; recalculate.
|
|
|
|
for (map<int, failure_reporter_t>::iterator p = reporters.begin();
|
|
|
|
p != reporters.end();
|
|
|
|
++p)
|
|
|
|
if (p->second.failed_since > max_failed_since)
|
|
|
|
max_failed_since = p->second.failed_since;
|
|
|
|
}
|
|
|
|
return max_failed_since;
|
|
|
|
}
|
|
|
|
|
2012-09-04 20:04:58 +00:00
|
|
|
// set the message for the latest report. return any old message we had,
|
|
|
|
// if any, so we can discard it.
|
|
|
|
MOSDFailure *add_report(int who, utime_t failed_since, MOSDFailure *msg) {
|
2012-09-18 21:38:47 +00:00
|
|
|
map<int, failure_reporter_t>::iterator p = reporters.find(who);
|
|
|
|
if (p == reporters.end()) {
|
|
|
|
if (max_failed_since == utime_t())
|
|
|
|
max_failed_since = failed_since;
|
|
|
|
else if (max_failed_since < failed_since)
|
|
|
|
max_failed_since = failed_since;
|
2012-09-04 20:04:58 +00:00
|
|
|
p = reporters.insert(map<int, failure_reporter_t>::value_type(who, failure_reporter_t(failed_since))).first;
|
2012-09-18 21:38:47 +00:00
|
|
|
} else {
|
|
|
|
p->second.num_reports++;
|
|
|
|
}
|
|
|
|
num_reports++;
|
2012-09-04 20:04:58 +00:00
|
|
|
|
|
|
|
MOSDFailure *ret = p->second.msg;
|
|
|
|
p->second.msg = msg;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
void take_report_messages(list<MOSDFailure*>& ls) {
|
|
|
|
for (map<int, failure_reporter_t>::iterator p = reporters.begin();
|
|
|
|
p != reporters.end();
|
|
|
|
++p) {
|
|
|
|
if (p->second.msg) {
|
|
|
|
ls.push_back(p->second.msg);
|
|
|
|
p->second.msg = NULL;
|
|
|
|
}
|
|
|
|
}
|
2012-09-18 21:38:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void cancel_report(int who) {
|
|
|
|
map<int, failure_reporter_t>::iterator p = reporters.find(who);
|
|
|
|
if (p == reporters.end())
|
|
|
|
return;
|
|
|
|
num_reports -= p->second.num_reports;
|
|
|
|
reporters.erase(p);
|
|
|
|
if (reporters.empty())
|
|
|
|
max_failed_since = utime_t();
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2007-07-01 14:43:27 +00:00
|
|
|
class OSDMonitor : public PaxosService {
|
2006-10-24 16:55:51 +00:00
|
|
|
public:
|
2006-10-11 21:36:16 +00:00
|
|
|
OSDMap osdmap;
|
2006-10-05 05:39:29 +00:00
|
|
|
|
2006-10-24 16:55:51 +00:00
|
|
|
private:
|
2009-10-10 05:27:38 +00:00
|
|
|
map<epoch_t, list<PaxosServiceMessage*> > waiting_for_map;
|
2006-10-09 19:10:44 +00:00
|
|
|
|
|
|
|
// [leader]
|
|
|
|
OSDMap::Incremental pending_inc;
|
2012-09-18 21:38:47 +00:00
|
|
|
map<int, failure_info_t> failure_info;
|
2006-10-09 19:10:44 +00:00
|
|
|
map<int,utime_t> down_pending_out; // osd down -> out
|
|
|
|
|
2007-09-28 21:07:08 +00:00
|
|
|
map<int,double> osd_weight;
|
2012-04-25 16:23:49 +00:00
|
|
|
|
|
|
|
// map thrashing
|
|
|
|
int thrash_map;
|
|
|
|
int thrash_last_up_osd;
|
|
|
|
bool thrash();
|
|
|
|
|
2007-07-01 14:43:27 +00:00
|
|
|
// svc
|
2008-03-10 23:23:41 +00:00
|
|
|
public:
|
2011-11-11 18:45:27 +00:00
|
|
|
void create_initial();
|
2008-03-10 23:23:41 +00:00
|
|
|
private:
|
2012-02-06 23:31:20 +00:00
|
|
|
void update_from_paxos();
|
2007-07-01 14:43:27 +00:00
|
|
|
void create_pending(); // prepare a new pending
|
|
|
|
void encode_pending(bufferlist &bl);
|
2011-12-08 20:53:14 +00:00
|
|
|
void on_active();
|
2006-10-09 19:10:44 +00:00
|
|
|
|
2012-08-13 17:46:30 +00:00
|
|
|
void update_msgr_features();
|
|
|
|
|
2011-11-18 17:56:10 +00:00
|
|
|
void share_map_with_random_osd();
|
2007-09-09 23:47:09 +00:00
|
|
|
|
2011-12-08 20:53:14 +00:00
|
|
|
void update_logger();
|
|
|
|
|
2009-06-23 21:03:34 +00:00
|
|
|
void handle_query(PaxosServiceMessage *m);
|
|
|
|
bool preprocess_query(PaxosServiceMessage *m); // true if processed.
|
|
|
|
bool prepare_update(PaxosServiceMessage *m);
|
2007-08-27 02:49:41 +00:00
|
|
|
bool should_propose(double &delay);
|
2006-10-09 19:10:44 +00:00
|
|
|
|
2012-04-25 18:15:34 +00:00
|
|
|
bool can_mark_down(int o);
|
|
|
|
bool can_mark_up(int o);
|
|
|
|
bool can_mark_out(int o);
|
|
|
|
bool can_mark_in(int o);
|
|
|
|
|
2007-07-01 14:43:27 +00:00
|
|
|
// ...
|
|
|
|
void send_to_waiting(); // send current map to waiters.
|
2011-10-04 20:43:25 +00:00
|
|
|
MOSDMap *build_latest_full();
|
2010-09-09 18:09:57 +00:00
|
|
|
MOSDMap *build_incremental(epoch_t first, epoch_t last);
|
2011-10-04 20:43:25 +00:00
|
|
|
void send_full(PaxosServiceMessage *m);
|
2010-09-09 18:09:57 +00:00
|
|
|
void send_incremental(PaxosServiceMessage *m, epoch_t first);
|
2011-10-04 20:43:25 +00:00
|
|
|
void send_incremental(epoch_t first, entity_inst_t& dest, bool onetime);
|
2010-07-15 18:05:16 +00:00
|
|
|
|
2010-07-30 20:16:24 +00:00
|
|
|
void remove_redundant_pg_temp();
|
2010-12-15 00:21:39 +00:00
|
|
|
int reweight_by_utilization(int oload, std::string& out_str);
|
2007-07-01 14:43:27 +00:00
|
|
|
|
|
|
|
bool preprocess_failure(class MOSDFailure *m);
|
|
|
|
bool prepare_failure(class MOSDFailure *m);
|
2012-09-04 20:04:58 +00:00
|
|
|
void _reported_failure(list<MOSDFailure*>& m);
|
2007-07-01 14:43:27 +00:00
|
|
|
|
|
|
|
bool preprocess_boot(class MOSDBoot *m);
|
|
|
|
bool prepare_boot(class MOSDBoot *m);
|
2009-04-27 17:50:37 +00:00
|
|
|
void _booted(MOSDBoot *m, bool logit);
|
2007-07-01 14:43:27 +00:00
|
|
|
|
2008-05-13 21:54:29 +00:00
|
|
|
bool preprocess_alive(class MOSDAlive *m);
|
|
|
|
bool prepare_alive(class MOSDAlive *m);
|
2009-10-10 05:27:38 +00:00
|
|
|
void _reply_map(PaxosServiceMessage *m, epoch_t e);
|
2009-08-06 23:15:48 +00:00
|
|
|
|
|
|
|
bool preprocess_pgtemp(class MOSDPGTemp *m);
|
|
|
|
bool prepare_pgtemp(class MOSDPGTemp *m);
|
2008-05-13 19:19:05 +00:00
|
|
|
|
2011-08-25 20:30:49 +00:00
|
|
|
int _prepare_remove_pool(uint64_t pool);
|
2012-06-29 21:51:32 +00:00
|
|
|
int _prepare_rename_pool(uint64_t pool, string newname);
|
2011-05-26 20:17:12 +00:00
|
|
|
|
2009-07-07 22:06:33 +00:00
|
|
|
bool preprocess_pool_op ( class MPoolOp *m);
|
2009-07-07 23:04:58 +00:00
|
|
|
bool preprocess_pool_op_create ( class MPoolOp *m);
|
2009-07-07 22:06:33 +00:00
|
|
|
bool prepare_pool_op (MPoolOp *m);
|
2009-07-07 23:04:58 +00:00
|
|
|
bool prepare_pool_op_create (MPoolOp *m);
|
2010-02-12 22:25:57 +00:00
|
|
|
bool prepare_pool_op_delete(MPoolOp *m);
|
2010-03-10 20:56:37 +00:00
|
|
|
bool prepare_pool_op_auid(MPoolOp *m);
|
2012-01-10 19:25:25 +00:00
|
|
|
int prepare_new_pool(string& name, uint64_t auid, int crush_rule,
|
|
|
|
unsigned pg_num, unsigned pgp_num);
|
2010-03-08 14:44:51 +00:00
|
|
|
int prepare_new_pool(MPoolOp *m);
|
2012-04-24 03:33:48 +00:00
|
|
|
|
|
|
|
bool prepare_set_flag(MMonCommand *m, int flag);
|
|
|
|
bool prepare_unset_flag(MMonCommand *m, int flag);
|
2010-03-10 20:56:37 +00:00
|
|
|
|
2011-07-07 21:13:14 +00:00
|
|
|
void _pool_op_reply(MPoolOp *m, int ret, epoch_t epoch, bufferlist *blp=NULL);
|
2009-06-16 21:22:32 +00:00
|
|
|
|
2008-05-13 19:19:05 +00:00
|
|
|
struct C_Booted : public Context {
|
2007-07-01 14:43:27 +00:00
|
|
|
OSDMonitor *cmon;
|
|
|
|
MOSDBoot *m;
|
2010-01-29 21:05:44 +00:00
|
|
|
bool logit;
|
|
|
|
C_Booted(OSDMonitor *cm, MOSDBoot *m_, bool l=true) :
|
|
|
|
cmon(cm), m(m_), logit(l) {}
|
2007-07-01 14:43:27 +00:00
|
|
|
void finish(int r) {
|
|
|
|
if (r >= 0)
|
2010-01-29 21:05:44 +00:00
|
|
|
cmon->_booted(m, logit);
|
2007-07-01 14:43:27 +00:00
|
|
|
else
|
2009-06-23 21:03:34 +00:00
|
|
|
cmon->dispatch((PaxosServiceMessage*)m);
|
2007-07-01 14:43:27 +00:00
|
|
|
}
|
|
|
|
};
|
2009-06-23 21:03:34 +00:00
|
|
|
|
2009-08-06 23:15:48 +00:00
|
|
|
struct C_ReplyMap : public Context {
|
2008-05-13 19:19:05 +00:00
|
|
|
OSDMonitor *osdmon;
|
2009-10-10 05:27:38 +00:00
|
|
|
PaxosServiceMessage *m;
|
2009-08-06 23:15:48 +00:00
|
|
|
epoch_t e;
|
2009-10-10 05:27:38 +00:00
|
|
|
C_ReplyMap(OSDMonitor *o, PaxosServiceMessage *mm, epoch_t ee) : osdmon(o), m(mm), e(ee) {}
|
2008-05-13 19:19:05 +00:00
|
|
|
void finish(int r) {
|
2009-08-06 23:15:48 +00:00
|
|
|
osdmon->_reply_map(m, e);
|
2008-05-13 19:19:05 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
struct C_Reported : public Context {
|
2007-07-01 14:43:27 +00:00
|
|
|
OSDMonitor *cmon;
|
2012-09-04 20:04:58 +00:00
|
|
|
list<MOSDFailure*> msgs;
|
|
|
|
C_Reported(OSDMonitor *cm, list<MOSDFailure*>& m_)
|
|
|
|
: cmon(cm) {
|
|
|
|
msgs.swap(m_);
|
|
|
|
}
|
2007-07-01 14:43:27 +00:00
|
|
|
void finish(int r) {
|
|
|
|
if (r >= 0)
|
2012-09-04 20:04:58 +00:00
|
|
|
cmon->_reported_failure(msgs);
|
|
|
|
else {
|
|
|
|
while (!msgs.empty()) {
|
|
|
|
cmon->dispatch((PaxosServiceMessage*)msgs.front());
|
|
|
|
msgs.pop_front();
|
|
|
|
}
|
|
|
|
}
|
2007-07-01 14:43:27 +00:00
|
|
|
}
|
|
|
|
};
|
2009-07-07 22:06:33 +00:00
|
|
|
struct C_PoolOp : public Context {
|
2009-06-16 21:22:32 +00:00
|
|
|
OSDMonitor *osdmon;
|
2009-07-07 22:06:33 +00:00
|
|
|
MPoolOp *m;
|
2009-06-16 21:22:32 +00:00
|
|
|
int replyCode;
|
|
|
|
int epoch;
|
2010-04-09 23:53:13 +00:00
|
|
|
bufferlist *reply_data;
|
|
|
|
C_PoolOp(OSDMonitor * osd, MPoolOp *m_, int rc, int e, bufferlist *rd=NULL) :
|
|
|
|
osdmon(osd), m(m_), replyCode(rc), epoch(e), reply_data(rd) {}
|
2009-06-16 21:22:32 +00:00
|
|
|
void finish(int r) {
|
2011-07-07 21:13:14 +00:00
|
|
|
osdmon->_pool_op_reply(m, replyCode, epoch, reply_data);
|
2009-06-16 21:22:32 +00:00
|
|
|
}
|
|
|
|
};
|
2007-07-01 14:43:27 +00:00
|
|
|
|
2008-08-07 21:21:16 +00:00
|
|
|
bool preprocess_remove_snaps(class MRemoveSnaps *m);
|
|
|
|
bool prepare_remove_snaps(class MRemoveSnaps *m);
|
|
|
|
|
2006-10-05 05:39:29 +00:00
|
|
|
public:
|
2011-10-14 04:26:13 +00:00
|
|
|
OSDMonitor(Monitor *mn, Paxos *p);
|
2006-10-05 05:39:29 +00:00
|
|
|
|
|
|
|
void tick(); // check state, take actions
|
|
|
|
|
2012-09-10 22:45:50 +00:00
|
|
|
int parse_osd_id(const char *s, stringstream *pss);
|
2012-09-11 18:35:20 +00:00
|
|
|
void parse_loc_map(const vector<string>& args, int start, map<string,string> *ploc);
|
2012-09-10 22:45:50 +00:00
|
|
|
|
2012-03-07 04:55:11 +00:00
|
|
|
void get_health(list<pair<health_status_t,string> >& summary,
|
|
|
|
list<pair<health_status_t,string> > *detail) const;
|
2008-02-28 18:21:48 +00:00
|
|
|
bool preprocess_command(MMonCommand *m);
|
|
|
|
bool prepare_command(MMonCommand *m);
|
2007-12-19 04:53:48 +00:00
|
|
|
|
2011-01-06 21:33:07 +00:00
|
|
|
void handle_osd_timeouts(const utime_t &now,
|
2012-04-24 17:55:18 +00:00
|
|
|
std::map<int,utime_t> &last_osd_report);
|
2007-02-26 00:17:32 +00:00
|
|
|
void mark_all_down();
|
|
|
|
|
2009-10-10 05:27:38 +00:00
|
|
|
void send_latest(PaxosServiceMessage *m, epoch_t start=0);
|
2009-11-06 21:54:53 +00:00
|
|
|
void send_latest_now_nodelete(PaxosServiceMessage *m, epoch_t start=0) {
|
|
|
|
send_incremental(m, start);
|
|
|
|
}
|
2007-05-16 21:53:22 +00:00
|
|
|
|
2010-11-04 05:28:54 +00:00
|
|
|
epoch_t blacklist(entity_addr_t a, utime_t until);
|
2008-12-16 18:41:55 +00:00
|
|
|
|
2012-08-21 21:22:20 +00:00
|
|
|
void dump_info(Formatter *f);
|
|
|
|
|
2009-08-28 23:48:09 +00:00
|
|
|
void check_subs();
|
2009-09-21 21:30:19 +00:00
|
|
|
void check_sub(Subscription *sub);
|
2009-08-28 23:48:09 +00:00
|
|
|
|
2009-10-22 05:48:12 +00:00
|
|
|
void add_flag(int flag) {
|
2009-10-23 00:15:20 +00:00
|
|
|
if (!(osdmap.flags & flag)) {
|
|
|
|
if (pending_inc.new_flags < 0)
|
|
|
|
pending_inc.new_flags = osdmap.flags;
|
|
|
|
pending_inc.new_flags |= flag;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void remove_flag(int flag) {
|
|
|
|
if(osdmap.flags & flag) {
|
|
|
|
if (pending_inc.new_flags < 0)
|
|
|
|
pending_inc.new_flags = osdmap.flags;
|
|
|
|
pending_inc.new_flags &= ~flag;
|
|
|
|
}
|
2009-10-22 05:48:12 +00:00
|
|
|
}
|
2006-10-05 05:39:29 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
#endif
|