mirror of
https://github.com/ceph/ceph
synced 2025-02-08 19:38:47 +00:00
git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@2100 29311d96-e01e-0410-9327-a35deaab8ce9
406 lines
8.1 KiB
C++
406 lines
8.1 KiB
C++
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
|
// vim: ts=8 sw=2 smarttab
|
|
/*
|
|
* Ceph - scalable distributed file system
|
|
*
|
|
* Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
|
|
*
|
|
* This is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License version 2.1, as published by the Free Software
|
|
* Foundation. See file COPYING.
|
|
*
|
|
*/
|
|
|
|
// TODO: missing run() method, which creates the two main timers, refreshTimer and readTimer
|
|
|
|
#include "Monitor.h"
|
|
|
|
#include "osd/OSDMap.h"
|
|
|
|
#include "MonitorStore.h"
|
|
|
|
#include "msg/Message.h"
|
|
#include "msg/Messenger.h"
|
|
|
|
#include "messages/MPing.h"
|
|
#include "messages/MPingAck.h"
|
|
#include "messages/MGenericMessage.h"
|
|
#include "messages/MMonCommand.h"
|
|
#include "messages/MMonCommandAck.h"
|
|
|
|
#include "messages/MMonPaxos.h"
|
|
|
|
#include "common/Timer.h"
|
|
#include "common/Clock.h"
|
|
|
|
#include "OSDMonitor.h"
|
|
#include "MDSMonitor.h"
|
|
#include "ClientMonitor.h"
|
|
#include "PGMonitor.h"
|
|
|
|
#include "config.h"
|
|
|
|
// Per-file debug logging macros for the monitor.
//
// A statement `dout(l) << ...` produces output only when level `l` is at or
// below g_conf.debug or g_conf.debug_mon.  Each line is prefixed with the
// current time, this monitor's id (whoami) and its current election role.
//
// The "if (!(cond)) {} else stream" shape keeps a following `else` in the
// caller from binding to the macro's hidden `if`, and `l` is parenthesized so
// that an arbitrary expression can be passed as the level.  As before, `l`
// may be evaluated twice.
#define dout(l) \
  if (!((l) <= g_conf.debug || (l) <= g_conf.debug_mon)) {} else \
    *_dout << dbeginl << g_clock.now() << " mon" << whoami \
           << (is_starting() ? (const char*)"(starting)" : \
               (is_leader() ? (const char*)"(leader)" : \
                (is_peon() ? (const char*)"(peon)" : (const char*)"(?\?)"))) \
           << " "

// Same as dout(), but writes to the error stream *_derr.
#define derr(l) \
  if (!((l) <= g_conf.debug || (l) <= g_conf.debug_mon)) {} else \
    *_derr << dbeginl << g_clock.now() << " mon" << whoami \
           << (is_starting() ? (const char*)"(starting)" : \
               (is_leader() ? (const char*)"(leader)" : \
                (is_peon() ? (const char*)"(peon)" : (const char*)"(?\?)"))) \
           << " "
|
|
|
|
|
|
|
|
void Monitor::init()
|
|
{
|
|
lock.Lock();
|
|
|
|
dout(1) << "init" << dendl;
|
|
|
|
// store
|
|
char s[80];
|
|
sprintf(s, "mondata/mon%d", whoami);
|
|
store = new MonitorStore(s);
|
|
|
|
if (g_conf.mkfs)
|
|
store->mkfs();
|
|
|
|
store->mount();
|
|
|
|
// create
|
|
osdmon = new OSDMonitor(this, &paxos_osdmap);
|
|
mdsmon = new MDSMonitor(this, &paxos_mdsmap);
|
|
clientmon = new ClientMonitor(this, &paxos_clientmap);
|
|
pgmon = new PGMonitor(this, &paxos_pgmap);
|
|
|
|
// init paxos
|
|
paxos_test.init();
|
|
paxos_osdmap.init();
|
|
paxos_mdsmap.init();
|
|
paxos_clientmap.init();
|
|
paxos_pgmap.init();
|
|
|
|
// i'm ready!
|
|
messenger->set_dispatcher(this);
|
|
|
|
// start ticker
|
|
reset_tick();
|
|
|
|
// call election?
|
|
if (monmap->size() > 1) {
|
|
assert(monmap->size() != 2);
|
|
call_election();
|
|
} else {
|
|
// we're standalone.
|
|
set<int> q;
|
|
q.insert(whoami);
|
|
win_election(1, q);
|
|
}
|
|
|
|
lock.Unlock();
|
|
}
|
|
|
|
void Monitor::shutdown()
|
|
{
|
|
dout(1) << "shutdown" << dendl;
|
|
|
|
elector.shutdown();
|
|
|
|
if (is_leader()) {
|
|
// stop osds.
|
|
set<int32_t> ls;
|
|
osdmon->osdmap.get_all_osds(ls);
|
|
for (set<int32_t>::iterator it = ls.begin(); it != ls.end(); it++) {
|
|
if (osdmon->osdmap.is_down(*it)) continue;
|
|
dout(10) << "sending shutdown to osd" << *it << dendl;
|
|
messenger->send_message(new MGenericMessage(CEPH_MSG_SHUTDOWN),
|
|
osdmon->osdmap.get_inst(*it));
|
|
}
|
|
osdmon->mark_all_down();
|
|
|
|
// monitors too.
|
|
for (unsigned i=0; i<monmap->size(); i++)
|
|
if ((int)i != whoami)
|
|
messenger->send_message(new MGenericMessage(CEPH_MSG_SHUTDOWN),
|
|
monmap->get_inst(i));
|
|
}
|
|
|
|
// cancel all events
|
|
cancel_tick();
|
|
timer.cancel_all();
|
|
timer.join();
|
|
|
|
// unmount my local storage
|
|
if (store)
|
|
delete store;
|
|
|
|
// clean up
|
|
if (osdmon) delete osdmon;
|
|
if (mdsmon) delete mdsmon;
|
|
if (clientmon) delete clientmon;
|
|
if (pgmon) delete pgmon;
|
|
|
|
// die.
|
|
messenger->shutdown();
|
|
}
|
|
|
|
|
|
void Monitor::call_election()
|
|
{
|
|
if (monmap->size() == 1) return;
|
|
|
|
dout(10) << "call_election" << dendl;
|
|
state = STATE_STARTING;
|
|
|
|
// tell paxos
|
|
paxos_test.election_starting();
|
|
paxos_mdsmap.election_starting();
|
|
paxos_osdmap.election_starting();
|
|
paxos_clientmap.election_starting();
|
|
|
|
// call a new election
|
|
elector.call_election();
|
|
}
|
|
|
|
/**
 * Record that this monitor won the election.
 *
 * @param epoch   election epoch, stored in mon_epoch
 * @param active  quorum members, copied into `quorum`
 *
 * Switches to STATE_LEADER with ourselves as leader, runs leader-side
 * initialization on every paxos instance, then tells each per-map
 * service that the election has finished.
 */
void Monitor::win_election(epoch_t epoch, set<int>& active)
{
  // new role
  state = STATE_LEADER;
  leader = whoami;
  mon_epoch = epoch;
  quorum = active;

  dout(10) << "win_election, epoch " << mon_epoch
           << " quorum is " << quorum
           << dendl;

  // paxos: leader side
  paxos_test.leader_init();
  paxos_mdsmap.leader_init();
  paxos_osdmap.leader_init();
  paxos_clientmap.leader_init();
  paxos_pgmap.leader_init();

  // services
  osdmon->election_finished();
  mdsmon->election_finished();
  clientmon->election_finished();
  pgmon->election_finished();
}
|
|
|
|
/**
 * Record that another monitor won the election.
 *
 * @param epoch  election epoch, stored in mon_epoch
 * @param l      id of the winning monitor, stored in `leader`
 *
 * Switches to STATE_PEON, runs peon-side initialization on every paxos
 * instance, then tells each per-map service that the election has
 * finished.
 */
void Monitor::lose_election(epoch_t epoch, int l)
{
  // new role
  state = STATE_PEON;
  mon_epoch = epoch;
  leader = l;

  dout(10) << "lose_election, epoch " << mon_epoch
           << " leader is mon" << leader
           << dendl;

  // paxos: peon side
  paxos_test.peon_init();
  paxos_mdsmap.peon_init();
  paxos_osdmap.peon_init();
  paxos_clientmap.peon_init();
  paxos_pgmap.peon_init();

  // services
  osdmon->election_finished();
  mdsmon->election_finished();
  clientmon->election_finished();
  pgmon->election_finished();
}
|
|
|
|
|
|
/**
 * Handle an administrative command message.
 *
 * "mds" commands are handed to the MDS monitor together with the message
 * (no reply is sent from here and the message is not deleted here).
 * "stop" triggers do_stop() and is acknowledged with code 0.  Everything
 * else -- including an empty command and "osd", which has no handler yet --
 * is acknowledged with -1 / "unrecognized command".  In all non-"mds"
 * cases the message is deleted after the ack is sent.
 */
void Monitor::handle_command(MMonCommand *m)
{
  dout(0) << "handle_command " << *m << dendl;

  // reply used unless a branch below overrides it
  int r = -1;
  string rs = "unrecognized command";

  const bool have_cmd = !m->cmd.empty();

  // forwarded wholesale; mdsmon takes the message from here
  if (have_cmd && m->cmd[0] == "mds") {
    mdsmon->dispatch(m);
    return;
  }

  if (have_cmd && m->cmd[0] == "stop") {
    r = 0;
    rs = "stopping";
    do_stop();
  }
  else if (have_cmd && m->cmd[0] == "osd") {
    // nothing implemented yet; falls through to the default reply
  }

  // reply
  messenger->send_message(new MMonCommandAck(r, rs), m->get_source_inst());
  delete m;
}
|
|
|
|
|
|
void Monitor::do_stop()
|
|
{
|
|
dout(0) << "do_stop -- shutting down" << dendl;
|
|
stopping = true;
|
|
mdsmon->do_stop();
|
|
}
|
|
|
|
|
|
/**
 * Entry point for every incoming message.
 *
 * Holds `lock` while routing the message, by type, to the matching
 * handler: local handlers for ping acks / shutdown / commands, the
 * OSD, MDS, client and PG services for their message types, the paxos
 * instance named by the message's machine id, or the elector.  An
 * unknown message type or paxos machine id trips an assert.
 *
 * Paxos messages from a newer epoch trigger a new election; paxos
 * messages whose epoch differs from ours are dropped.
 */
void Monitor::dispatch(Message *m)
{
  lock.Lock();

  switch (m->get_type()) {

    // misc
  case CEPH_MSG_PING_ACK:
    handle_ping_ack((MPingAck*)m);
    break;

  case CEPH_MSG_SHUTDOWN:
    if (m->get_source().is_osd()) {
      osdmon->dispatch(m);
    } else {
      handle_shutdown(m);
    }
    break;

  case MSG_MON_COMMAND:
    handle_command((MMonCommand*)m);
    break;

    // OSDs
  case CEPH_MSG_OSD_GETMAP:
  case MSG_OSD_FAILURE:
  case MSG_OSD_BOOT:
  case MSG_OSD_IN:
  case MSG_OSD_OUT:
    osdmon->dispatch(m);
    break;

    // MDSs
  case MSG_MDS_BEACON:
  case CEPH_MSG_MDS_GETMAP:
    mdsmon->dispatch(m);
    break;

    // clients
  case CEPH_MSG_CLIENT_MOUNT:
  case CEPH_MSG_CLIENT_UNMOUNT:
    clientmon->dispatch(m);
    break;

    // pg
  case CEPH_MSG_STATFS:
  case MSG_PGSTATS:
    pgmon->dispatch(m);
    break;

    // paxos
  case MSG_MON_PAXOS:
    {
      MMonPaxos *pm = (MMonPaxos*)m;

      // sanitize: a newer epoch means we missed an election
      if (pm->epoch > mon_epoch)
        call_election();
      if (pm->epoch != mon_epoch) {
        delete pm;
        break;   // leaves the outer switch; the message is dropped
      }

      // send it to the right paxos instance
      switch (pm->machine_id) {
      case PAXOS_TEST:
        paxos_test.dispatch(m);
        break;
      case PAXOS_OSDMAP:
        paxos_osdmap.dispatch(m);
        break;
      case PAXOS_MDSMAP:
        paxos_mdsmap.dispatch(m);
        break;
      case PAXOS_CLIENTMAP:
        paxos_clientmap.dispatch(m);
        break;
      case PAXOS_PGMAP:
        // previously unrouted: pgmap paxos traffic fell into assert(0).
        // NOTE(review): assumes PAXOS_PGMAP is the machine id paxos_pgmap
        // was constructed with -- confirm against Monitor.h.
        paxos_pgmap.dispatch(m);
        break;
      default:
        assert(0);
      }
    }
    break;

    // elector messages
  case MSG_MON_ELECTION:
    elector.dispatch(m);
    break;

  default:
    dout(0) << "unknown message " << m << " " << *m << " from " << m->get_source_inst() << dendl;
    assert(0);
  }

  lock.Unlock();
}
|
|
|
|
|
|
/**
 * Handle a shutdown request from another monitor.
 *
 * The sender must be a monitor (asserted).  The request is honoured only
 * when it comes from the current leader; otherwise it is logged and
 * ignored.  The message is deleted in both cases.
 */
void Monitor::handle_shutdown(Message *m)
{
  assert(m->get_source().is_mon());

  if (m->get_source().num() != get_leader()) {
    dout(1) << "ignoring shutdown from non-leader " << m->get_source() << dendl;
  } else {
    dout(1) << "shutdown from leader " << m->get_source() << dendl;
    shutdown();
  }

  delete m;
}
|
|
|
|
/**
 * Handle a ping acknowledgement.  Nothing is done with it yet; the
 * message is simply deleted.
 */
void Monitor::handle_ping_ack(MPingAck *m)
{
  // ... (no processing implemented)

  delete m;
}
|
|
|
|
|
|
|
|
|
|
/************ TICK ***************/
|
|
|
|
class C_Mon_Tick : public Context {
|
|
Monitor *mon;
|
|
public:
|
|
C_Mon_Tick(Monitor *m) : mon(m) {}
|
|
void finish(int r) {
|
|
mon->tick();
|
|
}
|
|
};
|
|
|
|
void Monitor::cancel_tick()
|
|
{
|
|
if (tick_timer) timer.cancel_event(tick_timer);
|
|
}
|
|
|
|
void Monitor::reset_tick()
|
|
{
|
|
cancel_tick();
|
|
tick_timer = new C_Mon_Tick(this);
|
|
timer.add_event_after(g_conf.mon_tick_interval, tick_timer);
|
|
}
|
|
|
|
|
|
void Monitor::tick()
|
|
{
|
|
tick_timer = 0;
|
|
|
|
// ok go.
|
|
dout(11) << "tick" << dendl;
|
|
|
|
osdmon->tick();
|
|
mdsmon->tick();
|
|
|
|
// next tick!
|
|
reset_tick();
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|