net: decrease trigger turnaround time

This commit is contained in:
Thomas Schoebel-Theuer 2013-05-11 22:52:11 +02:00 committed by Thomas Schoebel-Theuer
parent 0e15b38457
commit e3c10d31a9
4 changed files with 108 additions and 57 deletions

View File

@ -165,13 +165,6 @@ config MARS_SYNC_FLIP_INTERVAL
the time interval to increase throughput in favour the time interval to increase throughput in favour
of latency. of latency.
config MARS_FAST_TRIGGER
bool "fast internal triggering"
depends on MARS
default y
---help---
Normally ON. Switch off in case of endless trigger loops
config MARS_NETIO_TIMEOUT config MARS_NETIO_TIMEOUT
int "timeout for remote IO operations (in seconds)" int "timeout for remote IO operations (in seconds)"
depends on MARS depends on MARS

View File

@ -319,11 +319,12 @@ int handler_thread(void *data)
status = -EPROTO; status = -EPROTO;
switch (cmd.cmd_code & CMD_FLAG_MASK) { switch (cmd.cmd_code & CMD_FLAG_MASK) {
case CMD_NOP: case CMD_NOP:
MARS_DBG("#%d got NOP operation\n", sock->s_debug_nr);
status = 0; status = 0;
MARS_DBG("#%d got NOP operation\n", sock->s_debug_nr);
break; break;
case CMD_NOTIFY: case CMD_NOTIFY:
mars_trigger(); status = 0;
from_remote_trigger();
break; break;
case CMD_GETINFO: case CMD_GETINFO:
{ {
@ -721,7 +722,7 @@ static int _server_thread(void *data)
MARS_DBG("kill server sio bricks (when possible) = %d\n", status); MARS_DBG("kill server sio bricks (when possible) = %d\n", status);
if (!mars_global || !mars_global->global_power.button) { if (!mars_global || !mars_global->global_power.button) {
brick_msleep(2000); brick_msleep(1000);
continue; continue;
} }
@ -731,7 +732,7 @@ static int _server_thread(void *data)
if (status == -EAGAIN) if (status == -EAGAIN)
continue; // without error message continue; // without error message
MARS_WRN("accept status = %d\n", status); MARS_WRN("accept status = %d\n", status);
brick_msleep(2000); brick_msleep(1000);
continue; continue;
} }
handler_socket.s_shutdown_on_err = true; handler_socket.s_shutdown_on_err = true;
@ -781,7 +782,7 @@ static int _server_thread(void *data)
brick = NULL; brick = NULL;
atomic_dec(&server_handler_count); atomic_dec(&server_handler_count);
} }
brick_msleep(3000); brick_msleep(2000);
} }
MARS_INF("-------- cleaning up ----------\n"); MARS_INF("-------- cleaning up ----------\n");

View File

@ -1011,10 +1011,8 @@ void _make_new_replaylink(struct mars_rotate *rot, char *new_host, int new_seque
_update_replay_link(rot, &inf); _update_replay_link(rot, &inf);
_update_version_link(rot, &inf); _update_version_link(rot, &inf);
#ifdef CONFIG_MARS_FAST_TRIGGER
mars_trigger(); mars_trigger();
mars_remote_trigger(); mars_remote_trigger();
#endif
} }
static static
@ -1238,6 +1236,12 @@ done:
// remote workers // remote workers
static
DEFINE_SPINLOCK(peer_lock);
static
struct list_head peer_anchor = LIST_HEAD_INIT(peer_anchor);
struct mars_peerinfo { struct mars_peerinfo {
struct mars_global *global; struct mars_global *global;
char *peer; char *peer;
@ -1245,8 +1249,11 @@ struct mars_peerinfo {
struct mars_socket socket; struct mars_socket socket;
struct task_struct *peer_thread; struct task_struct *peer_thread;
spinlock_t lock; spinlock_t lock;
struct list_head peer_head;
struct list_head remote_dent_list; struct list_head remote_dent_list;
int maxdepth; int maxdepth;
bool to_remote_trigger;
bool from_remote_trigger;
}; };
static static
@ -1492,12 +1499,12 @@ int run_bones(struct mars_peerinfo *peer)
run_trigger = true; run_trigger = true;
//MARS_DBG("path = '%s' worker status = %d\n", remote_dent->d_path, status); //MARS_DBG("path = '%s' worker status = %d\n", remote_dent->d_path, status);
} }
mars_free_dent_all(NULL, &tmp_list); mars_free_dent_all(NULL, &tmp_list);
#ifdef CONFIG_MARS_FAST_TRIGGER
if (run_trigger) { if (run_trigger) {
mars_trigger(); mars_trigger();
} }
#endif
return status; return status;
} }
@ -1517,8 +1524,6 @@ void _peer_cleanup(struct mars_peerinfo *peer)
} }
static DECLARE_WAIT_QUEUE_HEAD(remote_event); static DECLARE_WAIT_QUEUE_HEAD(remote_event);
static atomic_t remote_trigger_count = ATOMIC_INIT(0);
static atomic_t peer_thread_count = ATOMIC_INIT(0);
static static
int peer_thread(void *data) int peer_thread(void *data)
@ -1528,7 +1533,6 @@ int peer_thread(void *data)
struct sockaddr_storage sockaddr = {}; struct sockaddr_storage sockaddr = {};
int pause_time = 0; int pause_time = 0;
bool do_kill = false; bool do_kill = false;
bool flip = false;
int status; int status;
if (!peer) if (!peer)
@ -1543,14 +1547,11 @@ int peer_thread(void *data)
goto done; goto done;
} }
atomic_inc(&peer_thread_count);
while (!brick_thread_should_stop()) { while (!brick_thread_should_stop()) {
LIST_HEAD(tmp_list); LIST_HEAD(tmp_list);
LIST_HEAD(old_list); LIST_HEAD(old_list);
unsigned long flags; unsigned long flags;
struct mars_cmd cmd = { struct mars_cmd cmd = {
.cmd_code = CMD_GETENTS,
.cmd_str1 = peer->path, .cmd_str1 = peer->path,
.cmd_int1 = peer->maxdepth, .cmd_int1 = peer->maxdepth,
}; };
@ -1559,7 +1560,7 @@ int peer_thread(void *data)
if (do_kill) { if (do_kill) {
do_kill = false; do_kill = false;
_peer_cleanup(peer); _peer_cleanup(peer);
brick_msleep(5000); brick_msleep(1000);
continue; continue;
} }
if (!mars_net_is_alive) { if (!mars_net_is_alive) {
@ -1570,7 +1571,7 @@ int peer_thread(void *data)
status = mars_create_socket(&peer->socket, &sockaddr, false); status = mars_create_socket(&peer->socket, &sockaddr, false);
if (unlikely(status < 0)) { if (unlikely(status < 0)) {
MARS_INF("no connection to '%s'\n", real_peer); MARS_INF("no connection to '%s'\n", real_peer);
brick_msleep(5000); brick_msleep(2000);
continue; continue;
} }
do_kill = true; do_kill = true;
@ -1580,30 +1581,31 @@ int peer_thread(void *data)
continue; continue;
} }
/* This is not completely race-free, but does no harm. if (peer->from_remote_trigger) {
* In worst case, network propagation will just take pause_time = 0;
* a litte longer (see CONFIG_MARS_PROPAGATE_INTERVAL). peer->from_remote_trigger = false;
*/ MARS_DBG("got notify from peer.\n");
if (!flip && atomic_read(&remote_trigger_count) > 0) {
MARS_DBG("sending notify ... remote_tiogger_count = %d\n", atomic_read(&remote_trigger_count));
atomic_dec(&remote_trigger_count);
cmd.cmd_code = CMD_NOTIFY;
flip = true;
} }
status = mars_send_struct(&peer->socket, &cmd, mars_cmd_meta); status = 0;
if (peer->to_remote_trigger) {
pause_time = 0;
peer->to_remote_trigger = false;
MARS_DBG("sending notify to peer...\n");
cmd.cmd_code = CMD_NOTIFY;
status = mars_send_struct(&peer->socket, &cmd, mars_cmd_meta);
}
if (likely(status >= 0)) {
cmd.cmd_code = CMD_GETENTS;
status = mars_send_struct(&peer->socket, &cmd, mars_cmd_meta);
}
if (unlikely(status < 0)) { if (unlikely(status < 0)) {
MARS_WRN("communication error on send, status = %d\n", status); MARS_WRN("communication error on send, status = %d\n", status);
if (do_kill) { if (do_kill) {
do_kill = false; do_kill = false;
_peer_cleanup(peer); _peer_cleanup(peer);
} }
brick_msleep(2000);
continue;
}
if (cmd.cmd_code == CMD_NOTIFY) {
flip = false;
pause_time = 0;
brick_msleep(1000); brick_msleep(1000);
continue; continue;
} }
@ -1617,7 +1619,7 @@ int peer_thread(void *data)
_peer_cleanup(peer); _peer_cleanup(peer);
} }
mars_free_dent_all(NULL, &tmp_list); mars_free_dent_all(NULL, &tmp_list);
brick_msleep(5000); brick_msleep(2000);
continue; continue;
} }
@ -1631,15 +1633,17 @@ int peer_thread(void *data)
traced_unlock(&peer->lock, flags); traced_unlock(&peer->lock, flags);
mars_trigger();
mars_free_dent_all(NULL, &old_list); mars_free_dent_all(NULL, &old_list);
} }
brick_msleep(1000); brick_msleep(100);
if (!brick_thread_should_stop()) { if (!brick_thread_should_stop()) {
if (pause_time < mars_propagate_interval) if (pause_time < mars_propagate_interval)
pause_time++; pause_time++;
wait_event_interruptible_timeout(remote_event, wait_event_interruptible_timeout(remote_event,
atomic_read(&remote_trigger_count) > 0 || (peer->to_remote_trigger | peer->from_remote_trigger) ||
(mars_global && mars_global->main_trigger), (mars_global && mars_global->main_trigger),
pause_time * HZ); pause_time * HZ);
} }
@ -1652,16 +1656,65 @@ int peer_thread(void *data)
} }
done: done:
atomic_dec(&peer_thread_count);
brick_string_free(real_peer); brick_string_free(real_peer);
return 0; return 0;
} }
static
void _make_alive(void)
{
struct timespec now;
char *tmp;
get_lamport(&now);
tmp = path_make("%ld.%09ld", now.tv_sec, now.tv_nsec);
if (likely(tmp)) {
_make_alivelink_str("time", tmp);
brick_string_free(tmp);
}
_make_alivelink("alive", mars_global && mars_global->global_power.button ? 1 : 0);
_make_alivelink_str("tree", SYMLINK_TREE_VERSION);
}
/* React to a trigger notification that arrived from a remote host.
 *
 * First refreshes the local alive information via _make_alive(), then
 * sets the from_remote_trigger flag on every peer currently linked into
 * peer_anchor (walked under peer_lock), and finally wakes all sleepers
 * on the remote_event wait queue.
 */
void from_remote_trigger(void)
{
	struct list_head *pos;
	unsigned long flags;
	int nr_peers = 0;

	_make_alive();

	// TODO: replace peer_lock with rw_lock
	traced_lock(&peer_lock, flags);
	pos = peer_anchor.next;
	while (pos != &peer_anchor) {
		struct mars_peerinfo *peer;

		peer = container_of(pos, struct mars_peerinfo, peer_head);
		peer->from_remote_trigger = true;
		nr_peers++;
		pos = pos->next;
	}
	traced_unlock(&peer_lock, flags);

	MARS_DBG("got trigger for %d peers\n", nr_peers);
	wake_up_interruptible_all(&remote_event);
}
EXPORT_SYMBOL_GPL(from_remote_trigger);
static static
void __mars_remote_trigger(void) void __mars_remote_trigger(void)
{ {
int count = atomic_read(&peer_thread_count); struct list_head *tmp;
atomic_add(count, &remote_trigger_count); int count = 0;
unsigned long flags;
// TODO: replace peer_lock with rw_lock
traced_lock(&peer_lock, flags);
for (tmp = peer_anchor.next; tmp != &peer_anchor; tmp = tmp->next) {
struct mars_peerinfo *peer = container_of(tmp, struct mars_peerinfo, peer_head);
peer->to_remote_trigger = true;
count++;
}
traced_unlock(&peer_lock, flags);
MARS_DBG("triggered %d peers\n", count);
wake_up_interruptible_all(&remote_event); wake_up_interruptible_all(&remote_event);
} }
@ -1706,6 +1759,10 @@ static int _kill_peer(void *buf, struct mars_dent *dent)
return 0; return 0;
} }
traced_lock(&peer_lock, flags);
list_del_init(&peer->peer_head);
traced_unlock(&peer_lock, flags);
MARS_INF("stopping peer thread...\n"); MARS_INF("stopping peer thread...\n");
if (peer->peer_thread) { if (peer->peer_thread) {
brick_thread_stop(peer->peer_thread); brick_thread_stop(peer->peer_thread);
@ -1743,6 +1800,8 @@ static int _make_peer(struct mars_global *global, struct mars_dent *dent, char *
MARS_DBG("peer '%s'\n", mypeer); MARS_DBG("peer '%s'\n", mypeer);
if (!dent->d_private) { if (!dent->d_private) {
unsigned long flags;
dent->d_private = brick_zmem_alloc(sizeof(struct mars_peerinfo)); dent->d_private = brick_zmem_alloc(sizeof(struct mars_peerinfo));
if (!dent->d_private) { if (!dent->d_private) {
MARS_ERR("no memory for peer structure\n"); MARS_ERR("no memory for peer structure\n");
@ -1755,7 +1814,12 @@ static int _make_peer(struct mars_global *global, struct mars_dent *dent, char *
peer->path = brick_strdup(path); peer->path = brick_strdup(path);
peer->maxdepth = 2; peer->maxdepth = 2;
spin_lock_init(&peer->lock); spin_lock_init(&peer->lock);
INIT_LIST_HEAD(&peer->peer_head);
INIT_LIST_HEAD(&peer->remote_dent_list); INIT_LIST_HEAD(&peer->remote_dent_list);
traced_lock(&peer_lock, flags);
list_add_tail(&peer->peer_head, &peer_anchor);
traced_unlock(&peer_lock, flags);
} }
peer = dent->d_private; peer = dent->d_private;
@ -4150,8 +4214,6 @@ static int light_thread(void *data)
MARS_INF("-------- starting as host '%s' ----------\n", id); MARS_INF("-------- starting as host '%s' ----------\n", id);
while (_global.global_power.button || !list_empty(&_global.brick_anchor)) { while (_global.global_power.button || !list_empty(&_global.brick_anchor)) {
struct timespec now;
char *tmp;
int status; int status;
MARS_DBG("-------- NEW ROUND %d ---------\n", atomic_read(&server_handler_count)); MARS_DBG("-------- NEW ROUND %d ---------\n", atomic_read(&server_handler_count));
@ -4169,14 +4231,7 @@ static int light_thread(void *data)
mars_net_is_alive = false; mars_net_is_alive = false;
} }
get_lamport(&now); _make_alive();
tmp = path_make("%ld.%09ld", now.tv_sec, now.tv_nsec);
if (likely(tmp)) {
_make_alivelink_str("time", tmp);
brick_string_free(tmp);
}
_make_alivelink("alive", _global.global_power.button ? 1 : 0);
_make_alivelink_str("tree", SYMLINK_TREE_VERSION);
compute_emergency_mode(); compute_emergency_mode();
@ -4217,7 +4272,7 @@ static int light_thread(void *data)
done: done:
MARS_INF("-------- cleaning up ----------\n"); MARS_INF("-------- cleaning up ----------\n");
mars_remote_trigger(); mars_remote_trigger();
brick_msleep(2000); brick_msleep(1000);
mars_free_dent_all(&_global, &_global.dent_anchor); mars_free_dent_all(&_global, &_global.dent_anchor);
mars_kill_brick_all(&_global, &_global.brick_anchor, false); mars_kill_brick_all(&_global, &_global.brick_anchor, false);

View File

@ -180,6 +180,8 @@ extern int mars_mem_percent;
extern int light_checker(struct mars_dent *parent, const char *_name, int namlen, unsigned int d_type, int *prefix, int *serial, bool *use_channel); extern int light_checker(struct mars_dent *parent, const char *_name, int namlen, unsigned int d_type, int *prefix, int *serial, bool *use_channel);
void from_remote_trigger(void);
///////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////
// init // init