net: decrease trigger turnaround time

This commit is contained in:
Thomas Schoebel-Theuer 2013-05-11 22:52:11 +02:00 committed by Thomas Schoebel-Theuer
parent 0e15b38457
commit e3c10d31a9
4 changed files with 108 additions and 57 deletions

View File

@ -165,13 +165,6 @@ config MARS_SYNC_FLIP_INTERVAL
the time interval to increase throughput in favour
of latency.
config MARS_FAST_TRIGGER
bool "fast internal triggering"
depends on MARS
default y
---help---
Normally ON. Switch off in case of endless trigger loops
config MARS_NETIO_TIMEOUT
int "timeout for remote IO operations (in seconds)"
depends on MARS

View File

@ -319,11 +319,12 @@ int handler_thread(void *data)
status = -EPROTO;
switch (cmd.cmd_code & CMD_FLAG_MASK) {
case CMD_NOP:
MARS_DBG("#%d got NOP operation\n", sock->s_debug_nr);
status = 0;
MARS_DBG("#%d got NOP operation\n", sock->s_debug_nr);
break;
case CMD_NOTIFY:
mars_trigger();
status = 0;
from_remote_trigger();
break;
case CMD_GETINFO:
{
@ -721,7 +722,7 @@ static int _server_thread(void *data)
MARS_DBG("kill server sio bricks (when possible) = %d\n", status);
if (!mars_global || !mars_global->global_power.button) {
brick_msleep(2000);
brick_msleep(1000);
continue;
}
@ -731,7 +732,7 @@ static int _server_thread(void *data)
if (status == -EAGAIN)
continue; // without error message
MARS_WRN("accept status = %d\n", status);
brick_msleep(2000);
brick_msleep(1000);
continue;
}
handler_socket.s_shutdown_on_err = true;
@ -781,7 +782,7 @@ static int _server_thread(void *data)
brick = NULL;
atomic_dec(&server_handler_count);
}
brick_msleep(3000);
brick_msleep(2000);
}
MARS_INF("-------- cleaning up ----------\n");

View File

@ -1011,10 +1011,8 @@ void _make_new_replaylink(struct mars_rotate *rot, char *new_host, int new_seque
_update_replay_link(rot, &inf);
_update_version_link(rot, &inf);
#ifdef CONFIG_MARS_FAST_TRIGGER
mars_trigger();
mars_remote_trigger();
#endif
}
static
@ -1238,6 +1236,12 @@ done:
// remote workers
/* Protects membership of the peer_anchor list; held while the list is walked or modified. */
static
DEFINE_SPINLOCK(peer_lock);
/* Head of the list of all peers, linked through mars_peerinfo.peer_head. */
static
struct list_head peer_anchor = LIST_HEAD_INIT(peer_anchor);
struct mars_peerinfo {
struct mars_global *global;
char *peer;
@ -1245,8 +1249,11 @@ struct mars_peerinfo {
struct mars_socket socket;
struct task_struct *peer_thread;
spinlock_t lock;
struct list_head peer_head;
struct list_head remote_dent_list;
int maxdepth;
bool to_remote_trigger;
bool from_remote_trigger;
};
static
@ -1492,12 +1499,12 @@ int run_bones(struct mars_peerinfo *peer)
run_trigger = true;
//MARS_DBG("path = '%s' worker status = %d\n", remote_dent->d_path, status);
}
mars_free_dent_all(NULL, &tmp_list);
#ifdef CONFIG_MARS_FAST_TRIGGER
if (run_trigger) {
mars_trigger();
}
#endif
return status;
}
@ -1517,8 +1524,6 @@ void _peer_cleanup(struct mars_peerinfo *peer)
}
/* Wait queue on which peer_thread() sleeps between rounds; woken via wake_up_interruptible_all(). */
static DECLARE_WAIT_QUEUE_HEAD(remote_event);
/* Pending-notification counter; peer_thread() reads and decrements it before sending CMD_NOTIFY. */
static atomic_t remote_trigger_count = ATOMIC_INIT(0);
/* Incremented by peer_thread() before it enters its main loop, decremented when it leaves. */
static atomic_t peer_thread_count = ATOMIC_INIT(0);
static
int peer_thread(void *data)
@ -1528,7 +1533,6 @@ int peer_thread(void *data)
struct sockaddr_storage sockaddr = {};
int pause_time = 0;
bool do_kill = false;
bool flip = false;
int status;
if (!peer)
@ -1543,14 +1547,11 @@ int peer_thread(void *data)
goto done;
}
atomic_inc(&peer_thread_count);
while (!brick_thread_should_stop()) {
LIST_HEAD(tmp_list);
LIST_HEAD(old_list);
unsigned long flags;
struct mars_cmd cmd = {
.cmd_code = CMD_GETENTS,
.cmd_str1 = peer->path,
.cmd_int1 = peer->maxdepth,
};
@ -1559,7 +1560,7 @@ int peer_thread(void *data)
if (do_kill) {
do_kill = false;
_peer_cleanup(peer);
brick_msleep(5000);
brick_msleep(1000);
continue;
}
if (!mars_net_is_alive) {
@ -1570,7 +1571,7 @@ int peer_thread(void *data)
status = mars_create_socket(&peer->socket, &sockaddr, false);
if (unlikely(status < 0)) {
MARS_INF("no connection to '%s'\n", real_peer);
brick_msleep(5000);
brick_msleep(2000);
continue;
}
do_kill = true;
@ -1580,30 +1581,31 @@ int peer_thread(void *data)
continue;
}
/* This is not completely race-free, but does no harm.
* In worst case, network propagation will just take
* a little longer (see CONFIG_MARS_PROPAGATE_INTERVAL).
*/
if (!flip && atomic_read(&remote_trigger_count) > 0) {
MARS_DBG("sending notify ... remote_tiogger_count = %d\n", atomic_read(&remote_trigger_count));
atomic_dec(&remote_trigger_count);
cmd.cmd_code = CMD_NOTIFY;
flip = true;
if (peer->from_remote_trigger) {
pause_time = 0;
peer->from_remote_trigger = false;
MARS_DBG("got notify from peer.\n");
}
status = mars_send_struct(&peer->socket, &cmd, mars_cmd_meta);
status = 0;
if (peer->to_remote_trigger) {
pause_time = 0;
peer->to_remote_trigger = false;
MARS_DBG("sending notify to peer...\n");
cmd.cmd_code = CMD_NOTIFY;
status = mars_send_struct(&peer->socket, &cmd, mars_cmd_meta);
}
if (likely(status >= 0)) {
cmd.cmd_code = CMD_GETENTS;
status = mars_send_struct(&peer->socket, &cmd, mars_cmd_meta);
}
if (unlikely(status < 0)) {
MARS_WRN("communication error on send, status = %d\n", status);
if (do_kill) {
do_kill = false;
_peer_cleanup(peer);
}
brick_msleep(2000);
continue;
}
if (cmd.cmd_code == CMD_NOTIFY) {
flip = false;
pause_time = 0;
brick_msleep(1000);
continue;
}
@ -1617,7 +1619,7 @@ int peer_thread(void *data)
_peer_cleanup(peer);
}
mars_free_dent_all(NULL, &tmp_list);
brick_msleep(5000);
brick_msleep(2000);
continue;
}
@ -1631,15 +1633,17 @@ int peer_thread(void *data)
traced_unlock(&peer->lock, flags);
mars_trigger();
mars_free_dent_all(NULL, &old_list);
}
brick_msleep(1000);
brick_msleep(100);
if (!brick_thread_should_stop()) {
if (pause_time < mars_propagate_interval)
pause_time++;
wait_event_interruptible_timeout(remote_event,
atomic_read(&remote_trigger_count) > 0 ||
(peer->to_remote_trigger | peer->from_remote_trigger) ||
(mars_global && mars_global->main_trigger),
pause_time * HZ);
}
@ -1652,16 +1656,65 @@ int peer_thread(void *data)
}
done:
atomic_dec(&peer_thread_count);
brick_string_free(real_peer);
return 0;
}
static
void _make_alive(void)
{
struct timespec now;
char *tmp;
get_lamport(&now);
tmp = path_make("%ld.%09ld", now.tv_sec, now.tv_nsec);
if (likely(tmp)) {
_make_alivelink_str("time", tmp);
brick_string_free(tmp);
}
_make_alivelink("alive", mars_global && mars_global->global_power.button ? 1 : 0);
_make_alivelink_str("tree", SYMLINK_TREE_VERSION);
}
/*
 * React to a trigger notification received from a remote host.
 *
 * First refreshes the local alive-links via _make_alive(), then sets
 * from_remote_trigger on every mars_peerinfo linked into peer_anchor
 * (under peer_lock), and finally wakes all sleepers on remote_event so
 * that the peer threads re-evaluate their wait condition.
 */
void from_remote_trigger(void)
{
	struct list_head *pos;
	int nr_peers = 0;
	unsigned long flags;

	_make_alive();

	// TODO: replace peer_lock with rw_lock
	traced_lock(&peer_lock, flags);
	pos = peer_anchor.next;
	while (pos != &peer_anchor) {
		struct mars_peerinfo *info = container_of(pos, struct mars_peerinfo, peer_head);

		info->from_remote_trigger = true;
		nr_peers++;
		pos = pos->next;
	}
	traced_unlock(&peer_lock, flags);

	MARS_DBG("got trigger for %d peers\n", nr_peers);
	/* Wake only after the flags are set and the lock is dropped. */
	wake_up_interruptible_all(&remote_event);
}
EXPORT_SYMBOL_GPL(from_remote_trigger);
/*
 * Ask every known peer to notify its remote side.
 *
 * Sets to_remote_trigger on each mars_peerinfo linked into peer_anchor
 * (under peer_lock) and then wakes all sleepers on remote_event.  Each
 * peer_thread() that finds its own flag set sends CMD_NOTIFY before its
 * next CMD_GETENTS.
 *
 * The per-peer flag replaces the former global bookkeeping
 * (remote_trigger_count += peer_thread_count).  Those two statements
 * are removed: they declared `count` a second time in the same scope
 * and are not needed for the flag-based wake-up.
 */
static
void __mars_remote_trigger(void)
{
	struct list_head *tmp;
	int count = 0;
	unsigned long flags;

	// TODO: replace peer_lock with rw_lock
	traced_lock(&peer_lock, flags);
	for (tmp = peer_anchor.next; tmp != &peer_anchor; tmp = tmp->next) {
		struct mars_peerinfo *peer = container_of(tmp, struct mars_peerinfo, peer_head);

		peer->to_remote_trigger = true;
		count++;
	}
	traced_unlock(&peer_lock, flags);

	MARS_DBG("triggered %d peers\n", count);
	/* Wake only after all flags are visible and the lock is released. */
	wake_up_interruptible_all(&remote_event);
}
@ -1706,6 +1759,10 @@ static int _kill_peer(void *buf, struct mars_dent *dent)
return 0;
}
traced_lock(&peer_lock, flags);
list_del_init(&peer->peer_head);
traced_unlock(&peer_lock, flags);
MARS_INF("stopping peer thread...\n");
if (peer->peer_thread) {
brick_thread_stop(peer->peer_thread);
@ -1743,6 +1800,8 @@ static int _make_peer(struct mars_global *global, struct mars_dent *dent, char *
MARS_DBG("peer '%s'\n", mypeer);
if (!dent->d_private) {
unsigned long flags;
dent->d_private = brick_zmem_alloc(sizeof(struct mars_peerinfo));
if (!dent->d_private) {
MARS_ERR("no memory for peer structure\n");
@ -1755,7 +1814,12 @@ static int _make_peer(struct mars_global *global, struct mars_dent *dent, char *
peer->path = brick_strdup(path);
peer->maxdepth = 2;
spin_lock_init(&peer->lock);
INIT_LIST_HEAD(&peer->peer_head);
INIT_LIST_HEAD(&peer->remote_dent_list);
traced_lock(&peer_lock, flags);
list_add_tail(&peer->peer_head, &peer_anchor);
traced_unlock(&peer_lock, flags);
}
peer = dent->d_private;
@ -4150,8 +4214,6 @@ static int light_thread(void *data)
MARS_INF("-------- starting as host '%s' ----------\n", id);
while (_global.global_power.button || !list_empty(&_global.brick_anchor)) {
struct timespec now;
char *tmp;
int status;
MARS_DBG("-------- NEW ROUND %d ---------\n", atomic_read(&server_handler_count));
@ -4169,14 +4231,7 @@ static int light_thread(void *data)
mars_net_is_alive = false;
}
get_lamport(&now);
tmp = path_make("%ld.%09ld", now.tv_sec, now.tv_nsec);
if (likely(tmp)) {
_make_alivelink_str("time", tmp);
brick_string_free(tmp);
}
_make_alivelink("alive", _global.global_power.button ? 1 : 0);
_make_alivelink_str("tree", SYMLINK_TREE_VERSION);
_make_alive();
compute_emergency_mode();
@ -4217,7 +4272,7 @@ static int light_thread(void *data)
done:
MARS_INF("-------- cleaning up ----------\n");
mars_remote_trigger();
brick_msleep(2000);
brick_msleep(1000);
mars_free_dent_all(&_global, &_global.dent_anchor);
mars_kill_brick_all(&_global, &_global.brick_anchor, false);

View File

@ -180,6 +180,8 @@ extern int mars_mem_percent;
extern int light_checker(struct mars_dent *parent, const char *_name, int namlen, unsigned int d_type, int *prefix, int *serial, bool *use_channel);
/* Entry point for a trigger received from a remote host; handler_thread() calls it for CMD_NOTIFY. */
void from_remote_trigger(void);
/////////////////////////////////////////////////////////////////////////
// init