From e78d0a30787fb4f09fde4db521d8c9b65c177cc2 Mon Sep 17 00:00:00 2001 From: Thomas Schoebel-Theuer Date: Mon, 11 Nov 2013 12:24:53 +0100 Subject: [PATCH] light: fix peer preferences, use connect-* links --- kernel/sy_old/mars_light.c | 121 +++++++++++++++++++++++++++++-------- userspace/marsadm | 20 ++++++ 2 files changed, 117 insertions(+), 24 deletions(-) diff --git a/kernel/sy_old/mars_light.c b/kernel/sy_old/mars_light.c index 77e7eed0..0cfb3274 100644 --- a/kernel/sy_old/mars_light.c +++ b/kernel/sy_old/mars_light.c @@ -277,12 +277,12 @@ enum { CL_TODO_ITEMS, CL_ACTUAL, CL_ACTUAL_ITEMS, - CL_CONNECT, CL_DATA, CL_SIZE, CL_ACTSIZE, CL_PRIMARY, CL__FILE, + CL_CONNECT, CL_TRANSFER, CL_SYNC, CL_VERIF, @@ -322,6 +322,7 @@ struct mars_rotate { struct if_brick *if_brick; const char *fetch_path; const char *fetch_peer; + const char *preferred_peer; const char *parent_path; const char *fetch_next_origin; struct say_channel *log_say; @@ -1335,7 +1336,7 @@ done: // remote workers static -DEFINE_SPINLOCK(peer_lock); +rwlock_t peer_lock = __RW_LOCK_UNLOCKED(&peer_lock); static struct list_head peer_anchor = LIST_HEAD_INIT(peer_anchor); @@ -1349,11 +1350,31 @@ struct mars_peerinfo { spinlock_t lock; struct list_head peer_head; struct list_head remote_dent_list; + unsigned long last_remote_jiffies; int maxdepth; bool to_remote_trigger; bool from_remote_trigger; }; +static +struct mars_peerinfo *find_peer(const char *peer_name) +{ + struct list_head *tmp; + struct mars_peerinfo *res = NULL; + + read_lock(&peer_lock); + for (tmp = peer_anchor.next; tmp != &peer_anchor; tmp = tmp->next) { + struct mars_peerinfo *peer = container_of(tmp, struct mars_peerinfo, peer_head); + if (!strcmp(peer->peer, peer_name)) { + res = peer; + break; + } + } + read_unlock(&peer_lock); + + return res; +} + static bool _is_usable_dir(const char *name) { @@ -1498,6 +1519,7 @@ int check_logfile(const char *peer, struct mars_dent *remote_dent, struct mars_d } } else if (!rot->fetch_serial && rot->allow_update && !rot->is_primary && !rot->old_is_primary && + (!rot->preferred_peer || !strcmp(rot->preferred_peer, peer)) && (!rot->split_brain_serial || remote_dent->d_serial < rot->split_brain_serial) && (dst_size < src_size || !local_dent)) { // start copy brick instance @@ -1809,6 +1831,8 @@ int peer_thread(void *data) traced_unlock(&peer->lock, flags); + peer->last_remote_jiffies = jiffies; + mars_trigger(); mars_free_dent_all(NULL, &old_list); @@ -1856,18 +1880,16 @@ void from_remote_trigger(void) { struct list_head *tmp; int count = 0; - unsigned long flags; _make_alive(); - // TODO: replace peer_lock with rw_lock - traced_lock(&peer_lock, flags); + read_lock(&peer_lock); for (tmp = peer_anchor.next; tmp != &peer_anchor; tmp = tmp->next) { struct mars_peerinfo *peer = container_of(tmp, struct mars_peerinfo, peer_head); peer->from_remote_trigger = true; count++; } - traced_unlock(&peer_lock, flags); + read_unlock(&peer_lock); MARS_DBG("got trigger for %d peers\n", count); wake_up_interruptible_all(&remote_event); @@ -1879,16 +1901,14 @@ void __mars_remote_trigger(void) { struct list_head *tmp; int count = 0; - unsigned long flags; - // TODO: replace peer_lock with rw_lock - traced_lock(&peer_lock, flags); + read_lock(&peer_lock); for (tmp = peer_anchor.next; tmp != &peer_anchor; tmp = tmp->next) { struct mars_peerinfo *peer = container_of(tmp, struct mars_peerinfo, peer_head); peer->to_remote_trigger = true; count++; } - traced_unlock(&peer_lock, flags); + read_unlock(&peer_lock); MARS_DBG("triggered %d peers\n", count); wake_up_interruptible_all(&remote_event); @@ -1935,9 +1955,9 @@ static int _kill_peer(void *buf, struct mars_dent *dent) return 0; } - traced_lock(&peer_lock, flags); + write_lock(&peer_lock); list_del_init(&peer->peer_head); - traced_unlock(&peer_lock, flags); + write_unlock(&peer_lock); MARS_INF("stopping peer thread...\n"); if (peer->peer_thread) { @@ -1976,7 +1996,6 @@ static int _make_peer(struct mars_global *global, struct mars_dent *dent, char * MARS_DBG("peer '%s'\n", mypeer); if (!dent->d_private) { - unsigned long flags; dent->d_private = brick_zmem_alloc(sizeof(struct mars_peerinfo)); if (!dent->d_private) { @@ -1993,9 +2012,9 @@ static int _make_peer(struct mars_global *global, struct mars_dent *dent, char * INIT_LIST_HEAD(&peer->peer_head); INIT_LIST_HEAD(&peer->remote_dent_list); - traced_lock(&peer_lock, flags); + write_lock(&peer_lock); list_add_tail(&peer->peer_head, &peer_anchor); - traced_unlock(&peer_lock, flags); + write_unlock(&peer_lock); } peer = dent->d_private; @@ -2272,10 +2291,12 @@ void rot_destruct(void *_rot) rot->log_say = NULL; brick_string_free(rot->fetch_path); brick_string_free(rot->fetch_peer); + brick_string_free(rot->preferred_peer); brick_string_free(rot->parent_path); brick_string_free(rot->fetch_next_origin); rot->fetch_path = NULL; rot->fetch_peer = NULL; + rot->preferred_peer = NULL; rot->parent_path = NULL; rot->fetch_next_origin = NULL; } @@ -2351,6 +2372,8 @@ int make_log_init(void *buf, struct mars_dent *dent) rot->fetch_next_serial = 0; rot->has_error = false; rot->has_symlinks = true; + brick_string_free(rot->preferred_peer); + rot->preferred_peer = NULL; if (dent->new_link) sscanf(dent->new_link, "%lld", &rot->dev_size); @@ -3877,6 +3900,55 @@ done: return status; } +static +bool remember_peer(struct mars_rotate *rot, struct mars_peerinfo *peer) +{ + if (!peer || !rot || rot->preferred_peer) + return false; + + if ((long long)peer->last_remote_jiffies + mars_scan_interval * HZ * 2 < (long long)jiffies) + return false; + + rot->preferred_peer = brick_strdup(peer->peer); + return true; +} + +static +int make_connect(void *buf, struct mars_dent *dent) +{ + struct mars_rotate *rot; + struct mars_peerinfo *peer; + char *names; + char *this_name; + char *tmp; + + if (unlikely(!dent->d_parent || !dent->new_link)) { + goto done; + } + rot = dent->d_parent->d_private; + if (unlikely(!rot)) { + goto done; + } + + names = brick_strdup(dent->new_link); + for (tmp = this_name = names; *tmp; tmp++) { + if (*tmp == MARS_DELIM) { + *tmp = '\0'; + peer = find_peer(this_name); + if (remember_peer(rot, peer)) + goto found; + this_name = tmp + 1; + } + } + peer = find_peer(this_name); + remember_peer(rot, peer); + +found: + brick_string_free(names); +done: + return 0; +} + static int prepare_delete(void *buf, struct mars_dent *dent) { struct kstat stat; @@ -4316,15 +4388,6 @@ static const struct light_class light_classes[] = { }, - /* Symlink indicating the current peer - */ - [CL_CONNECT] = { - .cl_name = "connect-", - .cl_len = 8, - .cl_type = 'l', - .cl_hostcontext = false, // not used here - .cl_father = CL_RESOURCE, - }, /* File or symlink to the real device / real (sparse) file * when hostcontext is missing, the corresponding peer will * not participate in that resource. @@ -4387,6 +4450,16 @@ static const struct light_class light_classes[] = { .cl_forward = make_bio, .cl_backward = kill_any, }, + /* Symlink for connection preferences + */ + [CL_CONNECT] = { + .cl_name = "connect-", + .cl_len = 8, + .cl_type = 'l', + .cl_hostcontext = true, + .cl_father = CL_RESOURCE, + .cl_forward = make_connect, + }, /* informational symlink indicating the current * status / start / pos / end of logfile transfers. */ diff --git a/userspace/marsadm b/userspace/marsadm index 40b1ca26..ccdad603 100755 --- a/userspace/marsadm +++ b/userspace/marsadm @@ -297,6 +297,11 @@ sub check_id { ldie "identifier '$str' is too long (only 16 chars allowed)" if length($str) > 16; } +sub check_id_list { + my $str = shift; + ldie "comma-separated list '$str' has disallowed characters" unless $str =~ m/^[A-Za-z_][-A-Za-z0-9_,]*$/; +} + ################################################################## # semantic checks @@ -927,6 +932,19 @@ sub is_module_loaded { return $res; } +sub set_connect_pref_list { + my ($cmd, $res, $list) = @_; + check_res_member($res); + my $dst = "$mars/resource-$res/connect-$host"; + if ($cmd =~ m/^get-/) { + my $value = get_link($dst); + lprint "$value\n"; + return; + } + check_id_list($list); + set_link($list, $dst); +} + sub set_link_cmd { my $cmd = shift; for (;;) { @@ -1855,6 +1873,8 @@ my %cmd_table = "wait for deletions", \&leave_res_phase2, ], "delete-resource" => \&delete_res, + "set-connect-pref-list"=> \&set_connect_pref_list, + "get-connect-pref-list"=> \&set_connect_pref_list, "log-rotate" => \&logrotate_res, "log-delete" => \&logdelete_res, "log-delete-all" => \&logdelete_res,