From 92d9778dd72f53272838bee46dad4bbb46a0ea3e Mon Sep 17 00:00:00 2001 From: Thomas Schoebel-Theuer Date: Wed, 25 Jan 2012 14:38:45 +0100 Subject: [PATCH] fix race on primary switchover solution: wait until all in /mars/resource-*/ has stabilized for 60s and all known logfiles have been migrated. This is more a workaround than a solution and should be fixed in MARS FULL. --- sy_old/mars_proc.c | 1 + userspace/marsadm | 24 ++++++++++++++++++++---- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/sy_old/mars_proc.c b/sy_old/mars_proc.c index 38b1eac8..50b83a71 100644 --- a/sy_old/mars_proc.c +++ b/sy_old/mars_proc.c @@ -44,6 +44,7 @@ int trigger_sysctl_handler(ctl_table *table, sscanf(tmp, "%d", &code); if (code) { mars_trigger(); + mars_remote_trigger(); } } } else { diff --git a/userspace/marsadm b/userspace/marsadm index 76febd69..dd76df75 100644 --- a/userspace/marsadm +++ b/userspace/marsadm @@ -158,15 +158,31 @@ sub _check_all_mtimes { sub check_splitbrain { my ($res, $host) = @_; - for(;;) { + while(1) { my $pri = "$mars/resource-$res/primary"; my $old = readlink($pri) or die "cannot determine current primary\n"; _primary_res($res, "(none)", $pri, $old) unless $old eq "(none)"; _trigger(); sleep(5); - last if _check_all_mtimes("$mars/resource-$res/[lvr]*", 60); - print "resource directory $res not stable, waiting....\n"; - sleep(5); + if(!_check_all_mtimes("$mars/resource-$res/[lvr]*", 60)) { + print "resource directory $res not stable, waiting....\n"; + sleep(5); + next; + } + my $max = 0; + my @list = glob("$mars/resource-$res/replay-*"); + foreach my $l (@list) { + my $cont = readlink($l); + $cont =~ s/^log-([0-9]+)-.*$/$1/; + $max = $cont if $cont > $max; + } + my $last = sprintf("$mars/resource-$res/log-%09d-*", $max); + if(glob($last)) { + print "resource $res: logfile $max is present.\n"; + last; + } + print "resource $res: logfile $max is not yet transferred, waiting....\n"; + sleep(10); } my @links = 
glob("$mars/resource-$res/version-[0-9]*-$host"); if(!@links) {