fix race on primary switchover

solution: wait until everything in /mars/resource-*/ has been stable
for 60s and all known logfiles have been migrated.
This is more of a workaround than a solution and should be fixed
in MARS FULL.
This commit is contained in:
Thomas Schoebel-Theuer 2012-01-25 14:38:45 +01:00 committed by Thomas Schoebel-Theuer
parent 8fe9b696aa
commit 92d9778dd7
2 changed files with 21 additions and 4 deletions

View File

@ -44,6 +44,7 @@ int trigger_sysctl_handler(ctl_table *table,
sscanf(tmp, "%d", &code);
if (code) {
mars_trigger();
mars_remote_trigger();
}
}
} else {

View File

@ -158,15 +158,31 @@ sub _check_all_mtimes {
sub check_splitbrain {
my ($res, $host) = @_;
for(;;) {
while(1) {
my $pri = "$mars/resource-$res/primary";
my $old = readlink($pri) or die "cannot determine current primary\n";
_primary_res($res, "(none)", $pri, $old) unless $old eq "(none)";
_trigger();
sleep(5);
last if _check_all_mtimes("$mars/resource-$res/[lvr]*", 60);
print "resource directory $res not stable, waiting....\n";
sleep(5);
if(!_check_all_mtimes("$mars/resource-$res/[lvr]*", 60)) {
print "resource directory $res not stable, waiting....\n";
sleep(5);
next;
}
my $max = 0;
my @list = glob("$mars/resource-$res/replay-*");
foreach my $l (@list) {
my $cont = readlink($l);
$cont =~ s/^log-([0-9]+)-.*$/$1/;
$max = $cont if $cont > $max;
}
my $last = sprintf("$mars/resource-$res/log-%09d-*", $max);
if(glob($last)) {
print "resource $res: logfile $max is present.\n";
last;
}
print "resource $res: logfile $max is not yet transferred, waiting....\n";
sleep(10);
}
my @links = glob("$mars/resource-$res/version-[0-9]*-$host");
if(!@links) {