mirror of
https://github.com/schoebel/mars
synced 2025-03-02 03:20:44 +00:00
fix race on primary switchover
solution: wait until everything in /mars/resource-*/ has stabilized for 60s and all known logfiles have been migrated. This is more a workaround than a solution and should be fixed in MARS FULL.
This commit is contained in:
parent
8fe9b696aa
commit
92d9778dd7
@ -44,6 +44,7 @@ int trigger_sysctl_handler(ctl_table *table,
|
||||
sscanf(tmp, "%d", &code);
|
||||
if (code) {
|
||||
mars_trigger();
|
||||
mars_remote_trigger();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -158,15 +158,31 @@ sub _check_all_mtimes {
|
||||
|
||||
sub check_splitbrain {
|
||||
my ($res, $host) = @_;
|
||||
for(;;) {
|
||||
while(1) {
|
||||
my $pri = "$mars/resource-$res/primary";
|
||||
my $old = readlink($pri) or die "cannot determine current primary\n";
|
||||
_primary_res($res, "(none)", $pri, $old) unless $old eq "(none)";
|
||||
_trigger();
|
||||
sleep(5);
|
||||
last if _check_all_mtimes("$mars/resource-$res/[lvr]*", 60);
|
||||
print "resource directory $res not stable, waiting....\n";
|
||||
sleep(5);
|
||||
if(!_check_all_mtimes("$mars/resource-$res/[lvr]*", 60)) {
|
||||
print "resource directory $res not stable, waiting....\n";
|
||||
sleep(5);
|
||||
next;
|
||||
}
|
||||
my $max = 0;
|
||||
my @list = glob("$mars/resource-$res/replay-*");
|
||||
foreach my $l (@list) {
|
||||
my $cont = readlink($l);
|
||||
$cont =~ s/^log-([0-9]+)-.*$/$1/;
|
||||
$max = $cont if $cont > $max;
|
||||
}
|
||||
my $last = sprintf("$mars/resource-$res/log-%09d-*", $max);
|
||||
if(glob($last)) {
|
||||
print "resource $res: logfile $max is present.\n";
|
||||
last;
|
||||
}
|
||||
print "resource $res: logfile $max is not yet transferred, waiting....\n";
|
||||
sleep(10);
|
||||
}
|
||||
my @links = glob("$mars/resource-$res/version-[0-9]*-$host");
|
||||
if(!@links) {
|
||||
|
Loading…
Reference in New Issue
Block a user