marsadm: fix edge cases of try_to_avoid_splitbrain()

Originally this was a trivial, silly bug (a boolean value was wrong). It led
to an endless loop when a local versionlink was missing, which can happen
only after a primary crash at the wrong moment shortly after a logrotate
(not even during ordinary operations), followed by a hard reboot.

As documented in mars-manual.pdf, you simply need "modprobe mars"
to recover after such a crash reboot. MARS remembers the primary state
persistently for you and restores everything _automatically_.

Using "marsadm primary" in such a case to switch the current primary
to primary again (after an unnecessary "marsadm secondary" which is
strongly discouraged by mars-manual.pdf), although the host is / was
already in primary state after the reboot, is at least as silly as
the mentioned bug. Doing this in an /etc/init.d/ startup script,
where it really doesn't belong, is even more silly.

The latter is even an OPERATIONAL RISK, because "marsadm secondary"
works _globally_ in the whole cluster (as documented in mars-manual.pdf).
Such an improper startup script _can_ (potentially) disturb another
cluster member which had become primary in the _meantime_ during reboot.
Global cluster operations don't belong in startup scripts, because
reboots may happen unintentionally at any time.
This commit is contained in:
Thomas Schoebel-Theuer 2016-02-03 22:00:47 +01:00
parent cd01d1ae02
commit 0e6bb47cb6
1 changed files with 46 additions and 26 deletions

View File

@ -267,8 +267,10 @@ sub sleep_timeout {
return;
}
if ($timeout <= 0) {
ldie "Timeout reached. You may retry with --timeout=-1 to ensure waiting until progress is possible.\n" if (!defined($continue) || !$continue);
lwarn "Timeout reached. Continuing anyway.\n"
if (!defined($continue) || !$continue) {
ldie "Timeout reached.\n";
}
lwarn "Timeout reached. Continuing anyway.\n";
}
my $rest = $timeout;
$rest = $sleeptime if $rest > $sleeptime;
@ -1008,6 +1010,8 @@ sub log_purge_res {
sub try_to_avoid_splitbrain {
my ($cmd, $res, $old_primary) = @_;
my $old_timeout = $timeout;
$timeout = $window * 2 if $timeout < 0;
$old_primary = "" if $old_primary eq "(none)";
wait_cluster($cmd, $res, $old_primary);
if (!detect_splitbrain($res, 0)) {
@ -1015,11 +1019,15 @@ sub try_to_avoid_splitbrain {
lwarn "ATTENTION: that's no good idea.\n";
lwarn "ATTENTION: I will continue to do what you want.\n";
lwarn "ATTENTION: But you are responsible for the consequences.\n";
$timeout = $old_timeout;
return;
}
# now try to prevent producing a _new_ split brain situation....
my @host_list = glob("$mars/resource-$res/replay-*");
$timeout = $old_timeout;
return if scalar(@host_list) < 2;
$timeout = $window * 2 if $timeout < 0;
my $old_situation = "";
for (;;) {
my ($min, $max) = get_minmax_versions($res);
my $vers_glob = sprintf("$mars/resource-$res/version-%09d-*", $max);
@ -1039,37 +1047,49 @@ sub try_to_avoid_splitbrain {
if ($emergency) {
ldie "emergency mode $emergency has been entered locally: handover is not possible. Either free some space in $mars/, or use --force to use a potentially outdated version.\n";
}
my $own_vers = sprintf("$mars/resource-$res/version-%09d-$host", $max);
if (!get_link($own_vers, 2)) {
$ok = 0;
} else {
my $primary = _get_designated_primary($res);
# if the old primary is known, we can ignore all other / unrelated hosts
if ($primary && $primary ne $host && $primary ne "(none)") {
my $p_path = sprintf("$mars/resource-$res/version-%09d-%s", $max, $primary);
my $h_path = sprintf("$mars/resource-$res/version-%09d-%s", $max, $host);
my $p_vers = get_link($p_path, 1);
my $h_vers = get_link($h_path, 1);
if (!$p_vers || !$h_vers || $p_vers ne $h_vers) {
$ok = 0;
my $primary = _get_designated_primary($res);
if ($primary eq "(none)") {
# try to determine the old primary when unique
my $glob_logs = sprintf("$mars/resource-$res/log-%09d-*", $max);
my @candidates = glob($glob_logs);
if (scalar(@candidates) == 1) {
my $log_path = pop @candidates;
if ($log_path =~ m:/log-[0-9]+-(.+)$:) {
$primary = $1;
lprint "Using last primary '$primary' as a substitute.\n";
}
} else {
# old primary is unkown: we have no chance, other than comparing _all_ versions.
my @versions = glob($vers_glob);
my $first = get_link(shift @versions);
while (@versions) {
my $next = get_link(shift @versions);
if ($next ne $first) {
$ok = 0;
}
}
}
# if the old primary is known, we can ignore all other / unrelated hosts
if ($primary && $primary ne $host && $primary ne "(none)") {
my $p_path = sprintf("$mars/resource-$res/version-%09d-%s", $max, $primary);
my $h_path = sprintf("$mars/resource-$res/version-%09d-%s", $max, $host);
my $p_vers = get_link($p_path, 1);
my $h_vers = get_link($h_path, 1);
if (!$p_vers || !$h_vers || $p_vers ne $h_vers) {
$ok = 0;
}
} else {
# old primary is unknown: we have no chance, other than comparing _all_ versions.
my @versions = glob($vers_glob);
my $first = get_link(shift @versions);
while (@versions) {
my $next = get_link(shift @versions);
if ($next ne $first) {
$ok = 0;
}
}
}
last if $ok;
lprint "trying to avoid split brain: logfile update not yet completed.\n";
view_cmd("replinfo", $res);
lprint "Trying to avoid split brain for $timeout s: logfile update not yet completed.\n";
my $tpl = get_macro("replinfo");
my $new_situation = eval_macro($cmd, $res, $tpl, @_);
print $new_situation;
$timeout = $window * 2 if $new_situation ne $old_situation;
sleep_timeout();
$old_situation = $new_situation;
}
$timeout = $old_timeout;
}
sub get_size {