marsadm: abort handover when remote stopping fails

This commit is contained in:
Thomas Schoebel-Theuer 2019-11-20 11:27:22 +01:00
parent 9d30c149f6
commit 6a3bbd3bef
1 changed file with 21 additions and 4 deletions

View File

@ -1390,6 +1390,7 @@ sub check_status {
my $path = correct_path("$mars/resource-$res/actual-$peer/$key"); my $path = correct_path("$mars/resource-$res/actual-$peer/$key");
my $link; my $link;
my $rounds = 0; my $rounds = 0;
my $fail_round = 10;
for (;;) { for (;;) {
$link = get_link($path, $unchecked); $link = get_link($path, $unchecked);
$link = 0 unless (defined($link) && $link ne ""); $link = 0 unless (defined($link) && $link ne "");
@ -1403,13 +1404,19 @@ sub check_status {
lprint "at $peer: $wait_msg actual '$key' == '$val'...\n"; lprint "at $peer: $wait_msg actual '$key' == '$val'...\n";
ldie "Cannot execute $cmd on resource $res: actual '$key_msg' must be $val_msg. $action_msg Also ensure that your command _can_ succeed.\n" if !$wait; ldie "Cannot execute $cmd on resource $res: actual '$key_msg' must be $val_msg. $action_msg Also ensure that your command _can_ succeed.\n" if !$wait;
} }
sleep_timeout();
$rounds++;
if (defined($action) && $action && $rounds > 1) { if (defined($action) && $action && $rounds > 1) {
lprint "action: $action\n" if $verbose; lprint "action: $action\n" if $verbose;
my $action_status = 0;
my $old_error_count = $error_count;
eval "$action"; eval "$action";
$rounds = 0; $error_count = $old_error_count;
# Tolerate intermediate failures for some time
if ($action_status && $rounds > $fail_round) {
ldie "Action failure, status=$action_status\n";
}
} }
sleep_timeout();
$rounds++;
} }
lprint "OK at $peer: '$path' has acceptable value '$link'\n"; lprint "OK at $peer: '$path' has acceptable value '$link'\n";
} }
@ -3546,6 +3553,10 @@ sub primary_phase0 {
my $unit_path = "$mars/resource-$res/systemd-$oper-unit"; my $unit_path = "$mars/resource-$res/systemd-$oper-unit";
my $unit = get_link($unit_path, 2); my $unit = get_link($unit_path, 2);
if ($unit) { if ($unit) {
if ($old ne "(none)") {
my $response_path = "$mars/resource-$res/userspace/systemd-status-stop-$old";
set_link(0, $response_path);
}
lprint "IMPORTANT: Relying on systemd for $oper of unit '$unit'\n"; lprint "IMPORTANT: Relying on systemd for $oper of unit '$unit'\n";
lprint "IMPORTANT: unit '$unit' wanted at '$new'\n"; lprint "IMPORTANT: unit '$unit' wanted at '$new'\n";
finish_links(); finish_links();
@ -3593,7 +3604,13 @@ sub primary_phase0b {
# open-count will then go down to zero, hopefully somewhen. # open-count will then go down to zero, hopefully somewhen.
my $watch = "$mars/resource-$res/systemd-want"; my $watch = "$mars/resource-$res/systemd-want";
my $action = ""; my $action = "";
$action = "system(\"touch -h $watch\");" if -l $watch; if (-l $watch) {
$action = "system(\"touch -h $watch\");";
my $response_path = "$mars/resource-$res/userspace/systemd-status-stop-$old";
$action .= "\$action_status = get_link(\"$response_path\");";
my $msg = "systemctl stop on peer $old: status=\$action_status\n";
$action .= "ldie \"$msg\" if \$action_status;";
}
check_status($cmd, $res, "open-count", 0, 1, undef, undef, $old, $action); check_status($cmd, $res, "open-count", 0, 1, undef, undef, $old, $action);
} }