mirror of https://github.com/schoebel/mars
marsadm: implement prosumer handover on primary handover
This commit is contained in:
parent
c532ff8f8a
commit
e6d6a9b2fe
|
@ -3062,7 +3062,7 @@ sub check_primary {
|
|||
my $fail = 0;
|
||||
my $lnk = "$mars/resource-$res/actual-$host/is-primary";
|
||||
my $is_primary = get_link($lnk, 1);
|
||||
if (!$is_primary) { # give it a second chance
|
||||
if (!$is_primary && todo_local($cmd, $res)) { # give it a second chance
|
||||
$is_primary = device_exists($res);
|
||||
}
|
||||
unless ($is_primary) {
|
||||
|
@ -5127,10 +5127,15 @@ sub create_res {
|
|||
set_link("1", "$todo/connect");
|
||||
set_link("1", "$todo/sync");
|
||||
set_link("1", "$todo/allow-replay");
|
||||
set_link("0", "$todo/detach-device");
|
||||
set_link("0", "$todo/kill-device");
|
||||
set_link("(local)", "$todo/exports");
|
||||
set_link("0", "$todo/multi-prosumer");
|
||||
unlink("$resdir/syncstatus-$host");
|
||||
|
||||
my $replay_ok = 1;
|
||||
if ($create) {
|
||||
set_link("(local)", "$resdir/prosumer");
|
||||
set_link($host, "$resdir/primary");
|
||||
set_link($host, "$resdir/userspace/last-primary");
|
||||
set_link($size, "$resdir/syncstatus-$host");
|
||||
|
@ -6234,6 +6239,8 @@ sub _primary_res {
|
|||
lprint "designated primary changed from '$old' to '$new'\n";
|
||||
}
|
||||
|
||||
my %gate_set;
|
||||
|
||||
sub _set_gate {
|
||||
my ($cmd, $res, $peers) = @_;
|
||||
my $touched = 0;
|
||||
|
@ -6241,6 +6248,7 @@ sub _set_gate {
|
|||
lprint "Closing gate at '$peer'\n";
|
||||
my $lnk = "$mars/resource-$res/todo-$peer/gate-mask";
|
||||
set_link($gate_code, $lnk);
|
||||
$gate_set{$peer} = $gate_code;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6253,6 +6261,9 @@ sub _reset_gate {
|
|||
next if $val eq "0x0";
|
||||
lprint "Resetting gate of '$res'\n";
|
||||
set_link("0x0", $lnk);
|
||||
$lnk =~ m:/todo-([^/]*)/:;
|
||||
my $peer = $1;
|
||||
$gate_set{$peer} = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6281,13 +6292,28 @@ sub _reset_current_primary {
|
|||
# check whether primary/secondary switching is possible at all
|
||||
sub primary_phase0 {
|
||||
my ($cmd, $res) = @_;
|
||||
ldie "cannot switch primary: mars kernel module is not loaded\n" unless ($cmd eq "secondary" || -d "/proc/sys/mars");
|
||||
ldie "cannot switch primary: mars kernel module is not loaded\n" unless ($cmd =~ m/secondary/ || -d "/proc/sys/mars");
|
||||
if ($force) {
|
||||
check_todo($cmd, $res, "fetch", 0, 0);
|
||||
}
|
||||
_reset_gate(@_);
|
||||
_reset_new_primary(@_);
|
||||
finish_links();
|
||||
_trigger(3);
|
||||
my $old = _get_designated_primary($cmd, $res, -1);
|
||||
my $new = parse_list_spec($cmd_suffix{$cmd}, undef, $host);
|
||||
ldie "Cannot work with multiple primaries in parallel\n" if $new =~ m/[+]/;
|
||||
if ($new && $new ne "(local)") {
|
||||
# Provisionary: map the new syntax to the old one
|
||||
if ($new eq "(none)") {
|
||||
$cmd = "secondary";
|
||||
} else {
|
||||
# provisionary, to disappear
|
||||
$host = $new;
|
||||
}
|
||||
}
|
||||
lprint "Current designated primary: $old\n";
|
||||
if ($cmd eq "primary") {
|
||||
if ($cmd =~ m/primary/) {
|
||||
if ($host ne $old) {
|
||||
lprint "Allowing handover in cases of sync: ignore_sync=$ignore_sync\n" if $ignore_sync;
|
||||
check_sync_finished($res, $host, $ignore_sync);
|
||||
|
@ -6315,19 +6341,27 @@ sub primary_phase0 {
|
|||
ldie "Won't switch to avoid unnoticed data loss. You may however do a 'primary --force'.\n" unless $force;
|
||||
}
|
||||
}
|
||||
if (!todo_local(@_)) {
|
||||
my $status = _check_logger_states($cmd, $res, $old, $new, 0, 1);
|
||||
if ($status) {
|
||||
ldie "Apparently hosts '$old' and '$new' seem to work on different storage ids\n";
|
||||
}
|
||||
}
|
||||
my $want_path = "$mars/resource-$res/systemd-want";
|
||||
my $want = get_link($want_path, 2);
|
||||
if ($want) {
|
||||
my $new;
|
||||
my $oper;
|
||||
if ($cmd eq "primary") {
|
||||
if ($cmd =~ m/primary/) {
|
||||
$new = $host;
|
||||
$oper = "start";
|
||||
} else {
|
||||
$new = "(none)";
|
||||
$oper = "stop";
|
||||
}
|
||||
set_link($new, $want_path);
|
||||
if (todo_local($cmd, $res)) {
|
||||
set_link($new, $want_path);
|
||||
}
|
||||
my $unit_path = "$mars/resource-$res/systemd-$oper-unit";
|
||||
my $unit = get_link($unit_path, 2);
|
||||
if ($unit) {
|
||||
|
@ -6341,11 +6375,13 @@ sub primary_phase0 {
|
|||
return 0;
|
||||
}
|
||||
}
|
||||
return 0 if ($old eq $host and $cmd eq "primary");
|
||||
return 0 if ($old eq $host && $cmd =~ m/primary/);
|
||||
return 0 if $old eq "(none)";
|
||||
my $open_count_path = "$mars/resource-$res/actual-$old/open-count";
|
||||
my $device_in_use = get_link($open_count_path, 1);
|
||||
if ($device_in_use) {
|
||||
if ($device_in_use &&
|
||||
($cmd =~ m/secondary/ ||
|
||||
todo_local(@_))) {
|
||||
my $dev = device_name($res, $old);
|
||||
lwarn "device '$dev' for resource '$res' is $device_in_use times in use on primary host '$old'\n";
|
||||
ldie "first you must umount/close the device (on host '$old')\n" unless $force;
|
||||
|
@ -6368,6 +6404,19 @@ sub primary_phase0a {
|
|||
lprint "Prepare new primary '$new' handover\n";
|
||||
_switch($cmd, $res, "$mars/resource-$res/todo-$new/fetch", 1);
|
||||
}
|
||||
if (!todo_local(@_)) {
|
||||
my $prosumers = get_prosumers(@_);
|
||||
$prosumers = "(none)" if !$prosumers;
|
||||
my $lnk = "$mars/resource-$res/new-primary";
|
||||
my $new = $host;
|
||||
lprint "Prepare new primary '$new'\n";
|
||||
set_link($new, $lnk);
|
||||
$lnk = "$mars/resource-$res/todo-$new/exports";
|
||||
set_link($prosumers, $lnk);
|
||||
$lnk = "$mars/resource-$res/todo-$new/multi-prosumer";
|
||||
my $val = ($prosumers =~ m/[+]/) ? "1" : "0";
|
||||
set_link($val, $lnk);
|
||||
}
|
||||
finish_links();
|
||||
return 0 unless $force;
|
||||
wait_cond($cmd, $res, "is-fetch-off");
|
||||
|
@ -6416,36 +6465,218 @@ sub primary_phase0b {
|
|||
return 0;
|
||||
}
|
||||
|
||||
# Failure compensation for a handover that was prepared but not yet switched:
# resets the gate and the new-primary marker for the resource, then flushes
# the pending link updates.
# Parameters: $cmd (command name), $res (resource name).
# The return value is whatever finish_links() yields.
sub compensate_primary_fail_prepared {
    my ($command, $resource) = @_;
    lprint("Resetting gate and primary handover infrastucture...\n");
    for my $undo (\&_reset_gate, \&_reset_new_primary) {
        $undo->($command, $resource);
    }
    return finish_links();
}
|
||||
|
||||
# Failure compensation for a handover that already switched the designated
# primary: first undo the preparation (gate, new-primary marker), then
# restore the previously designated primary.
# Parameters: $cmd (command name), $res (resource name).
# The return value is that of the final lprint call.
sub compensate_primary_fail_switched {
    my ($command, $resource) = @_;
    compensate_primary_fail_prepared($command, $resource);
    lprint("Switching back to previous primary...\n");
    _reset_current_primary($command, $resource);
    return lprint("Sorry, I cannot prevent SPLIT BRAIN which may follow now.\n");
}
|
||||
|
||||
# when necessary, switch to secondary (intermediately)
# prosumer: use gate instead
#
# Parameters: $cmd (command name, matched against /primary/ and /secondary/),
#             $res (resource name).
# Returns 0 on success, or the non-zero status of try_to_avoid_splitbrain().
#
# Non-local case (todo_local() is false): for a non-forced primary command the
# gate is set on the prosumers, then todo-<peer>/detach-device is written for
# every prosumer ("0" for primary, "1" for secondary).  Only a secondary
# command continues into the classic part below.
sub primary_phase1 {
    my ($cmd, $res) = @_;
    if (!todo_local(@_)) {
        finish_links();
        my $prosumers = get_prosumers(@_);
        if (!$force && $cmd =~ m/primary/) {
            _set_gate($cmd, $res, $prosumers);
        }
        foreach my $peer (split("\\+", $prosumers)) {
            my $detach_device = "0";
            if ($cmd =~ m/primary/) {
                # from now on a failure must undo the prepared handover
                $allow_fail_action = \&compensate_primary_fail_prepared;
                lprint "Activating device at '$peer'\n";
            } elsif ($cmd =~ m/secondary/) {
                $detach_device = "1";
                lprint "Removing device at '$peer'\n";
            }
            my $lnk = "$mars/resource-$res/todo-$peer/detach-device";
            set_link($detach_device, $lnk);
        }
        finish_links();
        return 0 unless $cmd =~ m/secondary/;
    }
    return 0 if ($force && $cmd =~ m/primary/);
    # NOTE(review): %pri_old is filled outside this function; confirm that it
    # is already populated for $res when this phase runs.
    my $old = $pri_old{$res};
    return 0 if ($old eq $host && $cmd =~ m/primary/);
    my $new = "(none)";
    if (!$force && $cmd =~ m/primary/) {
        my $status = try_to_avoid_splitbrain($cmd, $res, $old);
        return $status if $status;
    }
    # nothing to do when there is no designated primary
    return 0 if $old eq $new;
    _primary_res($res, $new, $old);
    finish_links();
    return 0;
}
|
||||
|
||||
# Compare the logger states published by two hosts of a resource.
#
# Parameters:
#   $cmd          command name (not used in this function)
#   $res          resource name
#   $old, $new    host names; their actual-<host>/state-logger links are read
#   $after_stamp  optional; when true, both link stamps must be strictly
#                 greater than this value
#   $only_id      optional; when true, only the second comma-separated field
#                 of the state (logged as "stor_id") is compared
# Returns 0 when the states are acceptable, 1 otherwise.
#
# All regex captures are taken in list context, so that a failed match yields
# undef instead of silently re-using $1 from an earlier successful match.
sub _check_logger_states {
    my ($cmd, $res, $old, $new, $after_stamp, $only_id) = @_;
    my $old_path = "$mars/resource-$res/actual-$old/state-logger";
    my $new_path = "$mars/resource-$res/actual-$new/state-logger";
    # race prevention: stamps must be retrieved _first_
    my $old_stamp = get_link_stamp($old_path);
    my $new_stamp = get_link_stamp($new_path);
    my $old_state = get_link($old_path);
    my $new_state = get_link($new_path);
    lprint "Old $old_stamp logger state: '$old_state' at $old\n" if $verbose;
    lprint "New $new_stamp logger state: '$new_state' at $new\n" if $verbose;
    if (!$old_state || !$new_state) {
        lwarn "UNEXPECTED: undefined logger state\n";
        return 1;
    }
    if ($only_id) {
        my ($id_old) = $old_state =~ m/^[0-9.]+,([^,]+),/;
        my ($id_new) = $new_state =~ m/^[0-9.]+,([^,]+),/;
        if (!$id_old || !$id_new || $id_new ne $id_old) {
            # an unparsable state counts as a mismatch; print it as empty
            $id_old = "" unless defined($id_old);
            $id_new = "" unless defined($id_new);
            lprint "stor_id '$id_new' != '$id_old'\n";
            return 1;
        }
        return 0;
    }
    if ($after_stamp) {
        my $status = 0;
        my $old_diff = $old_stamp - $after_stamp;
        my $new_diff = $new_stamp - $after_stamp;
        if ($old_diff <= 0) {
            lprint "Old logger stamp is not yet recent ($old_diff s)\n";
            $status = 1;
        }
        if ($new_diff <= 0) {
            lprint "New logger stamp is not yet recent ($new_diff s)\n";
            $status = 1;
        }
        if ($status) {
            _trigger(3);
            return 1;
        }
        lprint "Logger stamps are recent (old=$old_diff, new=$new_diff)\n";
    }
    # the last comma-separated field is a single digit, used as dirty flag
    my ($old_dirty) = $old_state =~ m/,([0-9])$/;
    my ($new_dirty) = $new_state =~ m/,([0-9])$/;
    if ($old_dirty) {
        lprint "Old logger stamp is dirty\n";
        return 1;
    }
    if ($new_dirty) {
        lprint "New logger stamp is dirty\n";
        return 1;
    }
    # strip off the stor_epoch, they are necessarily different
    $old_state =~ s/^[0-9]+\.[0-9]+,//;
    $new_state =~ s/^[0-9]+\.[0-9]+,//;
    if ($old_state ne $new_state) {
        lprint "Logger states are different\n";
        return 1;
    }
    lprint "Logger states OK\n";
    return 0;
}
|
||||
|
||||
# Set to 0 by primary_phase1b; primary_phase2 uses it to call wait_cluster()
# only once.
my $phase2_waited = 0;
# Per resource: stamp of the most recently closed gate, remembered the first
# time all gates were seen closed.  Deleted again whenever a check fails.
my %gate_stamps;

# Verify that the old primary side is quiet before switching.
#
# Parameters: $cmd (command name), $res (resource name).
# Returns 0 when the phase is complete and a non-zero status otherwise
# (presumably interpreted as "retry" by the caller -- confirm).
#
# Local case (todo_local() is true): wait until the old primary is gone.
# Non-local case with a primary command and a designated primary: check that
# the gate at every prosumer is closed and that the logger states of old and
# new primary agree, both before and after the split brain check.
# Finally todo-$host/detach-device is written ("1" for secondary, else "0").
sub primary_phase1b {
    my ($cmd, $res) = @_;
    $phase2_waited = 0;
    finish_links();
    return 0 if $force;
    my $check_logger = 0;
    my $this_stamp = 0;
    my $old = _get_designated_primary($cmd, $res, -1);
    my $new = $host;
    if (todo_local(@_)) {
        my $status = check_primary_gone($cmd, $res, $old);
        return $status if $status;
    } elsif ($cmd =~ m/primary/ &&
             _get_designated_primary($cmd, $res, 0) ne "(none)") {
        # check that gates have actually closed
        _trigger(3);
        my $prosumers = get_prosumers(@_);
        foreach my $peer (split("\\+", $prosumers)) {
            my $lnk = "$mars/resource-$res/actual-$peer/gate-on";
            my $status = get_link($lnk, 1);
            # When gate is not on, assume OrphanProsumer and ignore it.
            # This can happen during incidents.
            if (!defined($status) || !$status) {
                lprint "Gate at '$peer' is not active.\n";
                if (!$gate_set{$peer}) {
                    lprint "Ignoring inactive gate at '$peer'.\n";
                    next;
                }
                my $open_count_path = "$mars/resource-$res/actual-$peer/open-count";
                my $device_in_use = get_link($open_count_path, 1);
                if (!$device_in_use) {
                    lprint "Ignoring unused gate at '$peer'.\n";
                    next;
                }
                # gate was requested and the device is in use: not ready
                return 1;
            }
            $lnk = "$mars/resource-$res/actual-$peer/gate-mask";
            $status = get_link($lnk, 1);
            if (!defined($status) || $status eq "") {
                lwarn "Gate at '$peer' does not respond.\n";
                delete $gate_stamps{$res};
                return 1;
            }
            # remove leading 0
            $status =~ s/^0x0*(.)/0x$1/;
            lprint "Gate at '$peer' is '$status'\n";
            if ($status ne $gate_code) {
                delete $gate_stamps{$res};
                return 1;
            }
            # Get timestamp of lastly closed gate
            if (!defined($gate_stamps{$res})) {
                my $stamp = get_link_stamp($lnk);
                $this_stamp = $stamp if $stamp > $this_stamp;
            }
            lprint "Gate at '$peer' is closed.\n";
        }
        # Check logger states
        # First check validity
        my $status = _check_logger_states($cmd, $res, $old, $new);
        if ($status) {
            delete $gate_stamps{$res};
            return $status;
        }
        # Now remember the first occurrence in time
        if (!defined($gate_stamps{$res}) && $this_stamp) {
            $gate_stamps{$res} = $this_stamp;
        }
        # Check again, this time also checking the timestamps
        $check_logger = 1;
        my $after_stamp = $gate_stamps{$res};
        $status = _check_logger_states($cmd, $res, $old, $new, $after_stamp);
        return $status if $status;
    }
    if (!$force && $cmd =~ m/primary/) {
        my $status = try_to_avoid_splitbrain($cmd, $res, $old);
        return $status if $status;
    }
    if ($check_logger) {
        # paranoia: check once again after split-brain detection
        my $after_stamp = $gate_stamps{$res};
        my $status = _check_logger_states($cmd, $res, $old, $new, $after_stamp);
        return $status if $status;
    }
    my $lnk = "$mars/resource-$res/todo-$host/detach-device";
    my $detach_device = ($cmd =~ m/secondary/) ? "1" : "0";
    set_link($detach_device, $lnk);
    return 0;
}
|
||||
|
||||
|
@ -6453,7 +6684,9 @@ sub primary_phase1b {
|
|||
sub primary_phase2 {
|
||||
my ($cmd, $res) = @_;
|
||||
return 0 if $force;
|
||||
return 0 unless $cmd eq "primary";
|
||||
return 0 unless $cmd =~ m/primary/;
|
||||
finish_links();
|
||||
return if !todo_local(@_);
|
||||
wait_cluster($cmd) if !$phase2_waited++;
|
||||
my $old = $pri_old{$res};
|
||||
return check_primary_gone($cmd, $res, $old);
|
||||
|
@ -6472,23 +6705,193 @@ sub primary_phase2b {
|
|||
# when necessary, switch to primary
#
# Parameters: $cmd (command name), $res (resource name).
# Always returns 0.
#
# Only acts for commands matching /primary/.  The previously designated
# primary is remembered in $pri_old{$res}, then $host becomes the designated
# primary.  From here on a failure is compensated by switching back.
sub primary_phase3 {
    my ($cmd, $res) = @_;
    # Single guard: the stricter test ($cmd eq "primary") must not precede
    # this one, or other commands matching /primary/ would never get here.
    return 0 unless $cmd =~ m/primary/;
    my $old = _get_designated_primary($cmd, $res, -1);
    $pri_old{$res} = $old;
    my $new = $host;
    _primary_res($res, $new, $old);
    $allow_fail_action = \&compensate_primary_fail_switched;
    my $prosumers = get_prosumers(@_);
    if ($prosumers eq "(local)") {
        # local prosumer: keep the device attached and export only locally
        my $lnk = "$mars/resource-$res/todo-$new/detach-device";
        set_link("0", $lnk);
        $lnk = "$mars/resource-$res/todo-$new/exports";
        set_link("(local)", $lnk);
        $lnk = "$mars/resource-$res/todo-$new/multi-prosumer";
        set_link("0", $lnk);
    }
    return 0;
}
|
||||
|
||||
# Check that the prosumer handover is prepared for the right primary.
#
# Parameters: $cmd (command name), $res (resource name).
# Returns 0 when nothing (more) has to be waited for, 1 when a prosumer has
# not yet prepared its handover to the designated primary.
#
# The host a prosumer is connected with is taken from the part of its peer
# path between '@' and the next ':' (or the end).  The capture is taken in
# list context, so an unparsable path yields an empty host name instead of a
# stale $1 left over from an earlier successful match.
sub primary_phase3b {
    my ($cmd, $res) = @_;
    finish_links();
    if (!$force && !todo_local(@_)) {
        # check that prosumer handover is prepared for the right primary
        my $primary = _get_designated_primary($cmd, $res, -1);
        my $prosumers = get_prosumers(@_);
        _trigger(3);
        foreach my $peer (split("\\+", $prosumers)) {
            my $lnk = "$mars/resource-$res/actual-$peer/prosumer-on";
            my $val = get_link($lnk, 1);
            unless ($val) {
                lprint "Prosumer at '$peer' is not active\n";
                return 0;
            }
            $lnk = "$mars/resource-$res/actual-$peer/prosumer-peer-path";
            $val = get_link($lnk, 1);
            return 1 unless $val;
            my ($connected) = $val =~ m/@(.+?)(?:$|:)/;
            $connected = "" unless defined($connected);
            lprint "Prosumer at '$peer' is connected with '$connected'\n";
            if ($connected eq $primary) {
                lprint "No handover necessary for '$peer'\n";
                next;
            }
            $lnk = "$mars/resource-$res/actual-$peer/new-prosumer-activated";
            $val = get_link($lnk, 1);
            if (!$val) {
                lprint "Prosumer handover not yet prepared at '$peer'\n";
                return 1;
            }
            $lnk = "$mars/resource-$res/actual-$peer/new-prosumer-peer-path";
            $val = get_link($lnk, 1);
            return 1 unless $val;
            ($connected) = $val =~ m/@(.+?)(?:$|:)/;
            $connected = "" unless defined($connected);
            lprint "Prosumer handover preparation at '$peer' is connected with '$connected'\n";
            unless ($connected eq $primary) {
                lprint "Waiting for handover prepare at '$peer'\n";
                return 1;
            }
            lprint "OK handover prepared at '$peer'\n";
        }
    }
    return 0;
}
|
||||
|
||||
# activate prosumer-handover
#
# Parameters: $cmd (command name), $res (resource name).
# Always returns 0.
#
# Unless forced or local, todo-<peer>/handover-prosumer is set to "1" for
# every prosumer, the links are flushed and the failure compensation is set
# to compensate_primary_fail_prepared.
sub primary_phase3c {
    my ($cmd, $res) = @_;
    if (!$force && !todo_local(@_)) {
        my $prosumers = get_prosumers(@_);
        foreach my $peer (split("\\+", $prosumers)) {
            lprint "Activating prosumer handover at '$peer'\n";
            my $lnk = "$mars/resource-$res/todo-$peer/handover-prosumer";
            set_link("1", $lnk);
        }
        finish_links();
        $allow_fail_action = \&compensate_primary_fail_prepared;
        _trigger(3);
    }
    # explicit status like all other phases; do not leak the value of
    # _trigger() or of the condition above to the caller
    return 0;
}
|
||||
|
||||
# wait for prosumer-handover finished
#
# Parameters: $cmd (command name), $res (resource name).
# Returns 0 when forced, local, a prosumer is inactive, or all prosumers are
# connected with the new primary ($host); returns 1 while a handover is still
# pending or a prosumer reports an unexpected peer.
#
# The connected host is captured in list context, so an unparsable peer path
# yields an empty name and is reported as unexpected; with a bare $1 a stale
# capture could have been mistaken for the new primary.
sub primary_phase3d {
    my ($cmd, $res) = @_;
    return 0 if $force || todo_local(@_);
    my $new = $host;
    my $prosumers = get_prosumers(@_);
    foreach my $peer (split("\\+", $prosumers)) {
        my $lnk = "$mars/resource-$res/actual-$peer/prosumer-on";
        my $val = get_link($lnk, 1);
        unless ($val) {
            lprint "Prosumer at '$peer' is not active\n";
            return 0;
        }
        # this link is true as long as the handover is still in progress
        $lnk = "$mars/resource-$res/actual-$peer/new-prosumer-activated";
        $val = get_link($lnk, 1);
        if ($val) {
            lprint "Prosumer handover not yet finished at '$peer'\n";
            return 1;
        }
        $lnk = "$mars/resource-$res/actual-$peer/prosumer-peer-path";
        $val = get_link($lnk, 1);
        unless ($val) {
            lwarn "UNEXPECTED: host '$peer' suddenly reports no peer path anymore.\n";
            return 1;
        }
        my ($connected) = $val =~ m/@(.+?)(?:$|:)/;
        $connected = "" unless defined($connected);
        lprint "Prosumer at '$peer' is connected with '$connected'\n";
        if ($connected ne $new) {
            lwarn "UNEXPECTED: host '$peer' is connected with '$connected' instead of '$new'.\n";
            return 1;
        }
        lprint "Prosumer handover finished at '$peer'\n";
    }
    # this is time critical
    _reset_gate(@_);
    finish_links();
    _trigger(3);
    return 0;
}
|
||||
|
||||
# wait for gate closed and reset old primary exports
#
# Parameters: $cmd (command name), $res (resource name).
# Returns 0 when forced, local, a prosumer is inactive, or every prosumer
# reports gate-mask "0x0"; returns 1 (after a trigger) while a gate does not
# respond or reports another mask.  On success the failure compensation
# action is cleared.
sub primary_phase3e {
    my ($cmd, $res) = @_;
    if ($force || todo_local(@_)) {
        return 0;
    }
    # check that gates have actually closed
    my $peer_list = get_prosumers(@_);
    my $actual_dir = "$mars/resource-$res";
    for my $peer (split("\\+", $peer_list)) {
        my $active = get_link("$actual_dir/actual-$peer/prosumer-on", 1);
        if (!$active) {
            lprint "Prosumer at '$peer' is not active\n";
            return 0;
        }
        my $mask = get_link("$actual_dir/actual-$peer/gate-mask", 1);
        unless (defined($mask) && $mask ne "") {
            lwarn "Gate at '$peer' does not respond.\n";
            _trigger(3);
            return 1;
        }
        # remove leading 0
        $mask =~ s/^0x0*(.)/0x$1/;
        lprint "Gate at '$peer' is '$mask'\n";
        unless ($mask eq "0x0") {
            _trigger(3);
            return 1;
        }
        lprint "Gate at '$peer' is open\n";
    }
    finish_links();
    _trigger(3);
    $allow_fail_action = undef;
    return 0;
}
|
||||
|
||||
# Wait until the old primary has given up its primary role, then reset the
# new-primary marker.
#
# Parameters: $cmd (command name), $res (resource name).
# Returns 1 while actual-<old>/is-primary is still set (non-forced, non-local
# case only), otherwise 0.
sub primary_phase3f {
    my ($cmd, $res) = @_;
    unless ($force || todo_local(@_)) {
        my $previous = $pri_old{$res};
        my $current = $host;
        if ($previous eq $current) {
            lprint "Old primary '$previous' is the new one.\n";
        } else {
            my $still_primary = get_link("$mars/resource-$res/actual-$previous/is-primary", 1);
            if ($still_primary) {
                _trigger(3);
                lprint "Old primary '$previous' not yet gone\n";
                return 1;
            }
            lprint "Old primary '$previous' is gone.\n";
        }
    }
    _reset_new_primary(@_);
    return 0;
}
|
||||
|
||||
# wait for device to appear / disappear
|
||||
sub primary_phase4 {
|
||||
my ($cmd, $res) = @_;
|
||||
if($cmd eq "secondary") {
|
||||
check_mars_device($cmd, $res, 1, 1);
|
||||
if($cmd =~ m/secondary/) {
|
||||
if (todo_local(@_)) {
|
||||
check_mars_device($cmd, $res, 1, 1);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
my $ok = detect_splitbrain($res, 1);
|
||||
|
@ -6507,7 +6910,17 @@ sub primary_phase4 {
|
|||
lwarn "\n";
|
||||
return 0;
|
||||
}
|
||||
check_mars_device($cmd, $res, 1, 0);
|
||||
if (todo_local(@_)) {
|
||||
check_mars_device($cmd, $res, 1, 0);
|
||||
} elsif (!$force) {
|
||||
my $old = $pri_old{$res};
|
||||
my $new = $host;
|
||||
lprint "Unexporting old primary '$old' for safety.\n";
|
||||
my $lnk = "$mars/resource-$res/todo-$old/exports";
|
||||
set_link("(none)", $lnk);
|
||||
$lnk = "$mars/resource-$res/todo-$old/multi-prosumer";
|
||||
set_link("0", $lnk);
|
||||
}
|
||||
# new switch semantics, when nothing has failed before: up
|
||||
up_res_phase1(@_);
|
||||
return 0;
|
||||
|
@ -9798,6 +10211,7 @@ my %cmd_table =
|
|||
],
|
||||
\&primary_phase0,
|
||||
"check preconditions",
|
||||
|
||||
"FORK",
|
||||
\&primary_phase0a,
|
||||
"conditionally wait for fetch off",
|
||||
|
@ -9806,21 +10220,44 @@ my %cmd_table =
|
|||
"LOOP",
|
||||
\&primary_phase1,
|
||||
"leave primary state",
|
||||
|
||||
"LOOP",
|
||||
\&primary_phase1b,
|
||||
"trigger remote",
|
||||
|
||||
"LOOP",
|
||||
\&primary_phase2,
|
||||
"wait for cluster when necessary",
|
||||
|
||||
"LOOP",
|
||||
\&primary_phase2b,
|
||||
"avoid split brain",
|
||||
|
||||
\&primary_phase3,
|
||||
"switch to primary",
|
||||
|
||||
"LOOP",
|
||||
\&primary_phase3b,
|
||||
"trigger remote",
|
||||
|
||||
\&primary_phase3c,
|
||||
"trigger prosumer handover",
|
||||
|
||||
"LOOP",
|
||||
\&primary_phase3d,
|
||||
"wait for prosumer handover and open gate",
|
||||
|
||||
"LOOP",
|
||||
\&primary_phase3e,
|
||||
"wait for gate open and reset old primary exports",
|
||||
|
||||
"LOOP",
|
||||
\&primary_phase3f,
|
||||
"wait for primary gone",
|
||||
|
||||
\&primary_phase4,
|
||||
"wait for device",
|
||||
|
||||
"LOOP",
|
||||
\&primary_phase5,
|
||||
"trigger systemd",
|
||||
|
|
Loading…
Reference in New Issue