mirror of
https://github.com/schoebel/mars
synced 2025-04-01 22:58:34 +00:00
marsadm: new command update-cluster
This commit is contained in:
parent
3ab97f26b5
commit
263d9fa9d7
@ -1267,6 +1267,7 @@ sub get_alive_links {
|
|||||||
my $alive = shift || "alive";
|
my $alive = shift || "alive";
|
||||||
my $hosts = shift || "*";
|
my $hosts = shift || "*";
|
||||||
my $warn = shift || 0;
|
my $warn = shift || 0;
|
||||||
|
my $non_participating = shift || 0;
|
||||||
$res = "*" if $res eq "all";
|
$res = "*" if $res eq "all";
|
||||||
my %cand;
|
my %cand;
|
||||||
foreach my $path (glob("$mars/ips/ip-$hosts")) {
|
foreach my $path (glob("$mars/ips/ip-$hosts")) {
|
||||||
@ -1285,6 +1286,7 @@ sub get_alive_links {
|
|||||||
# peer must be a candiate matching the hosts spec
|
# peer must be a candiate matching the hosts spec
|
||||||
next unless $cand{$peer};
|
next unless $cand{$peer};
|
||||||
# peer must be participating in the same resources
|
# peer must be participating in the same resources
|
||||||
|
if (!$non_participating) {
|
||||||
my @other = glob("$mars/resource-$res/data-$peer");
|
my @other = glob("$mars/resource-$res/data-$peer");
|
||||||
next unless @other;
|
next unless @other;
|
||||||
# I must be participating in some of the _same_ resources
|
# I must be participating in some of the _same_ resources
|
||||||
@ -1296,6 +1298,7 @@ sub get_alive_links {
|
|||||||
$common++ if -e $self;
|
$common++ if -e $self;
|
||||||
}
|
}
|
||||||
next unless $common;
|
next unless $common;
|
||||||
|
}
|
||||||
# OK: remember peer
|
# OK: remember peer
|
||||||
$peers{$peer} = 1;
|
$peers{$peer} = 1;
|
||||||
}
|
}
|
||||||
@ -1419,29 +1422,39 @@ sub wait_cond {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# wait until some communication has occurred
|
# wait until everything is recent
|
||||||
sub wait_cluster {
|
sub wait_cluster {
|
||||||
my $cmd = shift;
|
my $cmd = shift;
|
||||||
my $res = shift || "all";
|
my $res = shift || "all";
|
||||||
my $hosts = shift || "*";
|
my $hosts = shift || "*";
|
||||||
my $abort = shift;
|
my $abort = shift;
|
||||||
$abort = $force unless defined($abort);
|
$abort = $force unless defined($abort);
|
||||||
|
my $trigger_code = shift || ($hosts =~ m/\*/ ? 3 : 2);
|
||||||
|
my $non_participating = ($trigger_code >= 8);
|
||||||
$timeout_val = undef;
|
$timeout_val = undef;
|
||||||
finish_links();
|
finish_links();
|
||||||
|
lprint "Ping $hosts\n";
|
||||||
|
_trigger($trigger_code);
|
||||||
|
my %old_status = get_alive_links($res, "time", $hosts, 0, $non_participating);
|
||||||
|
my $total_count = scalar(keys(%old_status));
|
||||||
my $start_time = mars_time();
|
my $start_time = mars_time();
|
||||||
my %old_status = get_alive_links($res, "time", $hosts);
|
my %progress;
|
||||||
_trigger($hosts =~ m/\*/ ? 3 : 2);
|
foreach my $peer (keys(%old_status)) {
|
||||||
lprint "Ping and wait for answers from " . join(",", sort alphanum_cmp keys(%old_status)) . "\n" if %old_status;
|
$progress{$peer} = $non_participating ? 1 : 0;
|
||||||
|
}
|
||||||
|
lprint "Wait for answers from " . join(",", sort alphanum_cmp keys(%old_status)) . "\n" if %old_status;
|
||||||
my $delta = $window > 0 ? $window / 2 : 30;
|
my $delta = $window > 0 ? $window / 2 : 30;
|
||||||
while (1) {
|
while (1) {
|
||||||
my $dead_count = 0;
|
my $dead_count = 0;
|
||||||
my $alive_count = 0;
|
my $alive_count = 0;
|
||||||
my $unknown_count = 0;
|
my $unknown_count = 0;
|
||||||
my %status = get_alive_links($res, "time", $hosts);
|
my %status = get_alive_links($res, "time", $hosts, 0, $non_participating);;
|
||||||
my $now = mars_time();
|
my $now = mars_time();
|
||||||
foreach my $peer (keys(%status)) {
|
foreach my $peer (keys(%status)) {
|
||||||
next if $peer eq $host;
|
next if $peer eq $host;
|
||||||
if ($status{$peer} ne $old_status{$peer} || $status{$peer} >= $start_time) {
|
if ($status{$peer} > $old_status{$peer} &&
|
||||||
|
$status{$peer} > $start_time &&
|
||||||
|
$progress{$peer}-- <= 0) {
|
||||||
$alive_count++;
|
$alive_count++;
|
||||||
} elsif ($status{$peer} + $delta < $now) {
|
} elsif ($status{$peer} + $delta < $now) {
|
||||||
$dead_count++;
|
$dead_count++;
|
||||||
@ -1449,11 +1462,15 @@ sub wait_cluster {
|
|||||||
$unknown_count++;
|
$unknown_count++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!$unknown_count) {
|
if ($unknown_count) {
|
||||||
if (!$dead_count) {
|
_trigger($trigger_code);
|
||||||
lprint "all $alive_count peer(s) seem to be alive\n";
|
# ensure more progress will happen
|
||||||
|
%old_status = %status;
|
||||||
} else {
|
} else {
|
||||||
lwarn "$alive_count peer(s) seem to be alive, and $dead_count peer(s) seem to be dead / not reachable\n";
|
if (!$dead_count) {
|
||||||
|
lprint "$alive_count/$total_count peer(s) seem to be alive\n";
|
||||||
|
} else {
|
||||||
|
lwarn "$alive_count/$total_count peer(s) seem to be alive, and $dead_count peer(s) seem to be dead / not reachable\n";
|
||||||
ldie "aborting (--force was not given)\n" if $abort;
|
ldie "aborting (--force was not given)\n" if $abort;
|
||||||
}
|
}
|
||||||
last;
|
last;
|
||||||
@ -1471,6 +1488,13 @@ sub wait_cluster_noforce {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Command handler for "update-cluster".
#
# Arguments (positional):
#   $cmd - the command name, handed through unchanged to wait_cluster()
#   $res - resource name; any false value (missing, "", 0) becomes "all"
#
# Delegates all of the work to wait_cluster() with the host pattern "*",
# an explicit abort flag of 0 (so wait_cluster() does not substitute
# $force for it), and trigger code 8.  wait_cluster() treats any trigger
# code >= 8 as "also consider non-participating peers".
#
# Returns whatever wait_cluster() returns.
sub update_cluster {
    my ($cmd, $res) = @_;
    $res ||= "all";

    if ($verbose) {
        lprint("UPDATING $res\n");
    }

    return wait_cluster($cmd, $res, "*", 0, 8);
}
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
|
|
||||||
# syntactic checks
|
# syntactic checks
|
||||||
@ -6609,6 +6633,15 @@ my %cmd_table =
|
|||||||
"Wait until the given condition is met on the resource, locally.",
|
"Wait until the given condition is met on the resource, locally.",
|
||||||
\&wait_cond,
|
\&wait_cond,
|
||||||
],
|
],
|
||||||
|
"update-cluster"
|
||||||
|
=> [
|
||||||
|
"usage: update-cluster [<resource_name>]",
|
||||||
|
"Fetch all the links from all joined cluster hosts.",
|
||||||
|
"Use this between create-resource and join-resource.",
|
||||||
|
"NOTICE: this is extremely useful for avoiding races when scripting",
|
||||||
|
"in a cluster.",
|
||||||
|
\&update_cluster,
|
||||||
|
],
|
||||||
|
|
||||||
# compatible keywords (or their derivatives)
|
# compatible keywords (or their derivatives)
|
||||||
"attach"
|
"attach"
|
||||||
@ -7279,7 +7312,7 @@ if ($cmd =~ "show|cron") {
|
|||||||
ldie "argument '$res' isn't numeric\n" unless $res =~ m/^[0-9.]+$/;
|
ldie "argument '$res' isn't numeric\n" unless $res =~ m/^[0-9.]+$/;
|
||||||
} elsif ($cmd =~ m/^(join|merge)-cluster$/) {
|
} elsif ($cmd =~ m/^(join|merge)-cluster$/) {
|
||||||
$res = shift @args || helplist "peer argument is missing\n";
|
$res = shift @args || helplist "peer argument is missing\n";
|
||||||
} elsif (!($cmd =~ m/^(create|split|leave|wait)-cluster|merge-cluster-list|create-uuid|cat|[a-z]+-file|trigger/)) {
|
} elsif (!($cmd =~ m/^(create|split|leave|wait|update)-cluster|merge-cluster-list|create-uuid|cat|[a-z]+-file|trigger/)) {
|
||||||
$res = shift @args || helplist "resource argument is missing\n";
|
$res = shift @args || helplist "resource argument is missing\n";
|
||||||
check_id_list($res);
|
check_id_list($res);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user