marsadm: new command split-cluster

This commit is contained in:
Thomas Schoebel-Theuer 2017-07-26 07:43:23 +02:00 committed by Thomas Schoebel-Theuer
parent 2a8eefa167
commit 21e680a08f

View File

@ -98,7 +98,7 @@ sub lwarn {
# Global settings used throughout the script.
# '$Id$' is a keyword placeholder string (presumably expanded by the
# version control system -- confirm).
my $Id = '$Id$ ';
my $user_version = 0.1;
# NOTE(review): $marsadm_version is declared twice below (1, then 1.1).
# This looks like the before/after pair of a single change; confirm that
# only one declaration is meant to remain.
my $marsadm_version = 1; # some rough hint at newer features
my $marsadm_version = 1.1; # some rough hint at newer features
# Base directory; other code builds its paths as "$mars/...".
my $mars = "/mars";
# Name of the local node as printed by "uname -n"; the script aborts via
# ldie when the command yields nothing. The trailing newline is removed.
my $host = `uname -n` or ldie "cannot determine my network node name\n";
chomp $host;
@ -2078,6 +2078,73 @@ sub create_res {
}
}
# split_cluster($cmd)
#
# Implements the expert-only "split-cluster" command.
#
# Steps, in order:
#   1. Copy the local $mars/ips/ entries into $backup_dir/ips-backup and
#      run $ssh_probe plus a safeguard check (no 'delete-*' entries below
#      $mars/) on every peer found there.
#   2. Merge the ips/ entries of all other peers into the local backup via
#      rsync, then push the merged backup to every other peer.
#   3. Remove $mars/ips on every peer.
#   4. Wait until no peer reports processes matching "mars_peer" any more
#      (bounded by $timeout rounds).
#   5. On every peer, move away resources the peer does not participate in
#      and restore only the IP entries of the hosts it still shares a
#      resource with.
#
# Parameters:
#   $cmd - the command name as passed by the dispatcher (not used here).
#
# Dies via ldie when the local backup cannot be created or when no peer is
# found in the backup. Remote commands issued without the trailing "1"
# argument are presumably fatal on failure -- confirm against ssh_cmd.
sub split_cluster {
  my ($cmd) = @_;
  my $ips_backup = "$backup_dir/ips-backup";

  # Peer names of all ip-* entries currently present in the local backup.
  # \Q...\E keeps characters such as '.' in the path from acting as regex
  # metacharacters.
  my $list_backup_peers = sub {
    my @names = glob("$ips_backup/ip-*");
    s:^\Q$ips_backup\E/ip-:: for @names;
    return @names;
  };

  # checks
  lprint "Checking...\n";
  system("mkdir -p $ips_backup")
    and ldie "cannot create backup directory '$ips_backup'\n";
  # The backup is the only way to restore the IPs after the "rm -rf" below,
  # so a failed copy must stop the whole operation.
  system("cp -a $mars/ips/* $ips_backup/")
    and ldie "cannot copy '$mars/ips/' to '$ips_backup/'\n";
  # Succeeds on the remote side only when no 'delete-*' entry exists.
  my $safeguard_cmd = "(( \$(find $mars/ -name 'delete-*' | wc -l) == 0 ))";
  foreach my $peer ($list_backup_peers->()) {
    ssh_cmd($peer, $ssh_probe);
    ssh_cmd($peer, $safeguard_cmd);
  }

  lprint "Creating IP backups...\n";
  # create ips backup
  foreach my $peer ($list_backup_peers->()) {
    next if $peer eq $host;
    rsync_cmd($peer, "--max-size=1 --update $peer:$mars/ips/ $ips_backup/", 1);
  }
  # Refresh the local entries once more after the remote ones were merged in.
  system("cp -a $mars/ips/* $ips_backup/")
    and ldie "cannot copy '$mars/ips/' to '$ips_backup/'\n";
  # The rsync runs above may have added entries, so list the backup again.
  my @peers = $list_backup_peers->();
  ldie "Empty peer list\n" unless @peers;
  foreach my $peer (@peers) {
    next if $peer eq $host;
    rsync_cmd($peer, "--max-size=1 --update $ips_backup $peer:$backup_dir/");
  }

  # delete all hosts everywhere
  lprint "Deleting all peers IPs everywhere...\n";
  foreach my $peer (@peers) {
    print "--- peer $peer\n";
    my $remove_cmd = "rm -rf $mars/ips";
    ssh_cmd($peer, $remove_cmd, 1);
  }

  # wait until all peer threads are gone
  lprint "Waiting...\n";
  sleep(10);
  # Succeeds on the remote side only when no "mars_peer" process is listed.
  my $cond_cmd = "(( \$(ps ax | grep mars_peer | grep -v grep | wc -l) == 0 ))";
  my $running = 0;
  for (my $loop = 0; $loop < $timeout; $loop++) {
    $running = 0;
    foreach my $peer (@peers) {
      # A true status is counted as "this peer still has peer threads".
      my $status = ssh_cmd($peer, $cond_cmd, 1);
      $running++ if $status;
    }
    last if !$running;
    print "--- $running peers have running peer threads.\n";
    sleep(1);
  }
  # Do not fall through silently when the wait loop ran out of rounds.
  lwarn "timeout: $running peers still have running peer threads, continuing anyway\n"
    if $running;

  # NOTE(review): the re-creation is repeated three times; the reason is not
  # visible here (presumably to win against concurrent updates) -- confirm.
  for (my $loop = 0; $loop < 3; $loop++) {
    # re-create all groups everywhere
    lprint "RE-CREATE peer IPs...\n";
    foreach my $peer (@peers) {
      print "--- peer $peer\n";
      # Drop pending global deletions and stale connection messages.
      my $remote_cmd = "rm -f $mars/todo-global/delete* $mars/actual-*/msg-*connection-*; ";
      # Move every resource without data-$peer and replay-$peer into the
      # backup directory, replacing an older copy of the same name.
      $remote_cmd .= "for i in $mars/resource-*; do if ! [[ -e \$i/data-$peer ]] && ! [[ -e \$i/replay-$peer ]]; then rm -rf $backup_dir/\${i##*/}; mv \$i $backup_dir/; fi; done; ";
      $remote_cmd .= "mkdir -p $mars/ips; ";
      # Lists the data-* entries of all resources having data-$peer, plus
      # "x-$peer" so the peer itself is always included. The "cut -f1-3"
      # relies on $mars being a single-level path such as /mars.
      my $sub_list = "{ for i in \$(ls $mars/resource-*/data-$peer | cut -d/ -f1-3 | sort -u); do (cd \$i; ls data-*); done; echo x-$peer; }";
      my $sub_cmd = "echo RESTORE IP \$j; cp -a $ips_backup/ip-\$j $mars/ips/";
      # Strip the prefix up to the first '-' to obtain the host names, then
      # restore the IP entry of each distinct host from the backup.
      $remote_cmd .= "for j in \$($sub_list | cut -d- -f2- | sort -u); do $sub_cmd; done";
      lprint "$remote_cmd\n";
      ssh_cmd($peer, $remote_cmd, 1);
    }
  }
}
sub leave_res_phase0 {
my ($cmd, $res) = @_;
check_not_primary(@_) unless $force;
@ -4688,6 +4755,14 @@ my %cmd_table =
"possible.",
\&merge_cluster,
],
"split-cluster"
=> [
"usage: split-cluster (no parameters)",
"NOT OFFICIALLY SUPPORTED - ONLY FOR EXPERTS.",
"RTFS = Read The Fucking Sourcecode.",
"Use this only if you know what you are doing.",
\&split_cluster,
],
"leave-cluster"
=> [
"usage: leave-cluster (no parameters)",
@ -5578,7 +5653,7 @@ if ($cmd =~ "show") {
ldie "argument '$res' isn't numeric\n" unless $res =~ m/^[0-9.]+$/;
} elsif ($cmd =~ m/^(join|merge)-cluster$/) {
$res = shift @args || helplist "peer argument is missing\n";
} elsif (!($cmd =~ m/^(create|leave|wait)-cluster|merge-cluster-list|create-uuid|cat|[a-z]+-file/)) {
} elsif (!($cmd =~ m/^(create|split|leave|wait)-cluster|merge-cluster-list|create-uuid|cat|[a-z]+-file/)) {
$res = shift @args || helplist "resource argument is missing\n";
check_id($res);
}