mirror of
https://github.com/schoebel/mars
synced 2025-02-07 15:52:02 +00:00
c861a0d202
DRBD does not allow to deliberately provoke a SPLIT BRAIN in connected state. Thus MARS Light cannot be required to support such silly things in any way.
1897 lines
58 KiB
Perl
Executable File
1897 lines
58 KiB
Perl
Executable File
#!/usr/bin/perl -w
|
|
# (c) 2010 Thomas Schoebel-Theuer / 1&1 Internet AG
|
|
|
|
use strict;
|
|
use English;
|
|
use warnings;
|
|
|
|
umask 0077;
|
|
|
|
##################################################################
|
|
|
|
# messaging
|
|
|
|
my $error_count = 0;
|
|
my $notify = "";
|
|
|
|
sub lprint {
|
|
my ($text) = @_;
|
|
print $text;
|
|
if ($notify) {
|
|
system("/usr/bin/logger -t marsadm \"$notify $text\"");
|
|
}
|
|
}
|
|
|
|
sub ldie {
|
|
my ($text) = @_;
|
|
$error_count++;
|
|
lprint "DYING: $text";
|
|
die "\n";
|
|
}
|
|
|
|
sub lwarn {
|
|
my ($text) = @_;
|
|
lprint "WARNING: $text";
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# low-level infrastructure
|
|
|
|
my @link_list = ();
|
|
my %link_hash;
|
|
my $verbose = 0;
|
|
|
|
sub get_link {
|
|
my ($path, $unchecked) = @_;
|
|
my $result = readlink($path);
|
|
if (!defined($result)) {
|
|
ldie "cannot read symlink '$path'\n" unless $unchecked;
|
|
lwarn "cannot read symlink '$path'\n" if $unchecked == 1;
|
|
}
|
|
return $result;
|
|
}
|
|
|
|
sub is_link_recent {
|
|
my ($path) = @_;
|
|
my @stat = lstat($path);
|
|
return 0 if (!@stat);
|
|
return 1 if $stat[9] + 15 >= mars_time();
|
|
return 0;
|
|
}
|
|
|
|
sub to_tmp {
|
|
my $path = shift;
|
|
$path =~ s:^(.*)/:$1/.tmp.:;
|
|
return $path;
|
|
}
|
|
|
|
sub from_tmp {
|
|
my $path = shift;
|
|
$path =~ s:^(.*)/\.tmp\.:$1/:;
|
|
return $path;
|
|
}
|
|
|
|
sub set_link {
|
|
my ($src, $dst) = @_;
|
|
my $dst_tmp = to_tmp($dst);
|
|
unlink($dst_tmp);
|
|
symlink($src, $dst_tmp) or ldie "cannot create symlink '$dst' -> '$src'\n";
|
|
# the _order_ is important! remove existing intermediate element before re-appanding
|
|
if (exists($link_hash{$dst})) {
|
|
my @copy = @link_list;
|
|
@link_list = ();
|
|
foreach my $elem (@copy) {
|
|
next if $elem eq $dst;
|
|
push @link_list, $elem;
|
|
}
|
|
}
|
|
$link_hash{$dst} = $src;
|
|
push @link_list, $dst;
|
|
}
|
|
|
|
sub finish_links {
|
|
return unless @link_list;
|
|
my $timestamp = time();
|
|
undef $/;
|
|
my $lamport = "";
|
|
if (open(my $fh, "<", "/proc/sys/mars/lamport_clock")) {
|
|
$lamport = <$fh>;
|
|
}
|
|
if ($lamport =~ m/lamport_now=([0-9.]+)/) {
|
|
$timestamp = $1;
|
|
lprint "using lamport timestamp $timestamp\n" if $verbose;
|
|
}
|
|
while (my $link = shift @link_list) {
|
|
my $link_tmp = to_tmp($link);
|
|
system("touch -h -d \"\@$timestamp\" $link_tmp") == 0 or ldie "cannot set mtime on symlink '$link_tmp'\n";
|
|
rename($link_tmp, $link) or ldie "cannot finalize symlink '$link'\n";
|
|
if ($verbose) {
|
|
my $target = readlink($link);
|
|
lprint "created symlink '$link' -> '$target'\n";
|
|
}
|
|
}
|
|
_trigger();
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# global variables and checks
|
|
|
|
my $Id = '$Id$ ';
|
|
my $user_version = 0.1;
|
|
my $mars = "/mars";
|
|
my $host = `uname -n` or ldie "cannot determine my network node name\n";
|
|
chomp $host;
|
|
my $force = 0;
|
|
my $timeout = -1;
|
|
my $ip = _get_ip() or ldie "cannot determine my IP address\n";
|
|
|
|
my $kernel_version = 0;
|
|
unless ($ARGV[0] =~ m/cluster|cat/) {
|
|
$kernel_version = get_link("$mars/tree-$host", 1);
|
|
if ($kernel_version && $user_version != $kernel_version) {
|
|
lwarn "kernel_version=$kernel_version user_version=$user_version\n";
|
|
if ($user_version < $kernel_version) {
|
|
ldie "Sorry, your MARS kernel module uses version $kernel_version, but my $0 userspace version is only $user_version. That cannot work. Please upgrade your userspace scripts!\n";
|
|
} elsif (int($user_version) > int($kernel_version) + 1) {
|
|
ldie "MAJOR VERSION mismatch : only one major upgrade step is supported.\n";
|
|
}
|
|
lwarn "using different minor versions is possible, but you should upgrade your kernel module ASAP\n";
|
|
}
|
|
}
|
|
|
|
sub get_alive_links {
|
|
my $res = shift || "all";
|
|
my $alive = shift || "alive";
|
|
my $glob = "$mars/$alive-*";
|
|
if ($res ne "all") {
|
|
$glob = "$mars/$alive-{";
|
|
my $count = 0;
|
|
foreach my $peer (glob("$mars/resource-$res/data-*")) {
|
|
$peer =~ m:/data-(.+):;
|
|
$glob .= "," if $count++;
|
|
$glob .= $1;
|
|
}
|
|
$glob .= "}";
|
|
}
|
|
my %links;
|
|
foreach my $path (glob($glob)) {
|
|
$path =~ m:/$alive-(.+):;
|
|
my $peer = $1;
|
|
$links{$peer} = get_link($path);
|
|
}
|
|
return %links;
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# timeout handling
|
|
|
|
#
|
|
# return the lamport clock time in nanosecond resolution
|
|
# fallback to system time()
|
|
#
|
|
sub mars_time {
|
|
open(my $lamport_clock, "<", "/proc/sys/mars/lamport_clock");
|
|
my $lamport_time;
|
|
while (<$lamport_clock>) {
|
|
$lamport_time = $1 if /^lamport_now=(.*)/;
|
|
}
|
|
close($lamport_clock);
|
|
return $lamport_time || time() . '0' x 9;
|
|
}
|
|
|
|
sub sleep_timeout {
|
|
my $sleeptime = shift || 5;
|
|
if ($timeout < 0) {
|
|
sleep($sleeptime);
|
|
return;
|
|
}
|
|
ldie "Timeout reached. You may retry with --timeout=-1 to ensure waiting until progress is possible.\n" if !$timeout;
|
|
my $rest = $timeout;
|
|
$rest = $sleeptime if $rest > $sleeptime;
|
|
sleep($rest);
|
|
$timeout -= $rest;
|
|
}
|
|
|
|
# wait for some condition
|
|
sub wait_cond {
|
|
my ($cmd, $res, $specific) = @_;
|
|
my $is_actual = ($specific =~ s/^(is|has)-//);
|
|
my $is_on = !($specific =~ s/-off$//);
|
|
$specific =~ s/-on$//;
|
|
if ($is_actual) {
|
|
if ($specific eq "device") {
|
|
check_mars_device($cmd, $res, 1, !$is_on);
|
|
return;
|
|
}
|
|
my %table =
|
|
(
|
|
"attach" => "is-attached",
|
|
"attached" => "is-attached",
|
|
"replay" => "is-replaying",
|
|
"replaying"=> "is-replaying",
|
|
"copy" => "is-copying",
|
|
"copying" => "is-copying",
|
|
"sync" => "is-syncing",
|
|
"syncing" => "is-syncing",
|
|
"primary" => "is-primary",
|
|
);
|
|
my $name = $table{$specific};
|
|
ldie "actual indicator '$specific' does not exist\n" unless exists($table{$specific});
|
|
check_status($cmd, $res, $name, $is_on ? 1 : 0, 1);
|
|
} else {
|
|
my %table =
|
|
(
|
|
"attach" => "attach",
|
|
"attached" => "attach",
|
|
"connect" => "connect",
|
|
"replay" => "allow-replay",
|
|
"sync" => "sync",
|
|
);
|
|
my $name = $table{$specific};
|
|
ldie "button '$specific' does not exist\n" unless exists($table{$specific});
|
|
check_todo($cmd, $res, $name, $is_on ? 1 : 0, 1);
|
|
}
|
|
}
|
|
|
|
# wait until some communication has occurred
|
|
sub wait_cluster {
|
|
return wait_cond(@_) if int(@_) >= 3;
|
|
my $cmd = shift;
|
|
my $res = shift || "all";
|
|
my $start_time = mars_time();
|
|
_trigger();
|
|
my $delta = $timeout > 0 ? $timeout : 30;
|
|
while (1) {
|
|
my $dead_count = 0;
|
|
my $alive_count = 0;
|
|
my $unknown_count = 0;
|
|
my %status = get_alive_links($res, "time");
|
|
my $now = mars_time();
|
|
foreach my $peer (keys(%status)) {
|
|
next if $peer eq $host;
|
|
if ($status{$peer} >= $start_time) {
|
|
$alive_count++;
|
|
} elsif ($status{$peer} + $delta < $now) {
|
|
$dead_count++;
|
|
} else {
|
|
$unknown_count++;
|
|
}
|
|
}
|
|
if (!$dead_count && !$unknown_count) {
|
|
lprint "all $alive_count peer(s) seem to be alive\n";
|
|
last;
|
|
}
|
|
if (!$unknown_count) {
|
|
lwarn "$alive_count peer(s) seem to be alive, and $dead_count peer(s) seem to be dead / not reachable\n";
|
|
ldie "--force not given\n" unless $force;
|
|
last;
|
|
}
|
|
sleep(1);
|
|
}
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# syntactic checks
|
|
|
|
sub check_id {
|
|
my $str = shift;
|
|
ldie "identifier '$str' has disallowed characters" unless $str =~ m/^[A-Za-z_][-A-Za-z0-9_]*$/;
|
|
ldie "identifier '$str' is too long (only 16 chars allowed)" if length($str) > 16;
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# semantic checks
|
|
|
|
sub check_res {
|
|
my $res = shift;
|
|
if (not -d "$mars/resource-$res") {
|
|
# DO WHAT I MEAN: try to substitute a device name for a badly given resource name if it is unique
|
|
my $count = 0;
|
|
my $found;
|
|
my @tests = glob("$mars/resource-*/device-$host");
|
|
foreach my $test (@tests) {
|
|
my $target = get_link($test, 2);
|
|
if ($target eq $res) {
|
|
$found = $test;
|
|
$count++;
|
|
}
|
|
}
|
|
if (!$count) {
|
|
@tests = glob("$mars/resource-*/_direct-*-$host");
|
|
foreach my $test (@tests) {
|
|
my $target = get_link($test, 2);
|
|
$target =~ s/^.*,//;
|
|
if ($target eq $res) {
|
|
$found = $test;
|
|
$count++;
|
|
}
|
|
}
|
|
}
|
|
ldie "resource '$res' does not exist ($count replacements found)\n" unless $count == 1 and $found;
|
|
$found =~ s:^.*/resource-(.*)/.*$:$1:;
|
|
lwarn "substituting bad resource name '$res' by uniquely matching resource name '$found'\n";
|
|
$res = $found;
|
|
}
|
|
return $res;
|
|
}
|
|
|
|
sub check_sizes {
|
|
my ($res, $host) = @_;
|
|
my $logical_size = get_link("$mars/resource-$res/size");
|
|
my $physical_size = get_link("$mars/resource-$res/actsize-$host", 1);
|
|
if (defined($physical_size) && $physical_size < $logical_size) {
|
|
ldie "physical device on host '$host' has size $physical_size, which is smaller than the logical resource size $logical_size\n";
|
|
}
|
|
}
|
|
|
|
sub check_res_member {
|
|
my $res = shift;
|
|
ldie "sorry, I have not yet joined to resource '$res'\n" unless -e "$mars/resource-$res/data-$host";
|
|
check_sizes($res, $host);
|
|
}
|
|
|
|
sub check_sync_finished {
|
|
my ($res, $host) = @_;
|
|
check_sizes(@_);
|
|
my $lnk = "$mars/resource-$res/syncstatus-$host";
|
|
if (lstat($lnk)) {
|
|
my $syncstatus = get_link($lnk, 1);
|
|
my $size = get_link("$mars/resource-$res/size");
|
|
ldie "sync has not yet finished, only $syncstatus / $size bytes transferred\n" unless $syncstatus >= $size;
|
|
}
|
|
lprint "OK, it seems that sync has finished on $host.\n";
|
|
}
|
|
|
|
sub check_primary {
|
|
my ($cmd, $res) = @_;
|
|
my $lnk = "$mars/resource-$res/actual-$host/is-primary";
|
|
my $is_primary = get_link($lnk);
|
|
if (!$is_primary) { # give it a second chance
|
|
my $name = get_link("$mars/resource-$res/device-$host");
|
|
my $dev = "/dev/mars/$name";
|
|
$is_primary = 1 if -b $dev;
|
|
}
|
|
ldie "for operation '$cmd' I need to be primary\n" unless $is_primary;
|
|
my $primary = _get_designated_primary($res);
|
|
ldie "for operation '$cmd', I also must be the designated primary\n" unless $primary eq $host;
|
|
}
|
|
|
|
sub check_not_primary {
|
|
my ($cmd, $res) = @_;
|
|
my $lnk = "$mars/resource-$res/actual-$host/is-primary";
|
|
my $is_primary = get_link($lnk);
|
|
if ($is_primary) {
|
|
ldie "operation '$cmd' cannot be executed on primary\n";
|
|
}
|
|
# also check whether we intend to become primary
|
|
my $primary = _get_designated_primary($res);
|
|
ldie "operation '$cmd' cannot be executed on designated primary\n" if $primary eq $host;
|
|
}
|
|
|
|
sub check_primary_gone {
|
|
my ($res) = @_;
|
|
for (;;) {
|
|
my $pri = _get_actual_primary($res);
|
|
last if !$pri;
|
|
last if $pri eq $host;
|
|
lprint "waiting for other primary host ($pri) to disappear....\n";
|
|
sleep_timeout();
|
|
}
|
|
}
|
|
|
|
sub check_todo {
|
|
my ($cmd, $res, $key, $val, $wait, $unchecked, $inv) = @_;
|
|
my $path = "$mars/resource-$res/todo-$host/$key";
|
|
my $link;
|
|
for (;;) {
|
|
$link = get_link($path, $unchecked);
|
|
return unless defined($link);
|
|
if (defined($inv) && $inv) {
|
|
last if $link != $val;
|
|
ldie "cannot execute $cmd: switch '$key' must not have value '$val'\n" if !$wait;
|
|
lprint "waiting until switch '$key' leaves the value '$val'....\n";
|
|
} else {
|
|
last if $link == $val;
|
|
ldie "cannot execute $cmd: switch '$key' must have value '$val', but actually has value '$link'\n" if !$wait;
|
|
lprint "waiting until switch '$key' reaches the value '$val'....\n";
|
|
}
|
|
sleep_timeout();
|
|
}
|
|
lprint "OK, '$path' has acceptable value '$link'\n";
|
|
}
|
|
|
|
sub check_status {
|
|
my ($cmd, $res, $key, $val, $wait, $unchecked, $inv) = @_;
|
|
my $path = "$mars/resource-$res/actual-$host/$key";
|
|
my $link;
|
|
for (;;) {
|
|
$link = get_link($path, $unchecked);
|
|
return unless defined($link);
|
|
if (defined($inv) && $inv) {
|
|
last if $link != $val;
|
|
ldie "cannot execute $cmd: '$path' must not have value '$val'\n" if !$wait;
|
|
lprint "waiting until '$key' leaves the value '$val'...\n";
|
|
} else {
|
|
last if $link == $val;
|
|
ldie "cannot execute $cmd: '$path' must have value '$val'\n" if !$wait;
|
|
lprint "waiting until '$key' reaches the value '$val'...\n";
|
|
}
|
|
sleep_timeout();
|
|
}
|
|
lprint "OK, '$path' has acceptable value '$link'\n";
|
|
}
|
|
|
|
sub check_mars_device {
|
|
my ($cmd, $res, $wait, $inv) = @_;
|
|
my $name = get_link("$mars/resource-$res/device-$host");
|
|
my $dev = "/dev/mars/$name";
|
|
my $backoff = 1;
|
|
my $round = 0;
|
|
if ($inv) {
|
|
while (-b $dev) {
|
|
ldie "cannot execute $cmd: device '$dev' has not yet disappeared\n" if !$wait;
|
|
lwarn "device '$dev' has not yet disappeared\n";
|
|
sleep_timeout($backoff);
|
|
# very slowly increasing backoff
|
|
if ($backoff < 10 && $round++ > 5) {
|
|
$round = 0;
|
|
$backoff++;
|
|
}
|
|
}
|
|
lprint "device '$dev' is no longer present\n" unless -b $dev;
|
|
return;
|
|
}
|
|
# !$inv
|
|
my $primary = _get_designated_primary($res);
|
|
ldie "for operation '$cmd', I should be the designated primary\n" unless $primary eq $host;
|
|
while (! -e $dev) {
|
|
my $text = get_error_text($cmd, $res);
|
|
lprint $text if $text;
|
|
ldie "aborting due to errors\n" if $text =~ m/error/mi;
|
|
ldie "cannot execute $cmd: device '$dev' not yet present\n" if !$wait;
|
|
lprint "device '$dev' not yet present\n";
|
|
sleep_timeout($backoff);
|
|
# very slowly increasing backoff
|
|
if ($backoff < 10 && $round++ > 5) {
|
|
$round = 0;
|
|
$backoff++;
|
|
}
|
|
}
|
|
lprint "device '$dev' is present\n" if -b $dev;
|
|
}
|
|
|
|
sub check_userspace {
|
|
my ($dst) = @_;
|
|
if ($dst !~ m:/userspace/:) {
|
|
ldie "your path '$dst' must be inside $mars/userspace/ or $mars/resource-*/userspace/\n" unless $force;
|
|
lwarn "your path '$dst' is outside $mars/userspace/ or $mars/resource-*/userspace/ and you gave --force, hopefully YOU KNOW WHAT YOU ARE DOING\n";
|
|
}
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# state inspection routines
|
|
|
|
sub _get_minmax {
|
|
my ($res, $glob, $take_symlink) = @_;
|
|
my $min = -1;
|
|
my $max = -1;
|
|
my @paths = glob($glob) or ldie "cannot find '$glob'\n";
|
|
foreach my $path (@paths) {
|
|
my $nr = $path;
|
|
if ($take_symlink) {
|
|
$nr = get_link($path);
|
|
}
|
|
$nr =~ s:^.*[a-z]+-([0-9]+)(-[^/]*)?$:$1:;
|
|
$min = $nr if ($nr < $min || $min < 0);
|
|
$max = $nr if ($nr > $max || $max < 0);
|
|
}
|
|
return ($min, $max);
|
|
}
|
|
|
|
sub get_minmax_logfiles {
|
|
my ($res) = @_;
|
|
return _get_minmax($res, "$mars/resource-$res/log-*", 0);
|
|
}
|
|
|
|
sub get_minmax_versions {
|
|
my ($res) = @_;
|
|
return _get_minmax($res, "$mars/resource-$res/version-*", 0);
|
|
}
|
|
|
|
sub get_minmax_any {
|
|
my ($res) = @_;
|
|
return _get_minmax($res, "$mars/resource-$res/{log,version}-*", 0);
|
|
}
|
|
|
|
sub get_minmax_replays {
|
|
my ($res) = @_;
|
|
return _get_minmax($res, "$mars/resource-$res/replay-*", 1);
|
|
}
|
|
|
|
sub detect_splitbrain {
|
|
my ($res, $do_report) = @_;
|
|
my $ok = 1;
|
|
my %from;
|
|
foreach my $vers_path (glob("$mars/resource-$res/version-*")) {
|
|
$vers_path =~ m:/version-(0-9+)-:;
|
|
my $nr = $1;
|
|
my $vers = get_link($vers_path, 1);
|
|
$vers =~ m/,log-[0-9]+-([^,]+),/;
|
|
my $origin = $1;
|
|
if (defined($from{$nr}) && $from{$nr} ne $origin) {
|
|
$ok = 0;
|
|
lwarn "SPLIT BRAIN at logfile $nr detected: hostA = '${from{$nr}}', hostB = '$origin'\n" if $do_report;
|
|
}
|
|
$from{$nr} = $origin;
|
|
}
|
|
return $ok;
|
|
}
|
|
|
|
sub try_to_avoid_splitbrain {
|
|
# NYI
|
|
}
|
|
|
|
sub get_size {
|
|
my $arg = shift || "";
|
|
if (!($arg =~ m/^([0-9]+(?:\.[0-9]*)?)([kmgtp]?)$/i)) {
|
|
ldie "size argument '$arg' must be a number, optionally followed by suffix k or m or g or t or p\n";
|
|
}
|
|
my $mod = $2 || "";
|
|
$arg = $1;
|
|
$_ = $mod;
|
|
SWITCH: {
|
|
/k/i and $arg *= 1024, last SWITCH;
|
|
/m/i and $arg *= 1024 * 1024, last SWITCH;
|
|
/g/i and $arg *= 1024 * 1024 * 1024, last SWITCH;
|
|
/t/i and $arg *= 1024 * 1024 * 1024 * 1024, last SWITCH;
|
|
/p/i and $arg *= 1024 * 1024 * 1024 * 1024 * 1024, last SWITCH;
|
|
}
|
|
ldie "size '$arg' is not a multiple of 4k\n" if ($arg % 4096) != 0;
|
|
return $arg;
|
|
}
|
|
|
|
#
|
|
# Get actual primary node from links below actual-*/ subdirs
|
|
#
|
|
sub _get_actual_primary {
|
|
my ($res) = @_;
|
|
my @primary_links = glob("$mars/resource-$res/actual-*/is-primary");
|
|
my $primary;
|
|
foreach my $link (@primary_links) {
|
|
if (my $val = get_link($link)) {
|
|
$primary = ($link =~ qr%.*actual-([^/]+)/is-primary%)[0];
|
|
last;
|
|
# Note: if there are more than one 'is-primary' links (an insane state anyway),
|
|
# the first 'is-primary' link is selected. Other links are ignored.
|
|
}
|
|
}
|
|
return $primary;
|
|
}
|
|
|
|
sub _get_designated_primary {
|
|
my ($res) = @_;
|
|
return get_link("$mars/resource-$res/primary");
|
|
}
|
|
|
|
sub get_peers {
|
|
my ($res) = @_;
|
|
my @list = glob("$mars/resource-$res/data-*");
|
|
return map { $_ =~ s:$mars/resource-$res/data-::; $_ } @list;
|
|
}
|
|
|
|
sub __conv_tv {
|
|
my ($tv_sec, $tv_nsec) = @_;
|
|
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($tv_sec);
|
|
return sprintf("%04d-%02d-%02d %02d:%02d:%02d.%s", $year+1900, $mon + 1, $mday, $hour, $min, $sec, $tv_nsec);
|
|
}
|
|
|
|
sub _replace_timestamps {
|
|
my ($txt) = @_;
|
|
$txt =~ s:([0-9]{9,99})\.([0-9]{9}):__conv_tv($1,$2):ge;
|
|
return $txt;
|
|
}
|
|
|
|
sub _get_text {
|
|
my ($path, $regex, $do_print) = @_;
|
|
open(IN, "<", $path) or return "";
|
|
my $text = "";
|
|
my $count = 0;
|
|
while (my $line = <IN>) {
|
|
# use regex e.g. for fetching only errors and warnings
|
|
if (!$regex || $line =~ $regex) {
|
|
$line = _replace_timestamps($line);
|
|
$count++;
|
|
if ($do_print) {
|
|
print $line;
|
|
} else {
|
|
$text .= $line;
|
|
}
|
|
}
|
|
}
|
|
close(IN);
|
|
return $count if $do_print;
|
|
return $text;
|
|
}
|
|
|
|
sub get_error_text {
|
|
my ($cmd, $res) = @_;
|
|
my $text = _get_text("$mars/resource-$res/logstatus-$host.status", "m/^(err|warn)/i", 0);
|
|
return $text;
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# helpers
|
|
|
|
sub _trigger {
|
|
system("(echo 2 > /proc/sys/mars/trigger) >/dev/null 2>&1");
|
|
}
|
|
|
|
sub _switch {
|
|
my ($cmd, $res, $path, $on) = @_;
|
|
my $src = $on ? "1" : "0";
|
|
|
|
my $old = get_link($path);
|
|
if ($old && $old eq $src) {
|
|
lprint "${cmd} on resource $res is already activated\n" if $cmd;
|
|
return;
|
|
}
|
|
set_link($src, $path);
|
|
lprint "successfully started ${cmd} on resource $res\n" if $cmd;
|
|
}
|
|
|
|
sub _writable {
|
|
my ($path, $on) = @_;
|
|
my $oldmode = (lstat $path)[2] & 0700;
|
|
my $newmode = $on ? $oldmode | 0200 : $oldmode & ~0200;
|
|
lprint "chmod '$path' $oldmode $newmode";
|
|
chmod($newmode, $path) == 1 or ldie "cannot chmod '$path'\n";
|
|
}
|
|
|
|
sub _get_ip {
|
|
chomp (my @info = `/sbin/ip addr show dev eth0`);
|
|
foreach (@info) {
|
|
m#\sinet\s(\d+\.\d+\.\d+\.\d+)# && return $1;
|
|
}
|
|
return undef;
|
|
}
|
|
|
|
# Which primary was active in the past when logfile number $log_nr was created?
|
|
# In general, this may be even a node which doesn't exist anymore.
|
|
# $supposed_primary should be normally empty, but may be used
|
|
# to give a hint and check for consistency of ancient knowledge.
|
|
|
|
sub _get_former_primary {
|
|
my ($basedir, $log_nr, $supposed_primary) = @_;
|
|
my $primary = $supposed_primary;
|
|
foreach my $type ("log", "version") {
|
|
my $base_path = sprintf("$basedir/$type-%09d-", $log_nr);
|
|
my $pri_path = "$base_path$supposed_primary";
|
|
my $log_path = "$base_path*";
|
|
my @names = glob($log_path);
|
|
if (!@names) {
|
|
if ($type eq "log" and !$supposed_primary) {
|
|
next;
|
|
}
|
|
ldie "Sorry, there exist no names '$log_path'\n" unless $supposed_primary;
|
|
} elsif (scalar(@names) == 1) {
|
|
my $found = $names[0];
|
|
if ($supposed_primary) {
|
|
ldie "Sorry, '$pri_path' does not exist, although '$found' would exist.\n" unless $pri_path eq $found;
|
|
} else { # assume that the found name is the right one.
|
|
$pri_path = $found;
|
|
lprint "found '$pri_path'\n";
|
|
ldie "found name is malformed\n" unless $pri_path =~ m:^$base_path(.*):;
|
|
$primary = $1;
|
|
}
|
|
} else { # multiple exist...
|
|
lprint "There are multiple names with number $log_nr.\n";
|
|
my $present = 0;
|
|
foreach my $file (@names) {
|
|
lprint " $file\n";
|
|
$present++ if $file eq $pri_path;
|
|
}
|
|
if ($type eq "log") {
|
|
lwarn "Usually, this is an indication for split-brain.\n";
|
|
lwarn "Be careful!\n";
|
|
}
|
|
ldie "Cannot select between them -- no primary preference given.\n" unless $supposed_primary;
|
|
ldie "Sorry, '$pri_path' is not among them.\n" unless $present;
|
|
lprint "=> using '$pri_path' out of them\n";
|
|
}
|
|
return $primary;
|
|
}
|
|
ldie "could not determine old primary site for logfile version number $log_nr\n";
|
|
}
|
|
|
|
sub _fake_versionlink {
|
|
my ($basedir, $log_nr, $primary) = @_;
|
|
$primary = _get_former_primary(@_) unless $primary;
|
|
my $new_version = sprintf("$basedir/version-%09d-$host", $log_nr);
|
|
my $pri_version = sprintf("$basedir/version-%09d-$primary", $log_nr);
|
|
if ($primary eq $host) {
|
|
lwarn "it makes no sense to fake my own version link '$new_version'\n";
|
|
return;
|
|
}
|
|
my $pri_link = get_link($pri_version);
|
|
if (!$pri_link) { # try any one else
|
|
lwarn "cannot read symlink '$pri_version' -- trying a neighbor link instead\n";
|
|
my $try_version = sprintf("$basedir/version-%09d-*", $log_nr);
|
|
my @test = glob($try_version);
|
|
my $test_version = shift @test;
|
|
if ($test_version) {
|
|
lwarn "trying substitute symlink '$test_version'\n";
|
|
my $test_link = get_link($test_version);
|
|
if ($test_link) {
|
|
$pri_link = $test_link;
|
|
lwarn "got value '$pri_link', hopefully this is right\n" if $pri_link;
|
|
}
|
|
}
|
|
}
|
|
if ($pri_link) {
|
|
lprint "creating new version symlink '$new_version' -> '$pri_link'\n";
|
|
set_link($pri_link, $new_version);
|
|
} else {
|
|
lwarn "cannot read symlink '$pri_version' -- cannot create faked versionlink '$pri_version'\n";
|
|
}
|
|
}
|
|
|
|
sub _set_replaylink {
|
|
my ($basedir, $log_nr) = @_;
|
|
my $primary = _get_former_primary(@_);
|
|
my $rep_path = "$basedir/replay-$host";
|
|
my $rep_val = sprintf("log-%09d-$primary,0,0", $log_nr);
|
|
lprint "creating new replaylink '$rep_path' -> '$rep_val'\n";
|
|
set_link($rep_val, $rep_path);
|
|
|
|
if ($log_nr > 1) {
|
|
my $old_primary = "";
|
|
my $vers_link = sprintf("$basedir/version-%09d-$primary", $log_nr);
|
|
my $vers_val = get_link($vers_link);
|
|
$old_primary = $1 if $vers_val =~ m/:.*,log-[0-9]+-([^,]+),/;
|
|
_fake_versionlink($basedir, $log_nr - 1, $old_primary);
|
|
} else {
|
|
my $initial;
|
|
for (;;) {
|
|
$initial = get_link("$basedir/version-000000001-$primary", 1);
|
|
last if $initial;
|
|
sleep_timeout();
|
|
}
|
|
set_link($initial, "$basedir/version-000000001-$host");
|
|
}
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# commands
|
|
|
|
sub ignore_cmd {
|
|
my ($cmd, $res) = @_;
|
|
lprint "ignoring command '$cmd' on resource '$res'\n";
|
|
}
|
|
|
|
sub senseless_cmd {
|
|
my ($cmd, $res) = @_;
|
|
lprint "command '$cmd' makes no sense with MARS (ignoring on resource '$res')\n";
|
|
}
|
|
|
|
sub forbidden_cmd {
|
|
my ($cmd, $res) = @_;
|
|
ldie "command '$cmd' on resource '$res' cannot be used with MARS (migth affect too many hosts, lead to undesired consequences)\n";
|
|
}
|
|
|
|
sub nyi_cmd {
|
|
my ($cmd, $res) = @_;
|
|
ldie "command '$cmd' on resource '$res' is not yet implemented\n";
|
|
}
|
|
|
|
sub is_module_loaded {
|
|
open TEST, "lsmod | grep mars |";
|
|
my $res = <TEST>;
|
|
close TEST;
|
|
return $res;
|
|
}
|
|
|
|
sub set_link_cmd {
|
|
my $cmd = shift;
|
|
for (;;) {
|
|
my $src = shift || last;
|
|
my $dst = shift || ldie "you did not supply a symlink destination for source '$src'\n";
|
|
ldie "symlink target '$dst' is not an absolute path\n" unless $dst =~ m:^/:;
|
|
check_userspace($dst);
|
|
my $dir = `dirname "$dst"` or ldie "path '$dst' has no dirname\n";
|
|
chomp $dir;
|
|
ldie "directory '$dir' does not exist\n" unless -d $dir;
|
|
set_link($src, $dst);
|
|
}
|
|
}
|
|
|
|
sub _create_cluster {
|
|
my ($cmd) = @_;
|
|
system("mkdir $mars") unless -d $mars;
|
|
my $old_uuid = get_link("$mars/uuid", 2);
|
|
if ($cmd eq "create-cluster") {
|
|
ldie "cluster was already created with uuid='$old_uuid'\n" if $old_uuid && !$force;
|
|
my $uuid = `echo -n \$(hostname) \$(date)`;
|
|
set_link($uuid, "$mars/uuid");
|
|
finish_links(); # opportunity for errors => don't continue
|
|
} elsif (!$old_uuid && !$force) {
|
|
if ($user_version == 0.1) {
|
|
my $uuid = `echo -n \$(hostname) \$(date)`;
|
|
set_link($uuid, "$mars/uuid");
|
|
}
|
|
ldie "cluster has no uuid\n" if $user_version > 0.1;
|
|
}
|
|
system("mkdir $mars/ips") unless -d "$mars/ips";
|
|
system("mkdir $mars/userspace") unless -d "$mars/userspace";
|
|
system("mkdir $mars/defaults") unless -d "$mars/defaults";
|
|
system("mkdir $mars/defaults-$host") unless -d "$mars/defaults-$host";
|
|
system("mkdir $mars/todo-global") unless -d "$mars/todo-global";
|
|
set_link($ip, "$mars/ips/ip-$host");
|
|
set_link("1", "$mars/todo-global/deleted-$host");
|
|
}
|
|
|
|
sub create_cluster {
|
|
my ($cmd, $peer) = @_;
|
|
ldie "cluster is already created\n" if !$force && -d "$mars/ips";
|
|
ldie "mars module is loaded, please unload first\n" if is_module_loaded();
|
|
_create_cluster(@_);
|
|
}
|
|
|
|
sub join_cluster {
|
|
my ($cmd, $peer) = @_;
|
|
if (glob("$mars/resource-*") or glob("$mars/ips/*")) {
|
|
ldie "Sorry, some resources already exist!\nThis is dangerous!\nIf you are sure that no resource clash is possible, re-invoke this command with '--force' option\n" unless $force;
|
|
}
|
|
ldie "mars module is loaded, please unload first\n" if is_module_loaded();
|
|
lprint "joining cluster via rsync (peer='$peer')\n";
|
|
# check connection
|
|
system("ssh $peer uname -a") == 0 or ldie "oops, no connection to $peer ...\n";
|
|
mkdir($mars) unless -d $mars;
|
|
system("rsync --recursive --links --max-size=1 -v $peer:$mars/ $mars/") == 0 or ldie "cannot get remote symlink tree via rsync\n";
|
|
_create_cluster(@_);
|
|
finish_links();
|
|
system("rsync --recursive --links -v $mars/ips/ $peer:$mars/ips/") == 0 or ldie "oops\n";
|
|
}
|
|
|
|
sub leave_cluster {
|
|
my ($cmd) = @_;
|
|
my $check = "/mars/resource-*/*-$host";
|
|
ldie "I am member of some resources\n" if glob($check) && !$force;
|
|
_create_delete("$mars/ips/$host");
|
|
}
|
|
|
|
sub create_res {
|
|
my ($cmd, $res, $dev, $appear, $size_arg) = @_;
|
|
my $create = ($cmd eq "create-resource");
|
|
|
|
ldie "undefined device or size argument\n" unless $dev;
|
|
$appear = $res if !$appear;
|
|
check_id($appear) if $appear;
|
|
|
|
my $resdir = "$mars/resource-$res";
|
|
if ($create) {
|
|
ldie "resource directory '$res' already exists; you may override this via --force\n" if (!$force && -d $resdir);
|
|
ldie "resource directory '$res' has some contents -- remove by hand if you are sure that you really know what you are doing, or give --force.\n" if (!$force && glob("$resdir/*.status"));
|
|
lprint "creating new resource '$res'\n";
|
|
} else {
|
|
if ( -e "$resdir/data-$host") {
|
|
lwarn "resource '$res' has been already joined -- this is dangerous!\n";
|
|
ldie "refusing dangerous operation\n" unless $force;
|
|
} else {
|
|
lprint "joining to existing resource '$res'\n";
|
|
}
|
|
}
|
|
|
|
my $size = 0;
|
|
if (-b $dev) {
|
|
ldie "block device '$dev' must be an absolute path starting with '/'\n" unless $dev =~ m/^\//;
|
|
use Fcntl 'SEEK_END', 'O_RDONLY', 'O_RDWR', 'O_EXCL';
|
|
my $flags = O_RDWR | O_EXCL;
|
|
if ($force) {
|
|
$flags = O_RDONLY;
|
|
}
|
|
sysopen(TEST, $dev, $flags) or ldie "cannot open device '$dev' for exclusive rw access\n";
|
|
$size = sysseek(TEST, 0, SEEK_END);
|
|
close(TEST);
|
|
lprint "block device '$dev': determined size = $size bytes\n";
|
|
if ($size_arg) {
|
|
my $new_size = get_size($size_arg);
|
|
ldie "size argument '$size_arg' is smaller than device size '$size'\n" unless $new_size <= $size;
|
|
lprint "reducing size from $size to $new_size\n";
|
|
$size = $new_size;
|
|
}
|
|
} else {
|
|
$size = get_size($dev);
|
|
if ($size > 0) {
|
|
$dev = "";
|
|
} else {
|
|
ldie "bad parameter '$dev': must be either a block device name, or size followed by k or m or g or t\n";
|
|
}
|
|
}
|
|
|
|
ldie "implausible size $size" unless $size > 4096 * 16; # smaller floppies should not exist ;)
|
|
|
|
my $primary;
|
|
my $replay_nr = -1;
|
|
if ($create) {
|
|
mkdir($resdir);
|
|
ldie "could not create resource '$res'\n" unless -d $resdir;
|
|
set_link($size, "$resdir/size");
|
|
} else { # join
|
|
ldie "resource '$res' does not exist\n" unless -d $resdir;
|
|
my $res_size = get_link("$mars/resource-$res/size");
|
|
if ($size < $res_size) {
|
|
lwarn "size of new device is only $size, but should be $res_size\n";
|
|
ldie "refusing to join due to bad size\n" unless $force;
|
|
} elsif ($size > $res_size) {
|
|
lprint "Your physical device has size $size, which is larger than the logical resource size $res_size.\n";
|
|
lprint "This does no harm, but you are wasting some space.\n";
|
|
}
|
|
$primary = _get_designated_primary($res);
|
|
ldie "implausible state: I ($host) am already designated primary of resource '$res' which I just wanted to join\n" if $primary eq $host;
|
|
if ($primary eq "(none)") {
|
|
my @list = glob("$resdir/replay-*") or ldie "cannot find any candidate for primary\n";
|
|
my $first = pop @list or ldie "bad glob list\n";
|
|
$primary = get_link($first);
|
|
$primary =~ s/^log-[0-9]+-(.*),.*,.*$/$1/;
|
|
lprint "using '$primary' as primary\n";
|
|
}
|
|
ldie "my ip '$ip' is not registered -- please run 'join-cluster' first\n" unless -l "$mars/ips/ip-$host";
|
|
my $replay = get_link("$resdir/replay-$primary");
|
|
if ($replay =~ m/^log-([0-9]+)-/) {
|
|
$replay_nr = $1;
|
|
} else {
|
|
ldie "cannot determine current logfile number.\n";
|
|
}
|
|
}
|
|
|
|
# check for uniqeness of $appear
|
|
if ($appear) {
|
|
foreach my $old_dev (glob("$mars/resource-*/device-$host")) {
|
|
$old_dev =~ m:/resource-([^/]+)/:;
|
|
next unless defined($1);
|
|
my $old_res = $1;
|
|
next if $old_res eq $res;
|
|
my $old_name = get_link($old_dev);
|
|
if ($old_name eq $appear) {
|
|
if ( -e "$mars/resource-$old_res/data-$host") {
|
|
ldie "device '/dev/mars/$old_name' is already present in joined resource '$old_res'\n";
|
|
} else {
|
|
lwarn "device '/dev/mars/$old_name' is already present in another unjoined resource '$old_res' -- this does no harm, but may be confusing.\n";
|
|
}
|
|
}
|
|
}
|
|
# warn if devices are named differently throughout the cluster
|
|
foreach my $old_dev (glob("$resdir/device-*")) {
|
|
my $old_name = get_link($old_dev);
|
|
if ($old_name ne $appear) {
|
|
$old_dev =~ m:/device-(.+)$:;
|
|
my $old_host = $1;
|
|
lwarn "your name '/dev/mars/$appear' differs from '/dev/mars/$old_name' on host '$old_host'.";
|
|
lwarn "this does no harm, but may be confusing.";
|
|
}
|
|
}
|
|
}
|
|
|
|
my $file = "$resdir/data-$host";
|
|
if (!$dev) {
|
|
lwarn "file '$file' already exists - reusing\n" if -e $file;
|
|
lprint "setup sparse file '$file' with size $size\n";
|
|
open(OUT, ">>", $file) or ldie "could not open '$file'\n";
|
|
truncate(OUT, $size) or ldie "truncate to size $size failed\n";
|
|
close OUT;
|
|
} else {
|
|
lprint "using existing device '$dev'\n";
|
|
set_link($dev, $file);
|
|
}
|
|
|
|
if ($appear) {
|
|
lprint "resource '$res' will appear as local device '/dev/mars/$appear'\n";
|
|
set_link($appear, "$resdir/device-$host");
|
|
}
|
|
|
|
mkdir("$resdir/userspace") unless -d "$resdir/userspace";
|
|
mkdir("$resdir/defaults") unless -d "$resdir/defaults";
|
|
mkdir("$resdir/defaults-$host");
|
|
mkdir("$resdir/local-$host");
|
|
mkdir("$resdir/actual-$host");
|
|
my $todo = "$resdir/todo-$host";
|
|
mkdir($todo);
|
|
set_link("1", "$todo/attach");
|
|
set_link("1", "$todo/connect");
|
|
set_link("1", "$todo/sync");
|
|
set_link("1", "$todo/allow-replay");
|
|
unlink("$resdir/syncstatus-$host");
|
|
|
|
if ($create) {
|
|
set_link($host, "$resdir/primary");
|
|
set_link($size, "$resdir/syncstatus-$host");
|
|
set_link("log-000000001-$host,0,0", "$resdir/replay-$host");
|
|
system("touch $resdir/log-000000001-$host");
|
|
set_link("00000000000000000000000000000000,log-000000001-$host,0:", "$resdir/version-000000001-$host");
|
|
finish_links();
|
|
lprint "successfully created resource '$res'\n";
|
|
} else {
|
|
_set_replaylink($resdir, $replay_nr, $primary);
|
|
set_link("0", "$resdir/syncstatus-$host");
|
|
finish_links();
|
|
lprint "successfully joined resource '$res'\n";
|
|
}
|
|
}
|
|
|
|
sub leave_res_phase0 {
|
|
my ($cmd, $res) = @_;
|
|
check_not_primary(@_);
|
|
foreach my $tmp (glob("$mars/resource-$res/todo-$host/*")) {
|
|
my $status = get_link($tmp, 2);
|
|
ldie "switch '$tmp' is not off\n" if $status;
|
|
}
|
|
check_status($cmd, $res, "is-attached", 0, 0, 1);
|
|
if (!$force) {
|
|
foreach my $tmp (glob("$mars/resource-$res/actual-$host/{is-,logfile-}*")) {
|
|
my $status = get_link($tmp);
|
|
ldie "running status '$tmp' is not off\n" if $status;
|
|
}
|
|
}
|
|
}
|
|
|
|
sub leave_res_phase1 {
|
|
my ($cmd, $res) = @_;
|
|
_create_delete("$mars/resource-$res/replay-$host");
|
|
_create_delete("$mars/resource-$res/data-$host");
|
|
_create_delete("$mars/resource-$res/syncstatus-$host");
|
|
_create_delete("$mars/resource-$res/device-$host");
|
|
_create_delete("$mars/resource-$res/actsize-$host");
|
|
foreach my $dir (glob("$mars/resource-$res/*-$host/")) {
|
|
foreach my $tmp (glob("${dir}*")) {
|
|
_create_delete($tmp);
|
|
}
|
|
_create_delete($dir);
|
|
}
|
|
cleanup_res($cmd, $res, 0);
|
|
finish_links();
|
|
}
|
|
|
|
# wait for deletions (avoid races with following commands)
|
|
sub leave_res_phase2 {
|
|
my ($cmd, $res) = @_;
|
|
_wait_delete();
|
|
}
|
|
|
|
sub logrotate_res {
|
|
my ($cmd, $res) = @_;
|
|
check_primary(@_);
|
|
my @paths = glob("$mars/resource-$res/log-*-$host") or ldie "cannot find any logfiles\n";
|
|
@paths = sort(@paths);
|
|
my $last = pop(@paths);
|
|
if (-z $last) {
|
|
lprint "an empty logfile '$last' already exists, nothing to do.\n";
|
|
return;
|
|
}
|
|
my $nr = $last;
|
|
$nr =~ s/^.*log-([0-9]+)-.+$/$1/;
|
|
my $next = sprintf("$mars/resource-$res/log-%09d-$host", $nr + 1);
|
|
ldie "logfile '$next' already exists\n" if -e $next;
|
|
system("touch $next");
|
|
}
|
|
|
|
sub _get_deletable_logfiles {
|
|
my ($cmd, $res) = @_;
|
|
my $min = -1;
|
|
my $max = -1;
|
|
my @log_paths = glob("$mars/resource-$res/log-*") or ldie "cannot find any logfiles\n";
|
|
foreach my $path (@log_paths) {
|
|
$path =~ m/\/log-([0-9]+)-/;
|
|
my $nr = $1;
|
|
$min = $nr if ($nr < $min || $min < 0);
|
|
$max = $nr if ($nr > $max || $max < 0);
|
|
}
|
|
my @paths = glob("$mars/resource-$res/replay-*") or ldie "cannot find any replay symlinks\n";
|
|
foreach my $path (@paths) {
|
|
my $target = get_link($path);
|
|
$target =~ m/^log-([0-9]+)/;
|
|
my $nr = $1;
|
|
$max = $nr if ($nr < $max || $max < 0);
|
|
}
|
|
lprint "min deletable logfile number: $min\n";
|
|
lprint "min non-deletable logfile number: $max\n";
|
|
return ($min, $max);
|
|
}
|
|
|
|
my $delete_nr = -1;
|
|
|
|
sub _create_delete {
|
|
my ($target) = @_;
|
|
ldie "cannot delete: '$target' is no absolute path\n" unless $target =~ m:^/:;
|
|
if ($delete_nr < 0) { # compute only upon first call
|
|
my @paths = glob("$mars/todo-global/delete-*");
|
|
foreach my $path (@paths) {
|
|
$path =~ m/-([0-9]+)/;
|
|
if (defined($1) && $1 > $delete_nr) {
|
|
$delete_nr = $1;
|
|
}
|
|
}
|
|
my @paths2 = glob("$mars/todo-global/deleted-*");
|
|
foreach my $path (@paths2) {
|
|
my $link = get_link($path, 1);
|
|
$link =~ m/([0-9]+)/;
|
|
if (defined($1) && $1 > $delete_nr) {
|
|
$delete_nr = $1;
|
|
}
|
|
}
|
|
}
|
|
my $new = sprintf("$mars/todo-global/delete-%09d-$host", ++$delete_nr);
|
|
lprint "create symlink $new -> $target\n";
|
|
set_link($target, $new);
|
|
}
|
|
|
|
sub _wait_delete {
|
|
for (;;) {
|
|
my $deleted = get_link("$mars/todo-global/deleted-$host");
|
|
last if $deleted >= $delete_nr;
|
|
lprint "waiting for deletions to apply locally....\n";
|
|
sleep_timeout();
|
|
}
|
|
}
|
|
|
|
sub delete_file_cmd {
|
|
my $cmd = shift;
|
|
foreach my $path (@_) {
|
|
check_userspace($path);
|
|
_create_delete($path);
|
|
}
|
|
}
|
|
|
|
sub logdelete_res {
|
|
my ($cmd, $res) = @_;
|
|
my @paths = glob("$mars/resource-$res/log-*") or ldie "cannot find any logfiles\n";
|
|
@paths = sort(@paths);
|
|
my ($min_deletable, $max_deletable) = _get_deletable_logfiles(@_);
|
|
if ($min_deletable >= $max_deletable) {
|
|
lprint "no logfiles are deletable.\n";
|
|
return;
|
|
}
|
|
if ($cmd ne "log-delete-all") {
|
|
$max_deletable = $min_deletable + 1; # delete only the first one
|
|
}
|
|
|
|
my $nr = 0;
|
|
for (;;) {
|
|
my $first = shift(@paths);
|
|
last unless $first;
|
|
$nr = $first;
|
|
$nr =~ s/^.*log-([0-9]+)-.+$/$1/;
|
|
next unless $nr < $max_deletable;
|
|
|
|
lprint "chosen '$first' for deletion\n";
|
|
|
|
_create_delete($first);
|
|
}
|
|
lprint "removing left-over version symlinks...\n";
|
|
foreach my $versionlink (glob("$mars/resource-$res/version-*")) {
|
|
my $nrv = $versionlink;
|
|
$nrv =~ s/^.*\/version-([0-9]+)-.+$/$1/;
|
|
# we need at least one more version link than logfiles for consistency checks
|
|
next unless $nrv < $max_deletable - 1;
|
|
_create_delete($versionlink);
|
|
}
|
|
}
|
|
|
|
sub attach_res_phase0 {
|
|
my ($cmd, $res) = @_;
|
|
return if $force;
|
|
my $detach = ($cmd eq "detach");
|
|
if ($detach) {
|
|
my $device_in_use = get_link("$mars/resource-$res/actual-$host/open-count", 1);
|
|
if ($device_in_use) {
|
|
my $name = get_link("$mars/resource-$res/device-$host");
|
|
my $dev = "/dev/mars/$name";
|
|
ldie "device '$dev' is in use\n";
|
|
}
|
|
}
|
|
}
|
|
|
|
sub attach_res_phase1 {
|
|
my ($cmd, $res) = @_;
|
|
my $detach = ($cmd eq "detach");
|
|
my $path = "$mars/resource-$res/todo-$host/attach";
|
|
_switch($cmd, $res, $path, !$detach);
|
|
}
|
|
|
|
sub attach_res_phase2 {
|
|
my ($cmd, $res) = @_;
|
|
my $detach = ($cmd eq "detach");
|
|
check_status($cmd, $res, "is-attached", $detach ? 0 : 1, 1);
|
|
if ($detach) {
|
|
check_mars_device($cmd, $res, 1, 1);
|
|
}
|
|
}
|
|
|
|
sub connect_global_res {
|
|
my ($cmd, $res) = @_;
|
|
my $disconnect = ($cmd =~ m/disconnect/);
|
|
my @paths = glob("$mars/resource-$res/todo-*/");
|
|
for my $path (@paths) {
|
|
_switch($cmd, $res, "$path/connect", !$disconnect);
|
|
}
|
|
}
|
|
|
|
sub connect_local_res {
|
|
my ($cmd, $res) = @_;
|
|
my $disconnect = ($cmd =~ m/disconnect/);
|
|
my $path = "$mars/resource-$res/todo-$host/connect";
|
|
_switch($cmd, $res, $path, !$disconnect);
|
|
}
|
|
|
|
sub pause_sync_global_res {
|
|
my ($cmd, $res) = @_;
|
|
my $pause = ($cmd =~ m/pause/);
|
|
my @paths = glob("$mars/resource-$res/todo-*/");
|
|
for my $path (@paths) {
|
|
_switch($cmd, $res, "$path/sync", !$pause);
|
|
}
|
|
}
|
|
|
|
sub pause_sync_local_res {
|
|
my ($cmd, $res) = @_;
|
|
my $pause = ($cmd =~ m/pause/);
|
|
my $path = "$mars/resource-$res/todo-$host/sync";
|
|
_switch($cmd, $res, $path, !$pause);
|
|
}
|
|
|
|
sub pause_replay_global_res {
|
|
my ($cmd, $res) = @_;
|
|
my $pause = ($cmd =~ m/pause/);
|
|
my @paths = glob("$mars/resource-$res/todo-*/");
|
|
for my $path (@paths) {
|
|
_switch($cmd, $res, "$path/allow-replay", !$pause);
|
|
}
|
|
}
|
|
|
|
sub pause_replay_local_res {
|
|
my ($cmd, $res) = @_;
|
|
my $pause = ($cmd =~ m/pause/);
|
|
my $path = "$mars/resource-$res/todo-$host/allow-replay";
|
|
_switch($cmd, $res, $path, !$pause);
|
|
}
|
|
|
|
sub up_res_phase0 {
|
|
my ($cmd, $res) = @_;
|
|
my $down = ($cmd eq "down");
|
|
if ($down) {
|
|
attach_res_phase0("detach", $res);
|
|
} else {
|
|
attach_res_phase0("attach", $res);
|
|
}
|
|
}
|
|
|
|
sub up_res_phase1 {
|
|
my ($cmd, $res) = @_;
|
|
my $down = ($cmd eq "down");
|
|
if ($down) {
|
|
pause_replay_local_res("pause-replay-local", $res);
|
|
pause_sync_local_res("pause-sync-local", $res);
|
|
connect_local_res("disconnect", $res);
|
|
attach_res_phase1("detach", $res);
|
|
} else {
|
|
attach_res_phase1("attach", $res);
|
|
connect_local_res("connect", $res);
|
|
pause_sync_local_res("resume-sync-local", $res);
|
|
pause_replay_local_res("resume-replay-local", $res);
|
|
}
|
|
}
|
|
|
|
sub up_res_phase2 {
|
|
my ($cmd, $res) = @_;
|
|
my $down = ($cmd eq "down");
|
|
if ($down) {
|
|
attach_res_phase2("detach", $res);
|
|
} else {
|
|
attach_res_phase2("attach", $res);
|
|
}
|
|
}
|
|
|
|
sub set_replay_res {
|
|
my ($cmd, $res, $new_nr) = @_;
|
|
if (!$new_nr || $new_nr <= 0) {
|
|
ldie "you must supply a numeric logfile number as third argument.\n";
|
|
}
|
|
check_not_primary(@_);
|
|
check_todo($cmd, $res, "allow-replay", 0, 0);
|
|
my $replaylink = "$mars/resource-$res/replay-$host";
|
|
my $old_val = get_link($replaylink);
|
|
my $old_nr = $old_val;
|
|
$old_nr =~ s/log-([0-9]+)-.*/$1/;
|
|
ldie "old log number '$old_nr' is wrong\n" unless $old_nr > 0;
|
|
if ($new_nr > $old_nr) {
|
|
lwarn "you try to skip logfile numbers from $old_nr to $new_nr, are you sure?\n";
|
|
ldie "you would need --force if you really know what you are doing.\n" unless $force;
|
|
}
|
|
_set_replaylink("$mars/resource-$res", $new_nr, "");
|
|
set_link("$new_nr", "$mars/resource-$res/skip-check-$host");
|
|
}
|
|
|
|
sub fake_local_res {
|
|
my ($cmd, $res) = @_;
|
|
my $path = "$mars/resource-$res/todo-$host/sync";
|
|
_switch($cmd, $res, $path, 0);
|
|
#check_status($res, "copy-syncstatus-$host", 0);
|
|
my $size = get_link("$mars/resource-$res/size");
|
|
my $target = "$mars/resource-$res/syncstatus-$host";
|
|
set_link($size, $target);
|
|
}
|
|
|
|
sub _primary_res {
|
|
my ($res, $new, $old) = @_;
|
|
my $pri = "$mars/resource-$res/primary";
|
|
set_link($new, $pri);
|
|
lprint "designated primary changed from '$old' to '$new'\n";
|
|
}
|
|
|
|
# check whether primary/secondary switching is possible at all
|
|
sub primary_phase0 {
|
|
my ($cmd, $res) = @_;
|
|
ldie "cannot switch primary: mars kernel module is not loaded\n" unless ($cmd eq "secondary" || -d "/proc/sys/mars");
|
|
if ($force) {
|
|
lwarn "You can do a '$cmd --force' only in DISCONNECTED state.\n";
|
|
check_todo($cmd, $res, "connect", 0, 0);
|
|
}
|
|
if ($cmd eq "primary") {
|
|
check_sync_finished($res, $host);
|
|
check_todo($cmd, $res, "attach", 1, 0);
|
|
check_todo($cmd, $res, "connect", 1, 0) if !$force;
|
|
check_todo($cmd, $res, "allow-replay", 1, 0);
|
|
}
|
|
my $old = _get_designated_primary($res);
|
|
return if ($old eq $host and $cmd eq "primary");
|
|
return if $old eq "(none)";
|
|
my $open_count_path = "$mars/resource-$res/actual-$old/open-count";
|
|
my $device_in_use = get_link($open_count_path, 1);
|
|
if ($device_in_use) {
|
|
my $name = get_link("$mars/resource-$res/device-$old", 1) || "unknown";
|
|
lwarn "device '/dev/mars/$name' for resource '$res' is $device_in_use times in use on primary host '$old'\n";
|
|
ldie "first you must umount/close the device (on host '$old')\n" unless $force;
|
|
lwarn "First you SHOULD umount/close the device (on host '$old'), but you ignore this recommendation by giving the --force option.\n";
|
|
if (is_link_recent($open_count_path)) {
|
|
lwarn "You are forcing a SPLIT BRAIN via --force right now. Do you know that this is an ERRONEOUS state? Do you really know what you are doing?\n";
|
|
} else {
|
|
lwarn "You may produce a SPLIT BRAIN via --force because the peer host '$old' is currently not reachable.\n";
|
|
}
|
|
}
|
|
lprint "all preconditions OK for resource '$res'\n";
|
|
}
|
|
|
|
# when necessary, switch to secondary (intermediately)
|
|
sub primary_phase1 {
|
|
my ($cmd, $res) = @_;
|
|
return if ($force and $cmd eq "primary");
|
|
my $old = _get_designated_primary($res);
|
|
return if ($old eq $host and $cmd eq "primary");
|
|
return if ($old ne $host and $cmd eq "secondary");
|
|
my $new = "(none)";
|
|
return if $old eq $new;
|
|
_primary_res($res, $new, $old);
|
|
}
|
|
|
|
# when necessary, wait
|
|
sub primary_phase2 {
|
|
my ($cmd, $res) = @_;
|
|
return if $force;
|
|
return unless $cmd eq "primary";
|
|
check_primary_gone($res);
|
|
try_to_avoid_splitbrain(@_);
|
|
}
|
|
|
|
# when necessary, switch to primary
|
|
sub primary_phase3 {
|
|
my ($cmd, $res) = @_;
|
|
return unless $cmd eq "primary";
|
|
my $old = _get_designated_primary($res);
|
|
my $new = $host;
|
|
return if $old eq $new;
|
|
_primary_res($res, $new, $old);
|
|
}
|
|
|
|
# wait for device to appear / disappear
|
|
sub primary_phase4 {
|
|
my ($cmd, $res) = @_;
|
|
return if $force;
|
|
if($cmd eq "secondary") {
|
|
check_mars_device($cmd, $res, 1, 1);
|
|
return;
|
|
}
|
|
check_mars_device($cmd, $res, 1, 0);
|
|
}
|
|
|
|
sub wait_umount_res {
|
|
my ($cmd, $res) = @_;
|
|
while (1) {
|
|
my $sum = 0;
|
|
foreach my $path (glob("$mars/resource-$res/actual-*/open-count")) {
|
|
$sum += get_link($path);
|
|
}
|
|
last if !$sum;
|
|
lprint "device for resource '$res' is $sum times in use somewhere\n";
|
|
sleep_timeout(3);
|
|
}
|
|
lprint "OK, device for resource '$res' is not in use.\n";
|
|
}
|
|
|
|
sub cleanup_res {
|
|
my ($cmd, $res, $replay_nr) = @_;
|
|
my @logfiles = glob("$mars/resource-$res/log-*-$host");
|
|
my @versions = glob("$mars/resource-$res/version-*-$host");
|
|
foreach my $path (@logfiles) {
|
|
$path =~ m:/log-([0-9]+)-: or next;
|
|
my $nr = $1;
|
|
next if $nr >= $replay_nr;
|
|
_create_delete($path);
|
|
}
|
|
foreach my $path (@logfiles, @versions) {
|
|
$path =~ m:/version-([0-9]+)-: or next;
|
|
my $nr = $1;
|
|
next if $nr >= $replay_nr - 1;
|
|
_create_delete($path);
|
|
}
|
|
}
|
|
|
|
sub invalidate_res_phase0 {
|
|
my ($cmd, $res) = @_;
|
|
check_not_primary(@_);
|
|
my $primary = _get_designated_primary($res);
|
|
ldie "for operation '$cmd', some other designated primary must exist (currently there is none)\n" if $primary eq "(none)";
|
|
}
|
|
|
|
sub invalidate_res_phase1 {
|
|
my ($cmd, $res) = @_;
|
|
_switch($cmd, $res, "$mars/resource-$res/todo-$host/sync", 0);
|
|
_switch($cmd, $res, "$mars/resource-$res/todo-$host/allow-replay", 0);
|
|
}
|
|
|
|
sub invalidate_res_phase2 {
|
|
my ($cmd, $res) = @_;
|
|
if (!$force) {
|
|
check_status($cmd, $res, "is-syncing", 0, 1);
|
|
check_status($cmd, $res, "is-replaying", 0, 1);
|
|
}
|
|
}
|
|
|
|
sub invalidate_res_phase3 {
|
|
my ($cmd, $res) = @_;
|
|
my $dst = "$mars/resource-$res/syncstatus-$host";
|
|
my $primary = _get_designated_primary($res);
|
|
my $replay = get_link("$mars/resource-$res/replay-$primary");
|
|
$replay =~ m/^log-([0-9]+)-/ or ldie "replay link '$replay' is not parsable\n";
|
|
my $replay_nr = $1;
|
|
set_link("0", $dst);
|
|
finish_links(); # opportunity for errors => don't continue
|
|
set_link($replay, "$mars/resource-$res/replay-$host");
|
|
_switch($cmd, $res, "$mars/resource-$res/todo-$host/allow-replay", 1);
|
|
_switch($cmd, $res, "$mars/resource-$res/todo-$host/sync", 1);
|
|
}
|
|
|
|
sub resize_res {
|
|
my ($cmd, $res, $size_arg) = @_;
|
|
my $new_size = 0;
|
|
if ($size_arg) {
|
|
$new_size = get_size($size_arg);
|
|
ldie "optional size argument '$new_size' must be numeric and positive\n" unless scalar($new_size) > 0;
|
|
lprint "new size: $new_size bytes\n";
|
|
}
|
|
check_primary(@_);
|
|
my @actsizes = glob("$mars/resource-$res/actsize-*");
|
|
ldie "resource $res has no actsize-* symlinks\n" unless @actsizes;
|
|
my $lnk = "$mars/resource-$res/size";
|
|
my $old_size = get_link($lnk);
|
|
my $min_size = 0;
|
|
foreach my $actsize (@actsizes) {
|
|
my $this_size = get_link($actsize);
|
|
if (!$min_size || $this_size < $min_size) {
|
|
$min_size = $this_size;
|
|
}
|
|
}
|
|
lprint "old_size=$old_size\n";
|
|
lprint "min_size=$min_size\n";
|
|
$new_size = $min_size if !$new_size;
|
|
lprint "new_size=$new_size\n";
|
|
ldie "new size $new_size is higher than the minimum size of all volumes $min_size" if $new_size > $min_size; # no override with --force possible
|
|
# for now, disallow decreasing until some bugs are fixed
|
|
ldie "only increases of the size are possible!\n" if $new_size < $old_size;
|
|
ldie "only increases of the size are possible without --force\n" if $new_size <= $old_size && !$force;
|
|
foreach my $switch (glob("$mars/resource-$res/todo-*/sync")) {
|
|
my $this_switch = get_link($switch);
|
|
ldie "sync on '$switch' is switched on -- use marsadm pause-sync to stop\n" unless !$this_switch;
|
|
}
|
|
my @syncsizes = glob("$mars/resource-$res/syncstatus-$host");
|
|
foreach my $syncsize (@syncsizes) {
|
|
my $this_size = get_link($syncsize);
|
|
ldie "sync on $syncsize has not yet finished: $this_size != $old_size (DANGEROUS FIX: if you know what you are doing, marsadm fake-sync can 'fix' it -- but this may need a full-sync afterwards)\n" unless $this_size == $old_size;
|
|
}
|
|
foreach my $syncsize (@syncsizes) {
|
|
my $this_size = get_link($syncsize);
|
|
set_link($new_size, $syncsize);
|
|
}
|
|
set_link($new_size, $lnk);
|
|
}
|
|
|
|
sub role_cmd {
|
|
my ($cmd, $res) = @_;
|
|
my $primary = _get_actual_primary($res) || '(none)';
|
|
my $todo_primary = _get_designated_primary($res);
|
|
my $msg = "I am actually ";
|
|
$msg .= ($primary eq $host) ? "primary" : "secondary";
|
|
if ($primary eq $todo_primary) {
|
|
$msg .= " and $primary is primary" if ($primary ne $host);
|
|
}
|
|
elsif ($primary ne $todo_primary) {
|
|
$todo_primary = "I" if ($todo_primary eq $host);
|
|
$msg .= " and $todo_primary should be primary";
|
|
}
|
|
lprint $msg . "\n";
|
|
}
|
|
|
|
sub mars_state_cmd {
|
|
my ($cmd, $res) = @_;
|
|
my $primary = _get_actual_primary($res) || '(none)';
|
|
my $todo_primary = _get_designated_primary($res);
|
|
|
|
if ($primary eq $host) {
|
|
lprint "is_primary\n";
|
|
return;
|
|
}
|
|
elsif ($todo_primary eq $host) {
|
|
lprint "becoming_primary\n";
|
|
return;
|
|
}
|
|
|
|
# secondary without ambitions to become primary
|
|
|
|
my $size = get_link("$mars/resource-$res/size");
|
|
my $syncstatus = get_link("$mars/resource-$res/syncstatus-$host");
|
|
if ($syncstatus != $size) {
|
|
lprint "secondary inconsistent ($syncstatus bytes of $size)\n";
|
|
return;
|
|
}
|
|
|
|
if ($primary eq "(none)") {
|
|
my $min = 0;
|
|
foreach my $path (glob("$mars/resource-$res/log-*")) {
|
|
my $nr = $path;
|
|
$nr =~ s:^.*[a-z]+-([0-9]+)(-[^/]*)?$:$1:;
|
|
if ($nr > $min) {
|
|
$primary = $path;
|
|
$primary =~ s:^.*/[a-z]+-[0-9]+-([^/]*)$:$1:;
|
|
$min = $nr;
|
|
}
|
|
}
|
|
}
|
|
my $primary_replay = get_link("$mars/resource-$res/replay-$primary");
|
|
my $host_replay = get_link("$mars/resource-$res/replay-$host");
|
|
if ($primary_replay eq $host_replay) {
|
|
lprint "secondary uptodate\n";
|
|
return;
|
|
}
|
|
lprint "secondary outdated ($host_replay instead of $primary_replay)\n";
|
|
}
|
|
|
|
sub cat_cmd {
|
|
my $cmd = shift;
|
|
foreach my $path (@_) {
|
|
_get_text($path, undef, 1);
|
|
}
|
|
}
|
|
|
|
sub mars_info_cmd {
|
|
my ($cmd, $res) = @_;
|
|
my $info = "$mars/resource-$res/logstatus-$host.status";
|
|
cat_cmd($cmd, $info);
|
|
}
|
|
|
|
sub show_cmd {
|
|
my ($cmd, $res) = @_;
|
|
$res = "*" if !$res || $res eq "all";
|
|
my $glob = "$mars/{ips/ip-$host,alive-$host,emergency-$host,rest-space-$host,resource-$res/{device,primary,size,actsize-$host,syncstatus-$host,replay-$host,actual-$host/*,todo-$host/*}}";
|
|
foreach my $link (glob($glob)) {
|
|
next unless -l $link;
|
|
my $res = get_link($link);
|
|
my $short = $link;
|
|
$short =~ s:^$mars/::;
|
|
lprint "$short=$res\n";
|
|
}
|
|
}
|
|
|
|
sub show_errors_cmd {
|
|
my ($cmd, $res) = @_;
|
|
my $text = get_error_text(@_);
|
|
if ($text) {
|
|
lprint $text;
|
|
ldie "resource $res has some (old) problems.\n";
|
|
} else {
|
|
lprint "no errors/warnings are reported.\n";
|
|
}
|
|
}
|
|
|
|
sub helplist {
|
|
my $temp;
|
|
$temp = shift;
|
|
lprint "ERROR: $temp" if ($temp);
|
|
lprint "
|
|
marsadm [<global_option>] <command> [<resource>] [<option>]
|
|
|
|
<global_option> =
|
|
--force : skip safety checks
|
|
use only when you know what you are doing!
|
|
--timeout=<seconds> : leave safety checks after timeout with an error
|
|
instead of waiting forever
|
|
|
|
<command> =
|
|
cluster : create-cluster join-cluster leave-cluster
|
|
resource : create-resource join-resource leave-resource
|
|
primary secondary invalidate
|
|
up down
|
|
replication: connect disconnect connect-local disconnect-local
|
|
replay : pause-replay resume-replay pause-replay-local resume-replay-local
|
|
sync : pause-sync resume-sync pause-sync-local resume-sync-local fake-sync
|
|
logfile : log-rotate log-delete log-delete-all
|
|
device : attach detach
|
|
internal : help version
|
|
|
|
<resource> = name of resource or \"all\" for all resources
|
|
|
|
<option> = special by command
|
|
\n";
|
|
exit 0;
|
|
}
|
|
|
|
sub version {
|
|
lprint "$0 $Id\n";
|
|
#lprint "my IP is $ip\n";
|
|
exit 0;
|
|
}
|
|
|
|
##################################################################
|
|
my %cmd_table =
|
|
(
|
|
# new keywords
|
|
"create-cluster" => \&create_cluster,
|
|
"join-cluster" => \&join_cluster,
|
|
"leave-cluster" => \&leave_cluster,
|
|
"create-resource" => \&create_res,
|
|
"join-resource" => \&create_res,
|
|
"leave-resource" => [
|
|
"check preconditions", \&leave_res_phase0,
|
|
"switch state", \&leave_res_phase1,
|
|
"wait for deletions", \&leave_res_phase2,
|
|
],
|
|
"log-rotate" => \&logrotate_res,
|
|
"log-delete" => \&logdelete_res,
|
|
"log-delete-all" => \&logdelete_res,
|
|
"fake-sync" => \&fake_local_res,
|
|
"set-link" => \&set_link_cmd,
|
|
"delete-file" => \&delete_file_cmd,
|
|
"cat" => \&cat_cmd,
|
|
"show" => \&show_cmd,
|
|
"show-errors" => \&show_errors_cmd,
|
|
"show-state" => \&mars_state_cmd,
|
|
"mars-state" => \&mars_state_cmd, # deprecated
|
|
"show-info" => \&mars_info_cmd,
|
|
"mars-info" => \&mars_info_cmd, # deprecated
|
|
"pause-replay-local" => \&pause_replay_local_res,
|
|
"pause-replay-global" => \&pause_replay_global_res,
|
|
"pause-replay" => \&pause_replay_local_res,
|
|
"resume-replay-local" => \&pause_replay_local_res,
|
|
"resume-replay-global"=> \&pause_replay_global_res,
|
|
"resume-replay" => \&pause_replay_local_res,
|
|
"set-replay" => \&set_replay_res,
|
|
"wait-umount" => \&wait_umount_res,
|
|
"wait-cluster" => \&wait_cluster,
|
|
"wait-resource" => \&wait_cluster,
|
|
|
|
# compatible keywords (or their derivatives)
|
|
"attach" => [
|
|
"check preconditions", \&attach_res_phase0,
|
|
"switch state", \&attach_res_phase1,
|
|
"wait for effect", \&attach_res_phase2,
|
|
],
|
|
"detach" => [
|
|
"check preconditions", \&attach_res_phase0,
|
|
"switch state", \&attach_res_phase1,
|
|
"wait for effect", \&attach_res_phase2,
|
|
],
|
|
"connect-local" => \&connect_local_res,
|
|
"connect-global" => \&connect_global_res,
|
|
"connect" => \&connect_local_res,
|
|
"disconnect-local" => \&connect_local_res,
|
|
"disconnect-global" => \&connect_global_res,
|
|
"disconnect" => \&connect_local_res,
|
|
"syncer" => \&ignore_cmd,
|
|
"up" => [
|
|
"check preconditions", \&up_res_phase0,
|
|
"switch state", \&up_res_phase1,
|
|
"wait for effect", \&up_res_phase2,
|
|
],
|
|
"down" => [
|
|
"check preconditions", \&up_res_phase0,
|
|
"switch state", \&up_res_phase1,
|
|
"wait for effect", \&up_res_phase2,
|
|
],
|
|
"primary" => [
|
|
"check preconditions", \&primary_phase0,
|
|
"leave primary state", \&primary_phase1,
|
|
"wait when necessary", \&primary_phase2,
|
|
"switch to primary", \&primary_phase3,
|
|
"wait for device", \&primary_phase4,
|
|
],
|
|
"secondary" => [
|
|
"check preconditions", \&primary_phase0,
|
|
"leave primary state", \&primary_phase1,
|
|
"wait for effect", \&primary_phase4,
|
|
],
|
|
"invalidate" => [
|
|
"check preconditions", \&invalidate_res_phase0,
|
|
"stop old replay", \&invalidate_res_phase1,
|
|
"wait for replay off", \&invalidate_res_phase2,
|
|
"force symlinks", \&invalidate_res_phase3,
|
|
],
|
|
"invalidate-remote" => \&forbidden_cmd,
|
|
"resize" => \&resize_res,
|
|
"create-md" => \&senseless_cmd,
|
|
"get-gi" => \&ignore_cmd,
|
|
"show-gi" => \&ignore_cmd,
|
|
"dump-md" => \&senseless_cmd,
|
|
"outdate" => \&ignore_cmd,
|
|
"adjust" => \&ignore_cmd,
|
|
"wait-connect" => \&wait_cluster,
|
|
"role" => \&role_cmd,
|
|
"state" => \&role_cmd,
|
|
"cstate" => \&nyi_cmd,
|
|
"dstate" => \&nyi_cmd,
|
|
"status" => \&nyi_cmd,
|
|
"dump" => \&senseless_cmd,
|
|
"verify" => \&forbidden_cmd,
|
|
"pause-sync-local" => \&pause_sync_local_res,
|
|
"pause-sync-global" => \&pause_sync_global_res,
|
|
"pause-sync" => \&pause_sync_local_res,
|
|
"resume-sync-local" => \&pause_sync_local_res,
|
|
"resume-sync-global"=> \&pause_sync_global_res,
|
|
"resume-sync" => \&pause_sync_local_res,
|
|
"new-current-uuid" => \&senseless_cmd,
|
|
"hidden-commands" => \&ignore_cmd,
|
|
);
|
|
|
|
my @args;
|
|
|
|
foreach my $arg (@ARGV) {
|
|
if ($arg eq "--force" || $arg eq "-f") {
|
|
$force++;
|
|
next;
|
|
} elsif ($arg eq "--verbose" || $arg eq "-v") {
|
|
$verbose++;
|
|
next;
|
|
} elsif ($arg =~ s/--timeout\s*=\s*([0-9]+)/$1/) {
|
|
$timeout = $arg;
|
|
next;
|
|
} elsif ($arg =~ s/--host\s*=\s*([-_A-Za-z0-9]+)/$1/) {
|
|
ldie "host '$arg' does not exist in /mars/ips/ip-*\n" unless -l "/mars/ips/ip-$arg";
|
|
if ($arg ne $host) {
|
|
lprint "ATTENTION: acting as if I were host '$arg'\n";
|
|
lwarn "some commands require local knowledge not available here.\n";
|
|
lwarn "thus something may fail or go wrong - use this at your risk!\n";
|
|
$host = $arg;
|
|
}
|
|
next;
|
|
}
|
|
if ($arg =~ s/^force-//) {
|
|
$force++;
|
|
}
|
|
push @args, $arg;
|
|
}
|
|
|
|
my $cmd = shift @args || helplist "command argument is missing\n";
|
|
|
|
$notify = "(cmd: $cmd)" unless $cmd eq "version";
|
|
|
|
if ($cmd =~ m/^help$/ || $cmd =~ m/^h$/) {
|
|
helplist;
|
|
}
|
|
|
|
if ($cmd =~ m/^version$/ || $cmd =~ m/^v$/) {
|
|
version;
|
|
}
|
|
|
|
ldie "only root may use this tool\n" if $< != 0 && $cmd !~ m/^cat$/; # getpid() seems to be missing in perlfunc
|
|
helplist "unknown command $cmd\n" if !exists $cmd_table{$cmd};
|
|
if (!(-d $mars) && $cmd !~ m/(create|join)-cluster|cat/) {
|
|
ldie "The $mars directory does not exist.\n";
|
|
}
|
|
|
|
my $res = "";
|
|
if ($cmd eq "show") {
|
|
$res = shift @args;
|
|
} elsif (!($cmd =~ m/^(create|leave|wait)-cluster|cat|[a-z]+-file$/)) {
|
|
$res = shift @args || helplist "resource argument is missing\n";
|
|
check_id($res);
|
|
}
|
|
|
|
|
|
lprint "using FORCE option -- hopefully you know what you do!\n" if $force;
|
|
|
|
my %checked_res;
|
|
|
|
sub do_one_res {
|
|
my $func = shift;
|
|
my ($cmd, $res) = @_;
|
|
if (!$checked_res{"$cmd$res"}) {
|
|
$res = check_res($res) unless $cmd =~ m/^(join|create|leave|wait)-cluster|create-resource|show|cat|[a-z]+-file|set-link$/;
|
|
check_res_member($res) unless $cmd =~ m/^(join|create)-(cluster|resource)|(leave|wait)-cluster|show|cat|[a-z]+-file|set-link$/;
|
|
detect_splitbrain($res);
|
|
$checked_res{"$cmd$res"} = 1;
|
|
}
|
|
&{$func}(@_);
|
|
}
|
|
|
|
my %skip_res;
|
|
|
|
sub do_all_res {
|
|
my $func = shift;
|
|
my $cmd = shift;
|
|
my $res = shift || "all";
|
|
if ($res eq "all" && $cmd !~ m/show|cat|cluster/) {
|
|
ldie "For safety reasons, --force is only allowed on explicitly named resources. Combination of 'all' with --force is disallowed!\n" if $force;
|
|
ldie "Cannot combine command '$cmd' with 'all' existing resources - you must explicitly name a single new resource\n" if $cmd =~ m/create|join/;
|
|
my $any_success = 0;
|
|
my $any_member = 0;
|
|
foreach $res (glob("$mars/resource-*")) {
|
|
next unless -e "$res/data-$host";
|
|
$any_member++;
|
|
$res =~ s/^.*\/resource-(.*)$/$1/;
|
|
next if defined($skip_res{$res});
|
|
lprint "--------- resource $res\n";
|
|
eval {
|
|
do_one_res($func, $cmd, $res, @_);
|
|
1;
|
|
} and $any_success = 1 or $skip_res{$res} = 1;
|
|
}
|
|
if (!$any_success) {
|
|
if (!$any_member) {
|
|
lprint "I am not member of any resource\n";
|
|
return 1;
|
|
}
|
|
ldie "all resources have errors\n";
|
|
}
|
|
return !$any_success;
|
|
} else {
|
|
return do_one_res($func, $cmd, $res, @_);
|
|
}
|
|
}
|
|
|
|
my $func = $cmd_table{$cmd};
|
|
ldie "unknown command '$cmd'\n" unless $func;
|
|
|
|
if (ref($func) eq "ARRAY") {
|
|
my @list = @$func;
|
|
while (@list) {
|
|
my $headline = shift @list;
|
|
my $memb_func = shift @list;
|
|
lprint "---------------------------- $headline:\n";
|
|
my $status = do_all_res($memb_func, $cmd, $res, @args);
|
|
last if (defined($status) && $status);
|
|
finish_links();
|
|
}
|
|
} elsif (ref($func) eq "CODE") {
|
|
do_all_res($func, $cmd, $res, @args);
|
|
} else {
|
|
ldie "internal error: command table is wrong for '$cmd'";
|
|
}
|
|
|
|
finish_links();
|
|
|
|
exit($error_count);
|