mirror of https://github.com/schoebel/mars
481 lines
15 KiB
Perl
481 lines
15 KiB
Perl
#!/usr/bin/perl -w
|
|
# (c) 2010 Thomas Schoebel-Theuer / 1&1 Internet AG
|
|
|
|
use strict;
|
|
use English;
|
|
use warnings;
|
|
|
|
# Base directory below which all state handled by this tool lives.
my $mars = "/mars";

# Node name of this machine as printed by `uname -n`; it is used as the
# suffix of all per-host entries created below $mars.
my $host = `uname -n` or die "cannot determine my network node name\n";
chomp $host;

# Reduce the output of `ip a` to a single IPv4 address.  Because of the
# leading greedy ".*" (with /s) the LAST "inet" address that does not start
# with "127.0." is the one that is kept.
my $ip = `ip a` or die "cannot determine my IP address\n";
$ip =~ s/\A.*inet +(?!127\.0\.)([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+).*?\Z/$1/ms or die "cannot parse my IP address\n";
chomp $ip;
print "my IP is $ip\n";

# Everything created from here on is accessible by its owner only.
umask 0077;

# Test the effective user id directly instead of forking `whoami` and
# comparing a user name ($EUID is provided by "use English").
die "only root may use this tool\n" unless $EUID == 0;
|
|
|
|
##################################################################
|
|
|
|
# syntactic checks
|
|
|
|
# Validate an identifier given on the command line.
#   $str - the candidate identifier
# Dies when $str does not start with a letter or underscore, contains
# anything other than letters, digits, '-' and '_', or is longer than
# 16 characters.  Returns nothing.
sub check_id {
    my $str = shift;
    # \A and \z anchor the whole string; "$" would also accept a trailing
    # newline, which would then become part of path names built from $str.
    die "identifier '$str' has disallowed characters" unless $str =~ m/\A[A-Za-z_][-A-Za-z0-9_]*\z/;
    die "identifier '$str' is too long (only 16 chars allowed)" if length($str) > 16;
    return;
}
|
|
|
|
##################################################################
|
|
|
|
# semantic checks
|
|
|
|
# Resolve a resource name given by the user.
#   $res - name as given on the command line
# Returns $res unchanged when the directory "$mars/resource-$res" exists.
# Otherwise the name is looked up among the targets of the per-host
# "device-$host" symlinks and, if nothing matched there, among the targets of
# the "_direct-*-$host" symlinks (only the part after the last comma is
# compared).  When exactly one symlink matches, a warning is printed and the
# name of the resource owning that symlink is returned; otherwise dies.
sub check_res {
    my $res = shift;
    return $res if -d "$mars/resource-$res";

    # DO WHAT I MEAN: try to substitute a device name for a badly given
    # resource name if it is unique
    my @matches;
    foreach my $test (glob("$mars/resource-*/device-$host")) {
        my $target = readlink($test);
        # entries that cannot be read as a symlink can never match
        next unless defined $target;
        push @matches, $test if $target eq $res;
    }
    if (!@matches) {
        foreach my $test (glob("$mars/resource-*/_direct-*-$host")) {
            my $target = readlink($test);
            next unless defined $target;
            $target =~ s/^.*,//;
            push @matches, $test if $target eq $res;
        }
    }

    my $count = scalar @matches;
    die "resource '$res' does not exist ($count replacements found)\n" unless $count == 1;

    # reduce the path of the matching symlink to the resource name
    my $found = $matches[0];
    $found =~ s:^.*/resource-(.*)/.*$:$1:;
    warn "substituting bad resource name '$res' by uniquely matching resource name '$found'\n";
    return $found;
}
|
|
|
|
# Make sure this host takes part in a resource.
#   $res - resource name
# Dies unless "$mars/resource-$res/data-$host" exists.
sub check_res_member {
    my ($res) = @_;
    my $marker = "$mars/resource-$res/data-$host";
    if (not -e $marker) {
        die "sorry, I have not yet joined to resource '$res'\n";
    }
}
|
|
|
|
# Refuse to continue while a sync is still incomplete.
#   $res  - resource name
#   $host - host whose "syncstatus-$host" symlink is inspected
# When that symlink exists, its value is compared numerically with the value
# of the resource's "size" symlink; dies when it is smaller, or when "size"
# cannot be read.  Prints a confirmation otherwise (also when there is no
# syncstatus symlink at all).
sub check_sync_finished {
    my ($res, $host) = @_;
    my $resdir = "$mars/resource-$res";
    my $lnk = "$resdir/syncstatus-$host";
    if (lstat($lnk)) {
        my $done = readlink($lnk);
        my $total = readlink("$resdir/size") or die "cannot read size\n";
        if (!($done >= $total)) {
            die "sync has not yet finished, only $done / $total bytes transferred\n";
        }
    }
    print "OK, it seems that sync has finished on $host.\n";
}
|
|
|
|
# Block until no host reports itself as primary any more.
#   $res - resource name
# Every 5 seconds all "actual-*/is-primary" symlinks of the resource are
# read; the loop ends as soon as none of them has a true value.
sub check_primary_gone {
    my ($res) = @_;
    my $pattern = "$mars/resource-$res/actual-*/is-primary";
    while (1) {
        # number of links whose value is true (unreadable links count as false)
        my $found = grep { readlink($_) } glob($pattern);
        last unless $found;
        print "waiting for $found other primary host(s) to disappear....\n";
        sleep(5);
    }
}
|
|
|
|
# Compare the newest version link of $host with those of all other hosts.
#   $res  - resource name
#   $host - host whose "version-<serial>-$host" links are the reference
# The last link in glob order is taken as reference.  Every link of another
# host carrying the same serial must have the same value; otherwise dies
# with "split brain detected".  Also dies when no usable version information
# for $host is found.
sub check_splitbrain {
    my ($res, $host) = @_;
    my $prefix = "$mars/resource-$res/version-";

    my @links = glob("$mars/resource-$res/version-[0-9]*-$host");
    die "no version information available for host $host\n" unless @links;
    my $link = pop @links;
    die "cannot pop last link element\n" unless $link;
    my $version = readlink($link);
    die "cannot get version information for $link\n" unless $version;

    # \Q...\E: the prefix is a literal path, not a pattern
    my ($serial) = $link =~ m:\Q$prefix\E([0-9]+):
        or die "cannot parse serial number from $link\n";
    print "my version: $link -> $version\n";

    foreach my $other (glob("$mars/resource-$res/version-[0-9]*-*")) {
        # only use the captures when the match really succeeded; after a
        # failed match $1/$2 would still hold the values of an earlier one
        next unless $other =~ m:\Q$prefix\E([0-9]+)-(.*)$:;
        my ($other_serial, $other_host) = ($1, $2);
        next if $serial ne $other_serial;
        next if $host eq $other_host;
        my $other_version = readlink($other);
        print "other version: $other -> $other_version\n";
        die "split brain detected: $other_version != $version\n" unless $version eq $other_version;
    }
}
|
|
|
|
# Parse a size specification.
#   $arg - a decimal number, optionally followed by one of the suffixes
#          k, m, g, t, p (case-insensitive, powers of 1024)
# Returns the size in bytes, or -1 when $arg is not a well-formed size
# (for example a device path, or something like "1.2.3" or ".").
# The global $_ is left untouched.
sub get_size {
    my $arg = shift;
    return -1 unless defined $arg;
    return -1 unless $arg =~ m/\A([0-9]+(?:\.[0-9]*)?|\.[0-9]+)([kmgtp]?)\z/i;
    my ($number, $unit) = ($1, lc $2);
    # "12." is accepted like "12"
    $number =~ s/\.\z//;

    my %factor_of = (
        k => 1024,
        m => 1024 * 1024,
        g => 1024 * 1024 * 1024,
        t => 1024 * 1024 * 1024 * 1024,
        p => 1024 * 1024 * 1024 * 1024 * 1024,
    );
    return $number if $unit eq "";
    return $number * $factor_of{$unit};
}
|
|
|
|
##################################################################
|
|
|
|
# helpers
|
|
|
|
# Run one shell command line that loads the mars_proc module and then writes
# "1" to /proc/sys/mars (output of the write is discarded).
# Returns the status reported by system().
sub _trigger {
    my $shell_line = "modprobe mars_proc; (echo 1 > /proc/sys/mars) >/dev/null 2>&1";
    return system($shell_line);
}
|
|
|
|
# Set an on/off switch that is represented by a symlink with value "1" or "0".
#   $cmd  - command name used in the messages; nothing is printed when false
#   $res  - resource name (only used in the messages)
#   $path - path of the switch symlink
#   $on   - true: set the switch to "1", false: set it to "0"
# When the symlink already has the requested value, a note is printed and
# nothing is changed.  Otherwise a new symlink is created under a temporary
# name and renamed onto $path.  Dies when the symlink cannot be created or
# renamed.
sub _switch {
    my ($cmd, $res, $path, $on) = @_;
    my $src = $on ? "1" : "0";

    my $old = readlink($path);
    # test definedness, not truth: the stored value "0" is false in Perl,
    # so a plain truth test never recognises a switch that is already off
    if (defined $old && $old eq $src) {
        print "${cmd} on resource $res is already activated\n" if $cmd;
        return;
    }

    # temporary name: the last "/component" of $path becomes ".tmp.component"
    my $tmp = $path;
    $tmp =~ s/\/([^\/]+)$/.tmp.$1/;
    # a leftover from an interrupted earlier run must not block this one
    unlink($tmp);
    symlink($src, $tmp) or die "cannot create switch symlink\n";
    rename($tmp, $path) or die "cannot rename switch symlink\n";
    print "successfully started ${cmd} on resource $res\n" if $cmd;
    return;
}
|
|
|
|
# Set or clear the owner-write permission bit of a path.
#   $path - file to change
#   $on   - true: add owner write permission, false: remove it
# Only the owner bits (0700) of the current mode are carried over into the
# new mode.  Prints the old and new mode (as decimal numbers, without a
# trailing newline) and dies when chmod does not succeed.
sub _writable {
    my ($path, $on) = @_;
    my @info = lstat $path;
    my $oldmode = $info[2] & 0700;
    my $newmode;
    if ($on) {
        $newmode = $oldmode | 0200;
    } else {
        $newmode = $oldmode & ~0200;
    }
    print "chmod '$path' $oldmode $newmode";
    chmod($newmode, $path) == 1 or die "cannot chmod '$path'\n";
}
|
|
|
|
##################################################################
|
|
|
|
# commands
|
|
|
|
# Handler for commands that are accepted but do nothing.
#   $cmd - command name, $res - resource name
# Prints a note and terminates the program with exit status 0.
sub ignore_cmd {
    my $cmd = shift;
    my $res = shift;
    print "ignoring command '$cmd' on resource '$res'\n";
    exit(0);
}
|
|
|
|
# Handler for commands that have no meaning with MARS.
#   $cmd - command name (the resource argument is not used)
# Prints a note and terminates the program with exit status 0.
sub senseless_cmd {
    my $cmd = shift;
    print "command '$cmd' makes no sense with MARS (ignoring)\n";
    exit(0);
}
|
|
|
|
# Handler for commands that are refused.
#   $cmd - command name (the resource argument is not used)
# Always dies with an explanation.
sub forbidden_cmd {
    my $cmd = shift;
    die "command '$cmd' cannot be used with MARS (it is impossible to carry out uniquely and could therefore lead to a disaster)\n";
}
|
|
|
|
# Handler for commands that are known but not implemented.
#   $cmd - command name (the resource argument is not used)
# Always dies.
sub nyi_cmd {
    my $cmd = shift;
    die "command '$cmd' is not yet implemented\n";
}
|
|
|
|
# Create the basic directory layout below $mars and register the IP address
# of this host.  The arguments are accepted for uniformity with the command
# handlers but are not used.
# Missing directories are created with the builtin mkdir instead of running
# a shell; a failure is reported as a warning and does not abort, as before.
sub _create_system {
    my ($cmd, $peer, $force) = @_;
    foreach my $dir ($mars, "$mars/ips", "$mars/defaults", "$mars/defaults-$host") {
        next if -d $dir;
        mkdir($dir) or warn "cannot create directory '$dir': $!\n";
    }
    # result deliberately not checked: the link may exist from an earlier run
    symlink($ip, "$mars/ips/ip-$host");
}
|
|
|
|
# Command handler for "create-system".
#   $cmd   - command name
#   $peer  - second command line argument (not used here)
#   $force - when true, an already existing "$mars/ips" is tolerated
# Dies when "$mars/ips" exists and $force is false; otherwise delegates to
# _create_system() with the unchanged argument list.
sub create_system {
    my ($cmd, $peer, $force) = @_;
    unless ($force) {
        die "system is already created\n" if -d "$mars/ips";
    }
    _create_system(@_);
}
|
|
|
|
# Command handler for "join-system": exchange the ips directory with a peer.
#   $cmd   - command name
#   $peer  - host to exchange "$mars/ips/" with via rsync
#   $force - must match /--force/ when resources already exist locally
# Dies when local resources exist and $force is not given, or when one of
# the two rsync runs does not exit with status 0.
sub join_system {
    my ($cmd, $peer, $force) = @_;
    # glob in list context: in scalar (boolean) context it is an iterator
    # that keeps state between evaluations
    my @existing = glob("$mars/resource-*");
    if (@existing) {
        die "Sorry, some resources already exist!\nThis is dangerous!\nIf you are sure that no resource clash is possible, re-invoke this command with '--force' option\n" unless ($force and $force =~ m/--force/);
    }
    print "joining system via rsync (peer='$peer')\n";
    _create_system(@_);
    # list form of system(): the arguments are passed to rsync as they are,
    # without a shell parsing them
    my @rsync = ("rsync", "--recursive", "--links", "-v");
    system(@rsync, "$peer:$mars/ips/", "$mars/ips/") == 0 or die "oops\n";
    # (re-)register the own address after fetching the peer's view,
    # then push the result back
    symlink($ip, "$mars/ips/ip-$host");
    system(@rsync, "$mars/ips/", "$peer:$mars/ips/") == 0 or die "oops\n";
}
|
|
|
|
# Command handler for "create-resource" and "join-resource".
#   $cmd    - "create-resource" creates a new resource, anything else joins
#             an existing one
#   $res    - resource name
#   $dev    - either a size specification understood by get_size() (a sparse
#             file is created as data store) or the absolute path of an
#             existing block device (a symlink to it is created)
#   $appear - name under /dev/mars/; defaults to $res
# A new resource is assembled in "$mars/.tmp.$res" and renamed to
# "$mars/resource-$res" at the very end.  Joining works directly inside the
# existing resource directory and derives the replay status from the one of
# the current primary.  Dies on any inconsistency or failing step.
sub create_res {
    use Fcntl qw(SEEK_SET SEEK_END);

    my ($cmd, $res, $dev, $appear) = @_;
    my $create = ($cmd eq "create-resource");

    die "undefined device or size argument\n" unless $dev;
    $appear = $res if !$appear;
    check_id($appear) if $appear;

    if ($create) {
        die "resource '$res' already exists\n" if -d "$mars/resource-$res";
        print "creating new resource '$res'\n";
    } else {
        print "joining to existing resource '$res'\n";
    }

    # $dev is first tried as a size; only when that fails it must be a device
    my $size = get_size($dev);
    if ($size > 0) {
        $dev = "";
    } else {
        die "block device '$dev' does not exist\n" unless -b $dev;
        die "block device '$dev' must be an absolute path starting with '/'\n" unless $dev =~ m/^\//;
        # the device size is the offset reached by seeking to its end
        open(my $test, "<", $dev) or die "cannot open device for reading\n";
        $size = sysseek($test, 0, SEEK_END);
        close($test);
        print "device size = $size bytes\n";
        die "implausible size" unless $size > 0;
    }

    my $tmp = "$mars/.tmp.$res";
    my $primary;
    my $replay;
    if ($create) {
        _create_system(@_);
        system("rm -rf $tmp");
        system("mkdir $tmp") == 0 or die "could not create resource '$res'\n";
        symlink($size, "$tmp/size") or die "cannot create size indicator symlink\n";
    } else {
        $tmp = "$mars/resource-$res";
        die "resource '$res' does not exist\n" unless -d $tmp;
        $primary = readlink("$tmp/primary") or die "cannot determine primary\n";
        die "resource '$res' is already joined\n" if -e "$tmp/data-$host";
        die "my ip '$ip' is not registered -- please run 'join-system' first\n" unless -l "$mars/ips/ip-$host";
        my $oldsize = readlink("$tmp/size") or die "cannot determine old size\n";
        if ($size < $oldsize) {
            print "adjusting size to $oldsize\n";
            $size = $oldsize;
        }
        die "sizes differ: real size = $oldsize, but requested size = $size\n" unless $oldsize == $size;
        $replay = readlink("$tmp/replay-$primary") or die "cannot read replay status of primary '$primary'\n";
        # \Q...\E: the host name is literal text, not a pattern
        $replay =~ s/-\Q$primary\E,/-$host,/ or die "something is wrong here\n";
    }

    my $file = "$tmp/data-$host";
    if (!$dev) {
        print "creating sparse file '$file' with size $size\n";
        open(my $out, ">", $file) or die "could not open '$file'\n";
        sysseek($out, $size-1, SEEK_SET) == $size-1 or die "could not seek\n";
        # double quotes are required: '\0' in single quotes is a backslash
        # followed by the digit 0, so its first byte is "\" and not NUL
        syswrite($out, "\0", 1) == 1 or die "cannot init sparse file\n";
        close($out) or die "cannot close '$file'\n";
    } else {
        print "using existing device '$dev'\n";
        symlink($dev, $file) or die "cannot create device symlink\n";
    }
    if ($appear) {
        # TODO: check for uniqueness of $appear
        print "resource '$res' will appear as local device '/dev/mars/$appear'\n";
        system("rm -f $tmp/device-$host");
        symlink($appear, "$tmp/device-$host") or die "cannot create symlink for local device appearance\n";
    }

    # per-host directories and switches; all switches start as "1"
    mkdir("$tmp/defaults") unless -d "$tmp/defaults";
    mkdir("$tmp/defaults-$host");
    mkdir("$tmp/actual-$host");
    mkdir("$tmp/switch-$host");
    symlink("1", "$tmp/switch-$host/attach");
    symlink("1", "$tmp/switch-$host/connect");
    symlink("1", "$tmp/switch-$host/sync");
    system("rm -f $tmp/syncstatus-$host");

    if ($create) {
        # the creator becomes primary; its syncstatus equals the full size
        symlink($host, "$tmp/primary") or die "cannot create primary symlink\n";
        symlink($size, "$tmp/syncstatus-$host") or die "cannot create primary syncstatus\n";
        symlink("log-000000001-$host,0,0", "$tmp/replay-$host") or die "cannot create replay status\n";
        rename($tmp, "$mars/resource-$res") or die "cannot finalize resource '$res'\n";
        print "successfully created resource '$res'\n";
    } else {
        # a joining host starts with syncstatus 0 and connects to the primary
        system("rm -f $tmp/replay-$host");
        symlink($replay, "$tmp/replay-$host") or die "cannot create replay status\n";
        symlink("0", "$tmp/syncstatus-$host") or die "cannot start initial sync\n";
        system("rm -f $tmp/connect-$host");
        symlink($primary, "$tmp/connect-$host") or die "cannot create peer connect symlink\n";
        symlink($host, "$tmp/connect-$primary") unless -l "$tmp/connect-$primary";
        print "successfully joined resource '$res'\n";
    }
}
|
|
|
|
# Command handler for "attach" and "detach".
#   $cmd - command name; "detach" switches off, everything else switches on
#   $res - resource name
# Sets the "attach" switch of this host via _switch().
sub attach_res {
    my ($cmd, $res) = @_;
    my $enable = ($cmd ne "detach");
    _switch($cmd, $res, "$mars/resource-$res/switch-$host/attach", $enable);
}
|
|
|
|
# Command handler for "connect" and "disconnect".
#   $cmd - command name; "disconnect" switches off, everything else on
#   $res - resource name
# Sets the "connect" switch of this host via _switch().
sub connect_res {
    my ($cmd, $res) = @_;
    my $enable = ($cmd ne "disconnect");
    _switch($cmd, $res, "$mars/resource-$res/switch-$host/connect", $enable);
}
|
|
|
|
# Command handler for "pause-sync" and "resume-sync".
#   $cmd - command name; "pause-sync" switches off, everything else on
#   $res - resource name
# Sets the "sync" switch of this host via _switch().
sub pause_res {
    my ($cmd, $res) = @_;
    my $enable = ($cmd ne "pause-sync");
    _switch($cmd, $res, "$mars/resource-$res/switch-$host/sync", $enable);
}
|
|
|
|
# Command handler for "up" and "down".
#   $cmd - "down" runs pause-sync, disconnect, detach (in this order);
#          everything else runs attach, connect, resume-sync
#   $res - resource name
sub up_res {
    my ($cmd, $res) = @_;
    my @steps = ($cmd eq "down")
        ? ( [ \&pause_res,   "pause-sync" ],
            [ \&connect_res, "disconnect" ],
            [ \&attach_res,  "detach" ] )
        : ( [ \&attach_res,  "attach" ],
            [ \&connect_res, "connect" ],
            [ \&pause_res,   "resume-sync" ] );
    foreach my $step (@steps) {
        my ($handler, $subcmd) = @{$step};
        $handler->($subcmd, $res);
    }
}
|
|
|
|
# Replace the primary symlink of a resource.
#   $res  - resource name
#   $host - new value of the symlink
#   $pri  - path of the primary symlink
#   $old  - previous value (only used in the message)
# The new symlink is created as ".tmp.primary" inside the resource directory
# and then renamed onto $pri.  Dies when either step fails.
sub _primary_res {
    my ($res, $host, $pri, $old) = @_;
    my $tmp = "$mars/resource-$res/.tmp.primary";
    system("rm -f $tmp");
    unless (symlink($host, $tmp)) {
        die "cannot create new primary symlink\n";
    }
    unless (rename($tmp, $pri)) {
        die "cannot install new primary symlink\n";
    }
    print "primary changed from '$old' to '$host'\n";
}
|
|
|
|
# Command handler for "primary" and "secondary".
#   $cmd - "secondary" gives up the primary role, everything else tries to
#          become primary
#   $res - resource name
# "secondary" is only allowed while this host is primary; the primary symlink
# is then set to "(none)".  Becoming primary first checks that the own sync
# has finished, sets the primary symlink to "(none)", waits until no host
# reports itself as primary, checks for split brain, and finally installs
# this host as primary.  When this host already is primary, a note is
# printed and the program exits with status 0.
sub primary_res {
    my ($cmd, $res) = @_;
    my $sec = ($cmd eq "secondary");
    my $pri = "$mars/resource-$res/primary";
    my $old = readlink($pri) or die "cannot determine current primary\n";

    # The new value is kept in a local variable.  Assigning "(none)" to the
    # file-level $host would change the host name seen by all code that runs
    # afterwards.
    my $new_primary = $host;
    if ($sec) {
        die "for safety reasons, switching to secondary is only allowed when I ($host) am primary\n" if ($old ne $host);
        $new_primary = "(none)";
    } elsif ($old eq $host) {
        print "I am already primary.\n";
        exit(0);
    } else { # try to switch myself to primary
        print "trying to switch $host to primary...\n";
        check_sync_finished($res, $host);
        _primary_res($res, "(none)", $pri, $old) unless $old eq "(none)";
        check_primary_gone($res);
        check_splitbrain($res, $host);
    }
    _primary_res($res, $new_primary, $pri, $old);
}
|
|
|
|
# Command handler for "invalidate".
#   $cmd - command name (not used), $res - resource name
# Replaces the "syncstatus-$host" symlink of the resource by one with the
# value "0".  Dies when the current primary cannot be determined, when this
# host is the primary, or when the symlink cannot be created.
sub invalidate_res {
    my ($cmd, $res) = @_;
    my $resdir = "$mars/resource-$res";
    my $old = readlink("$resdir/primary") or die "cannot determine current primary\n";
    if ($old eq $host) {
        die "primary side cannot be invalidated\n";
    }
    my $dst = "$resdir/syncstatus-$host";
    system("rm -f $dst");
    symlink("0", $dst) or die "cannot create invalidation symlink '$dst'\n";
}
|
|
|
|
# Command handler for "role" and "state".
#   $cmd - command name (not used), $res - resource name
# Prints "primary" when the primary symlink of the resource names this host
# and "secondary" otherwise.  Dies when the symlink cannot be read.
sub role_cmd {
    my ($cmd, $res) = @_;
    my $old = readlink("$mars/resource-$res/primary") or die "cannot determine current primary\n";
    print(($old eq $host) ? "primary\n" : "secondary\n");
}
|
|
|
|
##################################################################
|
|
|
|
# Dispatch table: command name => handler.  Built from one row per handler,
# each row listing the command names served by that handler.
my %cmd_table;
{
    my @rows = (
        # new keywords
        [ \&create_system,  qw(create-system) ],
        [ \&join_system,    qw(join-system) ],
        [ \&create_res,     qw(create-resource join-resource) ],

        # compatible keywords that are implemented
        [ \&attach_res,     qw(attach detach) ],
        [ \&connect_res,    qw(connect disconnect) ],
        [ \&pause_res,      qw(pause-sync resume-sync) ],
        [ \&up_res,         qw(up down) ],
        [ \&primary_res,    qw(primary secondary) ],
        [ \&invalidate_res, qw(invalidate) ],
        [ \&role_cmd,       qw(role state) ],

        # compatible keywords that are accepted without effect
        [ \&ignore_cmd,     qw(syncer get-gi show-gi outdate adjust hidden-commands) ],
        [ \&senseless_cmd,  qw(create-md dump-md dump new-current-uuid) ],

        # compatible keywords that are refused
        [ \&forbidden_cmd,  qw(invalidate-remote) ],
        [ \&nyi_cmd,        qw(resize wait-connect cstate status verify dstate) ],
    );
    foreach my $row (@rows) {
        my ($handler, @names) = @{$row};
        $cmd_table{$_} = $handler foreach @names;
    }
}
|
|
|
|
# The first two command line arguments are the command and the resource;
# whatever remains in @ARGV is handed to the command handler later on.
my $cmd = shift(@ARGV) or die "command argument is missing\n";
my $res = shift(@ARGV) or die "resource argument is missing\n";

unless (exists $cmd_table{$cmd}) {
    die "unknown command '$cmd'\n";
}
check_id($res);
|
|
|
|
# Run one command on one resource.
#   $cmd - command name, must be a key of %cmd_table
#   $res - resource name (for the *-system commands: the second argument)
#   remaining arguments are passed through to the handler
# Except for the commands that do not refer to an already existing resource,
# the resource name is resolved with check_res() and membership of this host
# is verified with check_res_member() before the handler is called.
# Returns whatever the handler returns.
sub do_res {
    my $cmd = shift;
    my $res = shift;

    # "create-system" has to be exempt as well: its second argument is not
    # the name of an existing resource, so check_res() would always die
    $res = check_res($res) unless $cmd =~ m/^(?:(?:join|create)-system|create-resource)$/;
    check_res_member($res) unless $cmd =~ m/^(join|create)-(system|resource)$/;

    my $func = $cmd_table{$cmd};
    return $func->($cmd, $res, @_);
}
|
|
|
|
# A single resource is handled directly.  The pseudo resource "all" runs the
# command on every resource directory that contains a "data-$host" entry.
if ($res ne "all") {
    do_res($cmd, $res, @ARGV);
} else {
    foreach my $dir (glob("$mars/resource-*")) {
        next unless -e "$dir/data-$host";
        # strip everything up to and including "/resource-"
        (my $name = $dir) =~ s/^.*\/resource-(.*)$/$1/;
        print "--------- resource $name\n";
        do_res($cmd, $name, @ARGV);
    }
}

# finally run the trigger helper
_trigger();
|