mars/userspace/marsadm

1031 lines
32 KiB
Perl

#!/usr/bin/perl -w
# (c) 2010 Thomas Schoebel-Theuer / 1&1 Internet AG
use strict;
use English;
use warnings;
umask 0077;
##################################################################
# messaging
my $warn = "";
my $notify = "";
sub lprint {
my ($text) = @_;
print "$warn$text";
if ($notify) {
system("/usr/bin/logger -t marsadm \"$warn$notify $text\"");
}
}
sub ldie {
my ($text) = @_;
$warn = "DYING: ";
lprint $text;
exit -1;
}
sub lwarn {
my ($text) = @_;
my $oldwarn = $warn;
$warn = "WARNING: ";
lprint $text;
$warn = $oldwarn;
}
##################################################################
# global variables
my $Id = '$Id$';
my $mars = "/mars";
my $host = `uname -n` or ldie "cannot determine my network node name\n";
chomp $host;
my $force = 0;
my $ip = _get_ip() or ldie "cannot determine my IP address\n";
##################################################################
# syntactic checks
sub check_id {
my $str = shift;
ldie "identifier '$str' has disallowed characters" unless $str =~ m/^[A-Za-z_][-A-Za-z0-9_]*$/;
ldie "identifier '$str' is too long (only 16 chars allowed)" if length($str) > 16;
}
##################################################################
# semantic checks
sub check_res {
my $res = shift;
if(not -d "$mars/resource-$res") {
# DO WHAT I MEAN: try to substitute a device name for a badly given resource name if it is unique
my $count = 0;
my $found;
my @tests = glob("$mars/resource-*/device-$host");
foreach my $test (@tests) {
my $target = readlink($test);
if($target eq $res) {
$found = $test;
$count++;
}
}
if(!$count) {
@tests = glob("$mars/resource-*/_direct-*-$host");
foreach my $test (@tests) {
my $target = readlink($test);
$target =~ s/^.*,//;
if($target eq $res) {
$found = $test;
$count++;
}
}
}
ldie "resource '$res' does not exist ($count replacements found)\n" unless $count == 1 and $found;
$found =~ s:^.*/resource-(.*)/.*$:$1:;
lwarn "substituting bad resource name '$res' by uniquely matching resource name '$found'\n";
$res = $found;
}
return $res;
}
sub check_res_member {
my $res = shift;
ldie "sorry, I have not yet joined to resource '$res'\n" unless -e "$mars/resource-$res/data-$host";
}
sub check_sync_finished {
my ($res, $host) = @_;
my $lnk = "$mars/resource-$res/syncstatus-$host";
if(lstat($lnk)) {
my $syncstatus = readlink($lnk);
my $size = readlink("$mars/resource-$res/size") or ldie "cannot read size\n";
ldie "sync has not yet finished, only $syncstatus / $size bytes transferred\n" unless $syncstatus >= $size;
}
lprint "OK, it seems that sync has finished on $host.\n";
}
sub check_primary {
my ($cmd, $res) = @_;
my $pri = "$mars/resource-$res/primary";
my $old = readlink($pri) or ldie "cannot determine current primary\n";
ldie "for operation '$cmd' I need to be primary\n" unless $old eq $host;
}
sub check_not_primary {
my ($cmd, $res) = @_;
my $pri = "$mars/resource-$res/primary";
my $old = readlink($pri) or ldie "cannot determine current primary\n";
ldie "operation '$cmd' cannot be executed on primary\n" if $old eq $host;
}
sub check_primary_gone {
my ($res) = @_;
for(;;) {
my @links = glob("$mars/resource-$res/actual-*/is-primary");
my $found = 0;
foreach my $link (@links) {
my $val = readlink($link);
$found++ if $val;
}
last if !$found;
lprint "waiting for $found other primary host(s) to disappear....\n";
sleep(5);
}
}
sub check_todo {
my ($res, $key, $val, $wait) = @_;
for(;;) {
my $path = "$mars/resource-$res/todo-$host/$key";
my $link = readlink($path) or ldie "cannot read symlink '$path'\n";
last if $link == $val;
ldie "$path must have value $val\n" if $wait;
lprint "waiting until $key reaches the value $val....\n";
sleep(5);
}
}
sub check_status {
my ($res, $key, $val, $wait) = @_;
for(;;) {
my $path = "$mars/resource-$res/actual-$host/$key";
my $link = readlink($path) or ldie "cannot read symlink '$path'\n";
last if $link == $val;
ldie "$path must have value $val\n" if $wait;
lprint "waiting until $key reaches the value $val....\n";
sleep(5);
}
}
sub _check_mtime {
my ($path, $age) = @_;
my $mt = (lstat($path))[9];
if(!$mt) {
return 0;
}
my $res = ($mt < time() - $age);
#lprint "XXX '$path' $res\n";
return $res;
}
sub _check_all_mtimes {
my ($path, $age) = @_;
my @list = glob($path);
my $res = 1;
foreach my $p (@list) {
if(!_check_mtime($p, $age)) {
$res = 0;
}
}
return $res;
}
sub _get_minmax {
my ($res, $glob, $take_symlink) = @_;
my $min = -1;
my $max = -1;
my @paths = glob($glob) or ldie "cannot find '$glob'\n";
foreach my $path (@paths) {
my $nr = $path;
if($take_symlink) {
$nr = readlink($path) or ldie "cannot read symlink '$path'\n";
}
$nr =~ s:^.*[a-z]+-([0-9]+)(-[^/]*)?$:$1:;
$min = $nr if ($nr < $min || $min < 0);
$max = $nr if ($nr > $max || $max < 0);
}
return ($min, $max);
}
sub get_minmax_logfiles {
my ($res) = @_;
return _get_minmax($res, "$mars/resource-$res/log-*", 0);
}
sub get_minmax_versions {
my ($res) = @_;
return _get_minmax($res, "$mars/resource-$res/version-*", 0);
}
sub get_minmax_any {
my ($res) = @_;
return _get_minmax($res, "$mars/resource-$res/{log,version}-*", 0);
}
sub get_minmax_replays {
my ($res) = @_;
return _get_minmax($res, "$mars/resource-$res/replay-*", 1);
}
sub check_splitbrain {
# check only the chain of $host (or all hosts if unset)
# check up to $sequence (or for all if < 0)
my ($res, $host, $sequence) = @_;
if($sequence < 0) {
my $pri = "$mars/resource-$res/primary";
my $old = readlink($pri) or ldie "cannot determine current primary\n";
_primary_res($res, "(none)", $pri, $old) unless $old eq "(none)";
_trigger();
sleep(5);
while(!_check_all_mtimes("$mars/resource-$res/{log,version,replay}-*", 60)) {
lprint "resource directory $res not stable, waiting....\n";
sleep(5);
}
while(1) {
my ($min_log, $max_log) = get_minmax_logfiles($res);
my ($min_ver, $max_ver) = get_minmax_versions($res);
my ($min_rep, $max_rep) = get_minmax_replays($res);
if($min_ver > $min_log || $max_ver < $max_log) {
lprint "some version links are missing...\n";
sleep(10);
next;
}
if($max_log >= $max_rep) {
lprint "resource $res: logfile $max_log is present.\n";
last;
}
lprint "resource $res: logfile $max_log is not yet transferred (need $max_rep), waiting....\n";
sleep(10);
}
} # $sequence < 0
my $glob = "$mars/resource-$res/version-[0-9]*";
my @links = glob($glob);
if(!@links) {
@links = glob("$mars/resource-$res/version-[0-9]*-*");
ldie "no version information available\n" unless @links;
lprint "assuming that I am primary for the first time\n";
return;
}
@links = sort(@links);
foreach my $link (@links) {
my $nr = $link;
$nr =~ s:^.*[a-z]+-([0-9]+)(-[^/]*)?$:$1:;
$nr = int($nr);
next if ($sequence >= 0 && $nr > $sequence);
my $fromhost = $link;
$fromhost =~ s:^.*version-[0-9]*-(.*)$:$1:;
my $version = readlink($link) or ldie "cannot read symlink '$link'\n";
my $otherhost = $version;
$otherhost =~ s:^[^,]*?,log-[0-9]*?-([^,]*?),.*$:$1:;
my $otherlink = sprintf("$mars/resource-$res/version-%09d-$otherhost", $nr);
my $otherversion = readlink($otherlink) or ldie "cannot read symlink '$otherlink'\n";
# ignore foreign mismatches
if($host) {
next if $fromhost ne $host;
}
# by defintion, the originator of a logfile is always "right"
next if $otherhost eq $fromhost;
# final check
ldie "splitbrain at sequence $nr detected\n" unless $version eq $otherversion;
}
}
sub get_size {
my $arg = shift;
return -1 unless $arg =~ m/^[0-9.]+[kmgtp]?$/i;
my $mod = $arg;
$arg =~ s/[^0-9]+$//;
$mod =~ s/^[0-9]+//;
$_ = $mod;
SWITCH: {
/k/i and $arg *= 1024, last SWITCH;
/m/i and $arg *= 1024 * 1024, last SWITCH;
/g/i and $arg *= 1024 * 1024 * 1024, last SWITCH;
/t/i and $arg *= 1024 * 1024 * 1024 * 1024, last SWITCH;
/p/i and $arg *= 1024 * 1024 * 1024 * 1024 * 1024, last SWITCH;
}
return $arg;
}
sub get_peers {
my ($res) = @_;
my @list = glob("$mars/resource-$res/connect-*");
return map { $_ =~ s:$mars/resource-$res/connect-::; $_ } @list;
}
##################################################################
# helpers
sub _trigger {
system("(echo 1 > /proc/sys/mars/trigger) >/dev/null 2>&1");
}
sub _switch {
my ($cmd, $res, $path, $on) = @_;
my $src = $on ? "1" : "0";
my $old = readlink($path);
if($old && $old eq $src) {
lprint "${cmd} on resource $res is already activated\n" if $cmd;
return;
}
my $tmp = $path;
$tmp =~ s/\/([^\/]+)$/.tmp.$1/;
symlink($src, $tmp) or ldie "cannot create switch symlink\n";
rename($tmp, $path) or ldie "cannot rename switch symlink\n";
lprint "successfully started ${cmd} on resource $res\n" if $cmd;
}
sub _writable {
my ($path, $on) = @_;
my $oldmode = (lstat $path)[2] & 0700;
my $newmode = $on ? $oldmode | 0200 : $oldmode & ~0200;
lprint "chmod '$path' $oldmode $newmode";
chmod($newmode, $path) == 1 or ldie "cannot chmod '$path'\n";
}
sub _get_ip {
chomp (my @info = `/sbin/ip addr show dev eth0`);
foreach (@info) {
m#\sinet\s(\d+\.\d+\.\d+\.\d+)# && return $1;
}
return undef;
}
sub _fake_versionlink {
my ($basedir, $prev, $primary) = @_;
$prev =~ s/^log-([0-9]+)-.*$/$1/;
$prev--;
if($prev > 0) {
my $prevversion = sprintf("$basedir/version-%09d-$primary", $prev);
my $prevlink = readlink($prevversion);
if (!$prevlink) { # try any one else
$prevversion = sprintf("$basedir/version-%09d-*", $prev);
my @test = glob($prevversion);
$prevlink = shift @test;
}
if ($prevlink) {
lprint "creating faked version symlink...\n";
my $myversion = sprintf("$basedir/version-%09d-$host", $prev);
system("rm -f $myversion.tmp");
symlink($prevlink, "$myversion.tmp") or ldie "cannot create faked version symlink '$myversion'\n";
system("mv $myversion.tmp $myversion");
} else {
lwarn "cannot read symlink '$prevversion' -- cannot create fake\n";
}
}
}
sub _set_replaylink {
my ($basedir, $replay, $primary) = @_;
my $replaylink = "$basedir/replay-$host";
my $oldreplay = readlink($replaylink);
# fake old version symlink when necessary
if ($oldreplay) {
my $oldbase = $oldreplay;
my $base = $replay;
$oldbase =~ s/^([^,]+).*/$1/;
$base =~ s/^([^,]+).*/$1/;
if ($base ne $oldbase) {
_fake_versionlink($basedir, $oldreplay, $primary);
}
}
# copy pervious version symlink
_fake_versionlink($basedir, $replay, $primary);
# create replay symlink
symlink($replay, "$replaylink.tmp") or ldie "cannot create replay status\n";
system("mv $replaylink.tmp $replaylink");
}
##################################################################
# commands
sub ignore_cmd {
my ($cmd, $res) = @_;
lprint "ignoring command '$cmd' on resource '$res'\n";
exit(0);
}
sub senseless_cmd {
my ($cmd, $res) = @_;
lprint "command '$cmd' makes no sense with MARS (ignoring)\n";
exit(0);
}
sub forbidden_cmd {
my ($cmd, $res) = @_;
ldie "command '$cmd' cannot be used with MARS (it is impossible to carry out uniquely and could therefore lead to a disaster)\n";
}
sub nyi_cmd {
my ($cmd, $res) = @_;
ldie "command '$cmd' is not yet implemented\n";
}
sub is_module_loaded {
open TEST, "lsmod | grep mars |";
my $res = <TEST>;
close TEST;
return $res;
}
sub _create_cluster {
my ($cmd) = @_;
system("mkdir $mars") unless -d $mars;
system("mkdir $mars/ips") unless -d "$mars/ips";
system("mkdir $mars/userspace") unless -d "$mars/userspace";
system("mkdir $mars/defaults") unless -d "$mars/defaults";
system("mkdir $mars/defaults-$host") unless -d "$mars/defaults-$host";
system("mkdir $mars/todo-global") unless -d "$mars/todo-global";
symlink($ip, "$mars/ips/ip-$host");
symlink("1", "$mars/todo-global/deleted-$host");
}
sub create_cluster {
my ($cmd, $peer) = @_;
ldie "cluster is already created\n" if !$force && -d "$mars/ips";
ldie "mars module is loaded, please unload first\n" if is_module_loaded();
_create_cluster(@_);
}
sub join_cluster {
my ($cmd, $peer) = @_;
if(glob("$mars/resource-*") or glob("$mars/ips/*")) {
ldie "Sorry, some resources already exist!\nThis is dangerous!\nIf you are sure that no resource clash is possible, re-invoke this command with '--force' option\n" unless $force;
}
ldie "mars module is loaded, please unload first\n" if is_module_loaded();
lprint "joining cluster via rsync (peer='$peer')\n";
# check connection
system("ssh $peer uname -a") == 0 or ldie "oops, no connection to $peer ...\n";
_create_cluster(@_);
system("rsync --recursive --links -v $peer:$mars/ips/ $mars/ips/") == 0 or ldie "oops\n";
symlink($ip, "$mars/ips/ip-$host");
system("rsync --recursive --links -v $mars/ips/ $peer:$mars/ips/") == 0 or ldie "oops\n";
}
sub leave_cluster {
my ($cmd) = @_;
my $check = "/mars/resource-*/*-$host";
ldie "I am member of some resources\n" if glob($check) && !$force;
_create_delete("$mars/ips/$host");
}
sub create_res {
my ($cmd, $res, $dev, $appear) = @_;
my $create = ($cmd eq "create-resource");
ldie "undefined device or size argument\n" unless $dev;
$appear = $res if !$appear;
check_id($appear) if $appear;
if($create) {
ldie "resource '$res' already exists\n" if -d "$mars/resource-$res";
lprint "creating new resource '$res'\n";
} else {
ldie "resource '$res' has been already joined -- this is dangerous!\n" if -e "$mars/resource-$res/connect-$host";
lprint "joining to existing resource '$res'\n";
}
my $size = get_size($dev);
if($size > 0) {
$dev = "";
} else {
ldie "block device '$dev' does not exist\n" unless -b $dev;
ldie "block device '$dev' must be an absolute path starting with '/'\n" unless $dev =~ m/^\//;
use Fcntl 'SEEK_END';
open(TEST, "<$dev") or ldie "cannot open device for reading\n";
$size = sysseek(TEST, 0, SEEK_END);
close(TEST);
lprint "device size = $size bytes\n";
ldie "implausible size $size" unless $size > 0;
}
my $tmp = "$mars/.tmp.$res";
my $primary;
my $replay;
if($create) {
_create_cluster(@_);
system("rm -rf $tmp");
system("mkdir $tmp") == 0 or ldie "could not create resource '$res'\n";
symlink($size, "$tmp/size") or ldie "cannot create size indicator symlink\n";
} else {
$tmp = "$mars/resource-$res";
ldie "resource '$res' does not exist\n" unless -d $tmp;
$primary = readlink("$tmp/primary") or ldie "cannot determine primary\n";
if($primary eq "(none)") {
my @list = glob("$tmp/replay-*") or ldie "cannot find any candidate for primary\n";
my $first = pop @list or ldie "bad glob list\n";
$primary = readlink($first) or ldie "cannot determine peer\n";
$primary =~ s/^log-[0-9]+-(.*),.*,.*$/$1/;
lprint "using '$primary' as primary\n";
}
ldie "resource '$res' is already joined\n" if -e "$tmp/data-$host";
ldie "my ip '$ip' is not registered -- please run 'join-cluster' first\n" unless -l "$mars/ips/ip-$host";
my $oldsize = readlink("$tmp/size") or ldie "cannot determine old size\n";
if($size < $oldsize) {
lprint "adjusting size to $oldsize\n";
$size = $oldsize;
}
ldie "sizes differ: real size = $oldsize, but requested size = $size\n" unless $oldsize == $size;
$replay = readlink("$tmp/replay-$primary") or ldie "cannot read replay status of primary '$primary'\n";
$replay =~ s/,[0-9]+,[0-9]+$/,0,0/;
}
my $file = "$tmp/data-$host";
if(!$dev) {
lprint "creating sparse file '$file' with size $size\n";
open(OUT, ">$file") or ldie "could not open '$file'\n";
use Fcntl 'SEEK_SET';
sysseek(OUT, $size-1, SEEK_SET) == $size-1 or ldie "could not seek\n";
syswrite(OUT, '\0', 1) == 1 or ldie "cannot init sparse file\n";
close OUT;
} else {
lprint "using existing device '$dev'\n";
symlink($dev, $file) or ldie "cannot create device symlink\n";
}
if($appear) {
# TODO: check for uniqeness of $appear
lprint "resource '$res' will appear as local device '/dev/mars/$appear'\n";
system("rm -f $tmp/device-$host");
symlink($appear, "$tmp/device-$host") or ldie "cannot create symlink for local device appearance\n";
}
mkdir("$tmp/userspace") unless -d "$tmp/userspace";
mkdir("$tmp/defaults") unless -d "$tmp/defaults";
mkdir("$tmp/defaults-$host");
mkdir("$tmp/actual-$host");
my $todo = "$tmp/todo-$host";
mkdir($todo);
symlink("1", "$todo/attach");
symlink("1", "$todo/connect");
symlink("1", "$todo/sync");
symlink("1", "$todo/allow-replay");
system("rm -f $tmp/syncstatus-$host");
if($create) {
symlink($host, "$tmp/primary") or ldie "cannot create primary symlink\n";
symlink($size, "$tmp/syncstatus-$host") or ldie "cannot create primary syncstatus\n";
symlink("log-000000001-$host,0,0", "$tmp/replay-$host") or ldie "cannot create replay status\n";
system("touch $tmp/log-000000001-$host");
rename($tmp, "$mars/resource-$res") or ldie "cannot finalize resource '$res'\n";
lprint "successfully created resource '$res'\n";
} else {
_set_replaylink($tmp, $replay, $primary);
symlink("0", "$tmp/syncstatus-$host") or ldie "cannot start initial sync\n";
system("rm -f $tmp/connect-$host");
symlink($primary, "$tmp/connect-$host") or ldie "cannot create peer connect symlink\n";
symlink($host, "$tmp/connect-$primary") unless -l "$tmp/connect-$primary";
lprint "successfully joined resource '$res'\n";
}
}
sub leave_res {
my ($cmd, $res) = @_;
check_not_primary(@_);
foreach my $tmp (glob("$mars/resource-$res/todo-$host/*")) {
my $status = readlink($tmp);
ldie "cannot read symlink '$tmp'\n" unless defined($status);
ldie "switch '$tmp' is not off\n" if $status;
}
foreach my $tmp (glob("$mars/resource-$res/actual-$host/*")) {
my $status = readlink($tmp);
ldie "cannot read symlink '$tmp'\n" unless defined($status);
ldie "running status '$tmp' is not off\n" if $status;
}
my $peerlink = "$mars/resource-$res/connect-$host";
my $peer = readlink($peerlink) or ldie "cannot read symlink '$peerlink'\n";
foreach my $tmp (glob("$mars/resource-$res/connect-*")) {
next if $tmp eq $peerlink;
my $target = readlink($tmp) or ldie "cannot read symlink '$tmp'\n";
next unless $target eq $host;
lprint "changing '$tmp' from '$host' to '$peer'\n";
unlink("$tmp.new");
symlink($peer, "$tmp.new") or ldie "cannot create symlink '$tmp.new'\n";
rename("$tmp.new", $tmp) or ldie "cannot create symlink '$tmp'\n";
}
unlink($peerlink);
}
sub logrotate_res {
my ($cmd, $res) = @_;
check_primary(@_);
my @paths = glob("$mars/resource-$res/log-*-$host") or ldie "cannot find any logfiles\n";
@paths = sort(@paths);
my $last = pop(@paths);
if(-z $last) {
lprint "an empty logfile '$last' already exists, nothing to do.\n";
return;
}
my $nr = $last;
$nr =~ s/^.*log-([0-9]+)-.+$/$1/;
my $next = sprintf("$mars/resource-$res/log-%09d-$host", $nr + 1);
ldie "logfile '$next' already exists\n" if -e $next;
system("touch $next");
}
sub _allowed_logdelete {
my ($cmd, $res) = @_;
my $min = -1;
my @paths = glob("$mars/resource-$res/replay-*") or ldie "cannot find any replay symlinks\n";
foreach my $path (@paths) {
my $target = readlink($path) or ldie "cannot read symlink '$path'\n";
my $nr = $target;
$nr =~ s/^log-([0-9]+)-.*$/$1/;
$min = $nr if ($nr < $min || $min < 0);
}
lprint "max allowed deletable logfile number: $min\n";
return $min;
}
sub _create_delete {
my ($target) = @_;
my $nr = 0;
my @paths = glob("$mars/todo-global/delete-*");
if(@paths) {
my $last = pop(@paths);
$nr = $last;
$nr =~ s/^.*delete-([0-9]+)$/$1/;
}
my $new = sprintf("$mars/todo-global/delete-%09d", $nr + 1);
lprint "create symlink $new -> $target\n";
symlink($target, $new);
}
sub logdelete_res {
my ($cmd, $res) = @_;
my @paths = glob("$mars/resource-$res/log-*") or ldie "cannot find any logfiles\n";
@paths = sort(@paths);
my $max = _allowed_logdelete(@_);
my $nr = 0;
for (;;) {
my $first = shift(@paths);
last unless $first;
$nr = $first;
$nr =~ s/^.*log-([0-9]+)-.+$/$1/;
last unless $nr < $max;
lprint "chosen '$first' for deletion\n";
_create_delete($first);
}
lprint "removing left-over version symlinks...\n";
foreach my $versionlink (glob("$mars/resource-$res/version-*")) {
my $nrv = $versionlink;
$nrv =~ s/^.*\/version-([0-9]+)-.+$/$1/;
next unless $nrv < $max - 1;
_create_delete($versionlink);
}
}
sub attach_res {
my ($cmd, $res) = @_;
my $detach = ($cmd eq "detach");
my $path = "$mars/resource-$res/todo-$host/attach";
_switch($cmd, $res, $path, !$detach);
}
sub connect_res {
my ($cmd, $res) = @_;
my $disconnect = ($cmd eq "disconnect");
my @paths = glob("$mars/resource-$res/todo-*/");
for my $path (@paths) {
_switch($cmd, $res, "$path/connect", !$disconnect);
}
}
sub connect_local_res {
my ($cmd, $res) = @_;
my $disconnect = ($cmd eq "disconnect-local");
my $path = "$mars/resource-$res/todo-$host/connect";
_switch($cmd, $res, $path, !$disconnect);
}
sub pause_sync_res {
my ($cmd, $res) = @_;
my $pause = ($cmd eq "pause-sync");
my @paths = glob("$mars/resource-$res/todo-*/");
for my $path (@paths) {
_switch($cmd, $res, "$path/sync", !$pause);
}
}
sub pause_sync_local_res {
my ($cmd, $res) = @_;
my $pause = ($cmd eq "pause-sync-local");
my $path = "$mars/resource-$res/todo-$host/sync";
_switch($cmd, $res, $path, !$pause);
}
sub pause_replay_res {
my ($cmd, $res) = @_;
my $pause = ($cmd eq "pause-replay");
my @paths = glob("$mars/resource-$res/todo-*/");
for my $path (@paths) {
_switch($cmd, $res, "$path/allow-replay", !$pause);
}
}
sub pause_replay_local_res {
my ($cmd, $res) = @_;
my $pause = ($cmd eq "pause-replay-local");
my $path = "$mars/resource-$res/todo-$host/allow-replay";
_switch($cmd, $res, $path, !$pause);
}
sub up_res {
my ($cmd, $res) = @_;
my $down = ($cmd eq "down");
if($down) {
pause_replay_res("pause-replay", $res);
pause_sync_res("pause-sync", $res);
connect_res("disconnect", $res);
attach_res("detach", $res);
} else {
attach_res("attach", $res);
connect_res("connect", $res);
pause_sync_res("resume-sync", $res);
pause_replay_res("resume-replay", $res);
}
}
sub fake_local_res {
my ($cmd, $res) = @_;
my $path = "$mars/resource-$res/todo-$host/sync";
_switch($cmd, $res, $path, 0);
#check_status($res, "copy-syncstatus-$host", 0);
my $size = readlink("$mars/resource-$res/size") or ldie "cannot read size\n";
my $target = "$mars/resource-$res/syncstatus-$host";
symlink($size, "$target.tmp") or ldie "cannot create faked syncstatus\n";
rename("$target.tmp", $target) or ldie "cannot reaname symlink\n";
}
sub _primary_res {
my ($res, $host, $pri, $old) = @_;
my $tmp = "$mars/resource-$res/.tmp.primary";
system("rm -f $tmp");
symlink($host, $tmp) or ldie "cannot create new primary symlink\n";
rename($tmp, $pri) or ldie "cannot install new primary symlink\n";
lprint "primary changed from '$old' to '$host'\n";
}
sub primary_res {
my ($cmd, $res) = @_;
my $sec = ($cmd eq "secondary");
my $pri = "$mars/resource-$res/primary";
my $old = readlink($pri) or ldie "cannot determine current primary\n";
if($sec) {
ldie "for safety reasons, switching to secondary is only allowed when I ($host) am primary\n" if($old ne $host);
$host = "(none)";
} elsif($old eq $host) {
lprint "I am already primary.\n";
exit(0);
} elsif($force) {
lprint "FORCING myself ($host) to primary...\n";
} else { # try to switch myself to primary
lprint "trying to switch $host to primary...\n";
check_sync_finished($res, $host);
check_todo($res, "connect", 1, 1);
_primary_res($res, "(none)", $pri, $old) unless $old eq "(none)";
check_primary_gone($res);
check_splitbrain($res, $host, -1);
}
_primary_res($res, $host, $pri, $old);
}
sub invalidate_res {
my ($cmd, $res) = @_;
check_not_primary(@_);
my $old_replay = readlink("$mars/resource-$res/replay-$host") or ldie "cannot read my own replay status symlink\n";
$old_replay =~ s/^([^,]+),.*/$1/;
my $repl = "$mars/resource-$res/todo-$host/allow-replay";
my $was_on = readlink($repl);
if ($was_on) {
_switch("pause-replay-local", $res, $repl, 0);
_trigger();
lprint "waiting...\n";
sleep(15);
}
my $dst = "$mars/resource-$res/syncstatus-$host";
system("rm -f $dst");
symlink("0", $dst) or ldie "cannot create invalidation symlink '$dst'\n";
my $primary = readlink("$mars/resource-$res/primary") or ldie "cannot determine primary\n";
my $replay = readlink("$mars/resource-$res/replay-$primary") or ldie "cannot read replay status of primary '$primary'\n";
$replay =~ s/,[0-9]+,[0-9]+$/,0,0/;
_set_replaylink("$mars/resource-$res", $replay, $primary);
if ($was_on) {
_trigger();
lprint "waiting...\n";
sleep(15);
_switch("resume-replay-local", $res, $repl, 1);
}
}
sub resize_res {
my ($cmd, $res) = @_;
check_primary(@_);
my @actsizes = glob("$mars/resource-$res/actsize-*");
ldie "resource $res has no actsize-* symlinks\n" unless @actsizes;
my $lnk = "$mars/resource-$res/size";
my $old_size = readlink($lnk) or ldie "cannot read symlink '$lnk'\n";
my $min_size = 0;
foreach my $actsize (@actsizes) {
my $this_size = readlink($actsize) or ldie "cannot read symlink '$actsize'\n";
if (!$min_size || $this_size < $min_size) {
$min_size = $this_size;
}
}
lprint "old_size=$old_size\n";
lprint "new_size=$min_size\n";
ldie "only increases of the size are possible\n" if $min_size <= $old_size && !$force;
foreach my $switch (glob("$mars/resource-$res/todo-*/sync")) {
my $this_switch = readlink($switch);
ldie "cannot read symlink '$switch'\n" unless defined($this_switch);
ldie "sync on '$switch' is switched on -- use marsadm pause-sync to stop\n" unless !$this_switch;
}
my @syncsizes = glob("$mars/resource-$res/syncstatus-$host");
foreach my $syncsize (@syncsizes) {
my $this_size = readlink($syncsize) or ldie "cannot read symlink '$syncsize'\n";
ldie "sync on $syncsize has not yet finished: $this_size != $old_size (DANGEROUS FIX: if you know what you are doing, marsadm fake-sync can 'fix' it -- but this may need a full-sync afterwards)\n" unless $this_size == $old_size;
}
foreach my $syncsize (@syncsizes) {
my $this_size = readlink($syncsize) or ldie "cannot read symlink '$syncsize'\n";
unlink("$syncsize.new");
symlink($min_size, "$syncsize.new") or ldie "cannot create size symlink '$syncsize.new'\n";
rename("$syncsize.new", $syncsize) or ldie "cannot create size symlink '$syncsize'\n";;
}
unlink("$lnk.new");
symlink($min_size, "$lnk.new") or ldie "cannot create size symlink '$lnk.new'\n";
rename("$lnk.new", $lnk) or ldie "cannot create size symlink '$lnk'\n";;
}
sub role_cmd {
my ($cmd, $res) = @_;
my $pri = "$mars/resource-$res/primary";
my $old = readlink($pri) or ldie "cannot determine current primary\n";
if($old eq $host) {
lprint "primary\n";
} else {
lprint "secondary\n";
}
}
sub helplist {
my $temp;
$temp = shift;
lprint "ERROR: $temp" if ($temp);
lprint "
marsadm <command> [<resource>] [<option>]
<command> =
cluster : create-cluster join-cluster leave-cluster
resource : create-resource join-resource leave-resource
primary secondary invalidate
up down
replication: connect disconnect connect-local disconnect-local
replay : pause-replay resume-replay pause-replay-local resume-replay-local
sync : pause-sync resume-sync pause-sync-local resume-sync-local fake-sync
logfile : log-rotate log-delete log-delete-all
device : attach detach
internal : help version
<resource> = name of resource or \"all\" for all resources
<option> = special by command
Advanced information are also available here: http://http://wiki.intranet.1and1.com/bin/view/PO/ProjektTEC1603
\n";
exit 0;
}
sub version {
lprint "$0 $Id\n";
#lprint "my IP is $ip\n";
exit 0;
}
##################################################################
my %cmd_table =
(
# new keywords
"create-cluster" => \&create_cluster,
"join-cluster" => \&join_cluster,
"leave-cluster" => \&leave_cluster,
"create-resource" => \&create_res,
"join-resource" => \&create_res,
"leave-resource" => \&leave_res,
"log-rotate" => \&logrotate_res,
"log-delete" => \&logdelete_res,
"log-delete-all" => \&logdelete_res,
"fake-sync" => \&fake_local_res,
# compatible keywords
"attach" => \&attach_res,
"detach" => \&attach_res,
"connect" => \&connect_res,
"disconnect" => \&connect_res,
"connect-local" => \&connect_local_res,
"disconnect-local" => \&connect_local_res,
"syncer" => \&ignore_cmd,
"up" => \&up_res,
"down" => \&up_res,
"primary" => \&primary_res,
"secondary" => \&primary_res,
"invalidate" => \&invalidate_res,
"invalidate-remote" => \&forbidden_cmd,
"resize" => \&resize_res,
"create-md" => \&senseless_cmd,
"get-gi" => \&ignore_cmd,
"show-gi" => \&ignore_cmd,
"dump-md" => \&senseless_cmd,
"outdate" => \&ignore_cmd,
"adjust" => \&ignore_cmd,
"wait-connect" => \&nyi_cmd,
"role" => \&role_cmd,
"state" => \&role_cmd,
"cstate" => \&nyi_cmd,
"status" => \&nyi_cmd,
"dump" => \&senseless_cmd,
"verify" => \&nyi_cmd,
"pause-sync" => \&pause_sync_res,
"resume-sync" => \&pause_sync_res,
"pause-sync-local" => \&pause_sync_local_res,
"resume-sync-local" => \&pause_sync_local_res,
"pause-replay" => \&pause_replay_res,
"resume-replay" => \&pause_replay_res,
"pause-replay-local" => \&pause_replay_local_res,
"resume-replay-local" => \&pause_replay_local_res,
"new-current-uuid" => \&senseless_cmd,
"dstate" => \&nyi_cmd,
"hidden-commands" => \&ignore_cmd,
);
my @args;
foreach my $arg (@ARGV) {
if($arg eq "--force") {
$force++;
next;
}
if($arg =~ s/^force-//) {
$force++;
}
push @args, $arg;
}
my $cmd = shift @args || helplist "command argument is missing\n";
$notify = "(cmd: $cmd)" unless $cmd eq "version";
if($cmd =~ m/^help$/ || $cmd =~ m/^h$/) {
helplist;
}
if($cmd =~ m/^version$/ || $cmd =~ m/^v$/) {
version;
}
ldie "only root may use this tool\n" if $< != 0; # getpid() seems to be missing in perlfunc
helplist "unknown command $cmd\n" if !exists $cmd_table{$cmd};
my $res = "";
unless($cmd =~ m/^(create|leave)-cluster$/) {
$res = shift @args || helplist "resource argument is missing\n";
check_id($res);
}
lprint "using FORCE option -- hopefully you know what you do!\n" if $force;
sub do_res {
my $cmd = shift;
my $res = shift;
$res = check_res($res) unless $cmd =~ m/^(join|create|leave)-cluster|create-resource$/;
check_res_member($res) unless $cmd =~ m/^(join|create)-(cluster|resource)|leave-cluster$/;
my $func = $cmd_table{$cmd};
&{$func}($cmd, $res, @_);
}
if($res eq "all") {
foreach $res (glob("$mars/resource-*")) {
next unless -e "$res/data-$host";
$res =~ s/^.*\/resource-(.*)$/$1/;
lprint "--------- resource $res\n";
do_res($cmd, $res, @args);
}
} else {
do_res($cmd, $res, @args);
}
_trigger();