#!/usr/bin/perl -w # (c) 2010 Thomas Schoebel-Theuer / 1&1 Internet AG use strict; use English; use warnings; my $mars = "/mars"; my $host = `uname -n` or die "cannot determine my network node name\n"; chomp $host; my $ip = `ip a` or die "cannot determine my IP address\n"; $ip =~ s/\A.*inet +(?!127\.0\.)([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+).*\Z/$1/ms or die "cannot parse my IP address\n"; umask 0177; die "only root may use this tool\n" unless `whoami` eq "root\n"; # getpid() seems to be missing in perlfunc ################################################################## # syntactic checks sub check_id { my $str = shift; die "identifier '$str' has disallowed characters" unless $str =~ m/^[A-Za-z_][A-Za-z0-9_]*$/; die "identifier '$str' is too long (only 16 chars allowed)" if length($str) > 16; } ################################################################## # semantic checks sub check_res { my $res = shift; die "resource '$res' does not exist\n" unless -d "$mars/resource-$res"; } sub check_res_member { my $res = shift; die "sorry, I have not yet joined to resource '$res'\n" unless -e "$mars/resource-$res/data-$host"; } sub get_size { my $arg = shift; return -1 unless $arg =~ m/^[0-9.]+[kmgtp]?$/i; my $mod = $arg; $arg =~ s/[^0-9]+$//; $mod =~ s/^[0-9]+//; $_ = $mod; SWITCH: { /k/i and $arg *= 1024, last SWITCH; /m/i and $arg *= 1024 * 1024, last SWITCH; /g/i and $arg *= 1024 * 1024 * 1024, last SWITCH; /t/i and $arg *= 1024 * 1024 * 1024 * 1024, last SWITCH; /p/i and $arg *= 1024 * 1024 * 1024 * 1024 * 1024, last SWITCH; } return $arg; } ################################################################## # helpers sub _trigger { system("echo 1 > /proc/sys/mars 2>/dev/null"); } sub _switch { my ($path, $on) = @_; # Sadly, there is no system call lchmod(). # It would be better to use the x flag even of symlinks if it were possible. # As a workaround, we use the owner uid - fortunately this is # changeable, even by perl (although direct calls to lchown() # seems to be missing) if($on) { system("chown --no-dereference 0 $path"); } else { system("chown --no-dereference nobody $path"); } my $oldmode = (lstat $path)[2] & 0700; my $newmode = $on ? $oldmode | 0100 : $oldmode & ~0100; print "chmod '$path' $oldmode $newmode"; chmod($newmode, $path) == 1 or die "cannot chmod '$path'\n"; } sub _writable { my ($path, $on) = @_; my $oldmode = (lstat $path)[2] & 0700; my $newmode = $on ? $oldmode | 0200 : $oldmode & ~0200; print "chmod '$path' $oldmode $newmode"; chmod($newmode, $path) == 1 or die "cannot chmod '$path'\n"; } ################################################################## # commands sub ignore_cmd { my ($cmd, $res) = @_; print "ignoring command '$cmd' on resource '$res'\n"; exit(0); } sub senseless_cmd { my ($cmd, $res) = @_; print "command '$cmd' makes no sense with MARS (ignoring)\n"; exit(0); } sub forbidden_cmd { my ($cmd, $res) = @_; die "command '$cmd' cannot be used with MARS (it is impossible to carry out uniquely and could therefore lead to a disaster)\n"; } sub nyi_cmd { my ($cmd, $res) = @_; die "command '$cmd' is not yet implemented\n"; } sub join_system { my ($cmd, $peer, $force) = @_; if(glob("$mars/resource-*")) { die "Sorry, some resources already exist!\nThis is dangerous!\nIf you are sure that no resource clash is possible, re-invoke this command with '--force' option\n" unless ($force and $force =~ m/--force/); } print "joining system via rsync (peer='$peer')\n"; system("mkdir $mars") unless -d $mars; system("mkdir $mars/ips") unless -d "$mars/ips"; system("rsync --recursive --links -v $peer:$mars/ips/ $mars/ips/") == 0 or die "oops\n"; symlink($ip, "$mars/ips/ip-$host"); system("rsync --recursive --links -v $mars/ips/ $peer:$mars/ips/") == 0 or die "oops\n"; } sub create_res { my ($cmd, $res, $dev, $appear) = @_; my $create = ($cmd eq "create-resource"); die "undefined device or size argument\n" unless $dev; check_id($appear) if $appear; if($create) { die "resource '$res' already exists\n" if -d "$mars/resource-$res"; print "creating new resource '$res'\n"; } else { print "joining to existing resource '$res'\n"; } my $size = get_size($dev); if($size > 0) { $dev = ""; } else { die "block device '$dev' does not exist\n" unless -b $dev; die "block device '$dev' must be an absolute path starting with '/'\n" unless $dev =~ m/^\//; use Fcntl 'SEEK_END'; open(TEST, "<$dev") or die "cannot open device for reading\n"; $size = sysseek(TEST, 0, SEEK_END); close(TEST); print "device size = $size bytes\n"; die "implausible size" unless $size > 0; } my $tmp = "$mars/.tmp.$res"; my $primary; if($create) { system("mkdir $mars") unless -d $mars; system("mkdir $mars/ips") unless -d "$mars/ips"; symlink($ip, "$mars/ips/ip-$host"); system("rm -rf $tmp"); system("mkdir $tmp") == 0 or die "could not create resource '$res'\n"; symlink($size, "$tmp/size") or die "cannot create size indicator symlink\n"; } else { $tmp = "$mars/resource-$res"; die "resource '$res' does not exist\n" unless -d $tmp; $primary = readlink("$tmp/primary") or die "cannot determine primary\n"; die "resource '$res' is already joined\n" if -e "$tmp/data-$host"; die "my ip '$ip' is not registered -- please run 'join-system' first\n" unless -l "$mars/ips/ip-$host"; my $oldsize = readlink("$tmp/size") or die "cannot determine old size\n"; if($size < $oldsize) { print "adjusting size to $oldsize\n"; $size = $oldsize; } die "sizes differ: real size = $oldsize, but requested size = $size\n" unless $oldsize == $size; } my $file = "$tmp/data-$host"; if(!$dev) { print "creating sparse file '$file' with size $size\n"; open(OUT, ">$file") or die "could not open '$file'\n"; use Fcntl 'SEEK_SET'; sysseek(OUT, $size-1, SEEK_SET) == $size-1 or die "could not seek\n"; syswrite(OUT, '\0', 1) == 1 or die "cannot init sparse file\n"; close OUT; } else { print "using existing device '$dev'\n"; symlink($dev, $file) or die "cannot create device symlink\n"; } if($appear) { # TODO: check for uniqeness of $appear print "resource '$res' will appear as local device '/dev/mars/$appear'\n"; system("rm -f $tmp/device-$host"); symlink($appear, "$tmp/device-$host") or die "cannot create symlink for local device appearance\n"; } if($create) { symlink($host, "$tmp/primary") or die "cannot create primary symlink\n"; symlink("log-000000001-$host,0", "$tmp/replay-$host") or die "cannot create replay status\n"; rename($tmp, "$mars/resource-$res") or die "cannot finalize resource '$res'\n"; print "successfully created resource '$res'\n"; } else { my $replay = readlink("$tmp/replay-$primary") or die "cannot read replay status of primary '$primary'\n"; $replay =~ s/-$primary,/-$host,/ or die "something is wrong here\n"; system("rm -f $tmp/replay-$host"); symlink($replay, "$tmp/replay-$host") or die "cannot create replay status\n"; system("rm -f $tmp/syncstatus-$host"); symlink("0", "$tmp/syncstatus-$host") or die "cannot start initial sync\n"; system("rm -f $tmp/connect-$host"); symlink($primary, "$tmp/connect-$host") or die "cannot create peer symlink\n"; symlink($host, "$tmp/connect-$primary") unless ( -l "$tmp/connect-$primary" or -l "$tmp/off.connect-$primary" ); print "successfully joined resource '$res'\n"; } } sub attach_res { my ($cmd, $res) = @_; my $detach = ($cmd eq "detach"); my $path = "$mars/resource-$res/data-$host"; _switch($path, !$detach); } sub connect_res { my ($cmd, $res) = @_; my $disconnect = ($cmd eq "disconnect"); my $path = "$mars/resource-$res/connect-$host"; _switch($path, !$disconnect); } sub pause_res { my ($cmd, $res) = @_; my $pause = ($cmd eq "pause-sync"); my $path = "$mars/resource-$res/syncstatus-$host"; _switch($path, !$pause); } sub up_res { my ($cmd, $res) = @_; my $down = ($cmd eq "down"); if($down) { pause_res("pause-sync", $res); connect_res("disconnect", $res); attach_res("detach", $res); } else { attach_res("attach", $res); connect_res("connect", $res); pause_res("resume-sync", $res); } } sub primary_res { my ($cmd, $res) = @_; my $sec = ($cmd eq "secondary"); my $pri = "$mars/resource-$res/primary"; my $old = readlink($pri) or die "cannot determine current primary\n"; if($sec) { die "for safety reasons, switching to secondary is only allowed when I am primary\n" if($old ne $host); $host = "(none)"; } elsif($old eq $host) { print "I am already primary.\n"; exit(0); } # TODO: check whether we can switch without interrupting service.... my $tmp = "$mars/resource-$res/.tmp.primary"; system("rm -f $tmp"); symlink($host, $tmp) or die "cannot create new primary symlink\n"; rename($tmp, $pri) or die "cannot install new primary symlink\n"; print "primary changed from '$old' to '$host'\n"; } sub invalidate_res { my ($cmd, $res) = @_; my $pri = "$mars/resource-$res/primary"; my $old = readlink($pri) or die "cannot determine current primary\n"; my $dst = "$mars/resource-$res/syncstatus-$host"; die "primary side cannot be invalidated\n" if $old eq $host; system("rm -f $dst"); symlink("0", $dst) or die "cannot create invalidation symlink '$dst'\n"; } sub role_cmd { my ($cmd, $res) = @_; my $pri = "$mars/resource-$res/primary"; my $old = readlink($pri) or die "cannot determine current primary\n"; if($old eq $host) { print "primary\n"; } else { print "secondary\n"; } } ################################################################## my %cmd_table = ( # new keywords "join-system" => \&join_system, "create-resource" => \&create_res, "join-resource" => \&create_res, # compatible keywords "attach" => \&attach_res, "detach" => \&attach_res, "connect" => \&connect_res, "disconnect" => \&connect_res, "syncer" => \&ignore_cmd, "up" => \&up_res, "down" => \&up_res, "primary" => \&primary_res, "secondary" => \&primary_res, "invalidate" => \&invalidate_res, "invalidate-remote" => \&forbidden_cmd, "resize" => \&nyi_cmd, "create-md" => \&senseless_cmd, "get-gi" => \&ignore_cmd, "show-gi" => \&ignore_cmd, "dump-md" => \&senseless_cmd, "outdate" => \&ignore_cmd, "adjust" => \&ignore_cmd, "wait-connect" => \&nyi_cmd, "role" => \&role_cmd, "state" => \&role_cmd, "cstate" => \&nyi_cmd, "status" => \&nyi_cmd, "dump" => \&senseless_cmd, "verify" => \&nyi_cmd, "pause-sync" => \&pause_sync, "resume-sync" => \&pause_sync, "new-current-uuid" => \&senseless_cmd, "dstate" => \&nyi_cmd, "hidden-commands" => \&ignore_cmd, ); my $cmd = shift || die "command argument is missing\n"; my $res = shift || die "resource argument is missing\n"; die "unknown command '$cmd'\n" if !exists $cmd_table{$cmd}; check_id($res); sub do_res { my $cmd = shift; my $res = shift; check_res($res) unless $cmd =~ m/^(join-system|create-resource)$/; check_res_member($res) unless $cmd =~ m/^(join|create)-(system|resource)$/; my $func = $cmd_table{$cmd}; &{$func}($cmd, $res, @_); } if($res eq "all") { foreach $res (glob("$mars/resource-*")) { next unless -e "$res/data-$host"; $res =~ s/^.*\/resource-(.*)$/$1/; print "--------- resource $res\n"; do_res($cmd, $res, @ARGV); } } else { do_res($cmd, $res, @ARGV); } _trigger();