marsadm: use Lamport time for symlink creation

This commit is contained in:
Thomas Schoebel-Theuer 2013-04-28 22:52:58 +02:00 committed by Thomas Schoebel-Theuer
parent db8e4caacf
commit b7c1900820
1 changed file with 123 additions and 78 deletions

View File

@ -41,6 +41,10 @@ sub lwarn {
# low-level infrastructure # low-level infrastructure
my @link_list = ();
my %link_hash;
my $verbose = 0;
sub get_link { sub get_link {
my ($path, $unchecked) = @_; my ($path, $unchecked) = @_;
my $result = readlink($path); my $result = readlink($path);
@ -51,6 +55,60 @@ sub get_link {
return $result; return $result;
} }
# Map a final path to its temporary sibling by prefixing the last path
# component with ".tmp." (e.g. "/a/b/c" -> "/a/b/.tmp.c").
#
# A path without any directory part is handled as well ("c" -> ".tmp.c"),
# so the result always differs from the input and never aliases the final
# name when it is used as a staging name.
#
# Parameter: the final path (string).
# Returns:   the temporary path (string); the argument is not modified.
sub to_tmp {
    my $path = shift;
    # rindex() yields -1 when there is no '/', which makes $cut 0 and puts
    # the prefix at the very beginning of the name.
    my $cut = rindex($path, '/') + 1;
    return substr($path, 0, $cut) . '.tmp.' . substr($path, $cut);
}
# Inverse of to_tmp(): drop the ".tmp." prefix from a path component
# ("/a/b/.tmp.c" -> "/a/b/c").
#
# When several components carry the prefix, only the last one is stripped.
# A bare ".tmp.c" without directory part becomes "c". Paths that do not
# contain the prefix at the start of a component are returned unchanged.
#
# Parameter: the temporary path (string).
# Returns:   the final path (string); the argument is not modified.
sub from_tmp {
    my $path = shift;
    # The directory part is optional so that names without any '/' are
    # covered too; $1 is undefined in that case.
    $path =~ s{^(.*/)?\.tmp\.}{ defined($1) ? $1 : '' }e;
    return $path;
}
# Stage a symlink $dst -> $src.
#
# The link is created under the temporary name given by to_tmp($dst), and
# $dst is appended to @link_list; finish_links() later renames every
# queued temporary link to its final name.
#
# Parameters:
#   $src - the link target (the value stored in the symlink)
#   $dst - the final path of the symlink
# Calls ldie when the temporary symlink cannot be created.
sub set_link {
    my ($src, $dst) = @_;
    my $dst_tmp = to_tmp($dst);
    # Best effort: a stale temporary link may or may not exist.
    unlink($dst_tmp);
    symlink($src, $dst_tmp) or ldie "cannot create symlink '$dst' -> '$src': $!\n";
    # The _order_ is important! Remove an already queued entry before
    # re-appending, so that $dst moves to the end of the queue.
    @link_list = grep { $_ ne $dst } @link_list if exists($link_hash{$dst});
    $link_hash{$dst} = $src;
    push @link_list, $dst;
}
# Finalize all symlinks staged by set_link().
#
# Every queued link is stamped with one common timestamp and then renamed
# from its temporary name (see to_tmp()) to its final name; afterwards
# _trigger() is called. The timestamp is the value of "lamport_now=" read
# from /proc/sys/mars/lamport_clock when that file can be opened and
# contains such an entry, and the local time() otherwise.
#
# Returns immediately (without calling _trigger()) when nothing is queued.
# Calls ldie when a rename fails.
sub finish_links {
    return unless @link_list;
    my $timestamp = time();
    my $lamport = "";
    if (open(my $fh, "<", "/proc/sys/mars/lamport_clock")) {
        # Slurp the whole file, but keep the change of $/ local to this
        # block: a plain "undef $/" would silently switch every later
        # <...> read in the program to slurp mode.
        local $/;
        $lamport = <$fh>;
        # An empty file yields undef; avoid matching against undef below.
        $lamport = "" unless defined($lamport);
        close($fh);
    }
    if ($lamport =~ m/lamport_now=([0-9.]+)/) {
        $timestamp = $1;
        lprint "using lamport timestamp $timestamp\n" if $verbose;
    }
    # Test the queue itself instead of the shifted value, so that an entry
    # which happens to be false in boolean context ("0", "") cannot end the
    # loop prematurely.
    while (@link_list) {
        my $link = shift @link_list;
        my $link_tmp = to_tmp($link);
        # NOTE(review): Perl's utime() presumably follows symlinks, i.e. it
        # would stamp (or fail on) the link's target and not the symlink
        # itself -- confirm that the intended timestamp really ends up on
        # the link. The result is deliberately not checked here.
        utime($timestamp, $timestamp, $link_tmp);
        rename($link_tmp, $link) or ldie "cannot finalize symlink '$link': $!\n";
        if ($verbose) {
            my $target = readlink($link);
            lprint "created symlink '$link' -> '$target'\n";
        }
    }
    _trigger();
}
################################################################## ##################################################################
# global variables and checks # global variables and checks
@ -350,7 +408,7 @@ sub check_splitbrain {
if ($sequence < 0) { if ($sequence < 0) {
my $old = _get_actual_primary($res) || "(none)"; my $old = _get_actual_primary($res) || "(none)";
_primary_res($res, "(none)", $old) unless $old eq "(none)"; _primary_res($res, "(none)", $old) unless $old eq "(none)";
_trigger(); finish_links();
sleep(5); sleep(5);
while (!_check_files_modified_any_of("$mars/resource-$res/{log,version,replay}-*", 60)) { while (!_check_files_modified_any_of("$mars/resource-$res/{log,version,replay}-*", 60)) {
lprint "resource directory $res not stable, waiting....\n"; lprint "resource directory $res not stable, waiting....\n";
@ -478,11 +536,7 @@ sub _switch {
lprint "${cmd} on resource $res is already activated\n" if $cmd; lprint "${cmd} on resource $res is already activated\n" if $cmd;
return; return;
} }
set_link($src, $path);
my $tmp = $path;
$tmp =~ s/\/([^\/]+)$/.tmp.$1/;
symlink($src, $tmp) or ldie "cannot create switch symlink\n";
rename($tmp, $path) or ldie "cannot rename switch symlink\n";
lprint "successfully started ${cmd} on resource $res\n" if $cmd; lprint "successfully started ${cmd} on resource $res\n" if $cmd;
} }
@ -576,9 +630,7 @@ sub _fake_versionlink {
} }
if ($pri_link) { if ($pri_link) {
lprint "creating new version symlink '$new_version' -> '$pri_link'\n"; lprint "creating new version symlink '$new_version' -> '$pri_link'\n";
system("rm -f $new_version.tmp"); set_link($pri_link, $new_version);
symlink($pri_link, "$new_version.tmp") or ldie "cannot create faked version symlink '$new_version'\n";
system("mv $new_version.tmp $new_version");
} else { } else {
lwarn "cannot read symlink '$pri_version' -- cannot create faked versionlink '$pri_version'\n"; lwarn "cannot read symlink '$pri_version' -- cannot create faked versionlink '$pri_version'\n";
} }
@ -590,9 +642,7 @@ sub _set_replaylink {
my $rep_path = "$basedir/replay-$host"; my $rep_path = "$basedir/replay-$host";
my $rep_val = sprintf("log-%09d-$primary,0,0", $log_nr); my $rep_val = sprintf("log-%09d-$primary,0,0", $log_nr);
lprint "creating new replaylink '$rep_path' -> '$rep_val'\n"; lprint "creating new replaylink '$rep_path' -> '$rep_val'\n";
system("rm -f $rep_path.tmp"); set_link($rep_val, $rep_path);
symlink($rep_val, "$rep_path.tmp") or ldie "cannot create symlink '$rep_path'\n";
system("mv $rep_path.tmp $rep_path");
if ($log_nr > 1) { if ($log_nr > 1) {
my $old_primary = ""; my $old_primary = "";
@ -644,8 +694,8 @@ sub _create_cluster {
system("mkdir $mars/defaults") unless -d "$mars/defaults"; system("mkdir $mars/defaults") unless -d "$mars/defaults";
system("mkdir $mars/defaults-$host") unless -d "$mars/defaults-$host"; system("mkdir $mars/defaults-$host") unless -d "$mars/defaults-$host";
system("mkdir $mars/todo-global") unless -d "$mars/todo-global"; system("mkdir $mars/todo-global") unless -d "$mars/todo-global";
symlink($ip, "$mars/ips/ip-$host"); set_link($ip, "$mars/ips/ip-$host");
symlink("1", "$mars/todo-global/deleted-$host"); set_link("1", "$mars/todo-global/deleted-$host");
} }
sub create_cluster { sub create_cluster {
@ -666,7 +716,7 @@ sub join_cluster {
system("ssh $peer uname -a") == 0 or ldie "oops, no connection to $peer ...\n"; system("ssh $peer uname -a") == 0 or ldie "oops, no connection to $peer ...\n";
_create_cluster(@_); _create_cluster(@_);
system("rsync --recursive --links -v $peer:$mars/ips/ $mars/ips/") == 0 or ldie "oops\n"; system("rsync --recursive --links -v $peer:$mars/ips/ $mars/ips/") == 0 or ldie "oops\n";
symlink($ip, "$mars/ips/ip-$host"); finish_links();
system("rsync --recursive --links -v $mars/ips/ $peer:$mars/ips/") == 0 or ldie "oops\n"; system("rsync --recursive --links -v $mars/ips/ $peer:$mars/ips/") == 0 or ldie "oops\n";
} }
@ -707,16 +757,15 @@ sub create_res {
ldie "implausible size $size" unless $size > 0; ldie "implausible size $size" unless $size > 0;
} }
my $tmp = "$mars/.tmp.$res"; my $tmp = "$mars/resource-$res";
my $primary; my $primary;
my $replay_nr = -1; my $replay_nr = -1;
if ($create) { if ($create) {
_create_cluster(@_); _create_cluster(@_);
system("rm -rf $tmp"); mkdir($tmp);
system("mkdir $tmp") == 0 or ldie "could not create resource '$res'\n"; ldie "could not create resource '$res'\n" unless -d $tmp;
symlink($size, "$tmp/size") or ldie "cannot create size indicator symlink\n"; set_link($size, "$tmp/size");
} else { } else {
$tmp = "$mars/resource-$res";
ldie "resource '$res' does not exist\n" unless -d $tmp; ldie "resource '$res' does not exist\n" unless -d $tmp;
$primary = _get_designated_primary($res); $primary = _get_designated_primary($res);
if ($primary eq "(none)") { if ($primary eq "(none)") {
@ -752,13 +801,12 @@ sub create_res {
close OUT; close OUT;
} else { } else {
lprint "using existing device '$dev'\n"; lprint "using existing device '$dev'\n";
symlink($dev, $file) or ldie "cannot create device symlink\n"; set_link($dev, $file);
} }
if ($appear) { if ($appear) {
# TODO: check for uniqeness of $appear # TODO: check for uniqeness of $appear
lprint "resource '$res' will appear as local device '/dev/mars/$appear'\n"; lprint "resource '$res' will appear as local device '/dev/mars/$appear'\n";
system("rm -f $tmp/device-$host"); set_link($appear, "$tmp/device-$host");
symlink($appear, "$tmp/device-$host") or ldie "cannot create symlink for local device appearance\n";
} }
mkdir("$tmp/userspace") unless -d "$tmp/userspace"; mkdir("$tmp/userspace") unless -d "$tmp/userspace";
@ -768,25 +816,25 @@ sub create_res {
mkdir("$tmp/actual-$host"); mkdir("$tmp/actual-$host");
my $todo = "$tmp/todo-$host"; my $todo = "$tmp/todo-$host";
mkdir($todo); mkdir($todo);
symlink("1", "$todo/attach"); set_link("1", "$todo/attach");
symlink("1", "$todo/connect"); set_link("1", "$todo/connect");
symlink("1", "$todo/sync"); set_link("1", "$todo/sync");
symlink("1", "$todo/allow-replay"); set_link("1", "$todo/allow-replay");
system("rm -f $tmp/syncstatus-$host"); unlink("$tmp/syncstatus-$host");
if ($create) { if ($create) {
symlink($host, "$tmp/primary") or ldie "cannot create primary symlink\n"; set_link($host, "$tmp/primary");
symlink($size, "$tmp/syncstatus-$host") or ldie "cannot create primary syncstatus\n"; set_link($size, "$tmp/syncstatus-$host");
symlink("log-000000001-$host,0,0", "$tmp/replay-$host") or ldie "cannot create replay status\n"; set_link("log-000000001-$host,0,0", "$tmp/replay-$host");
system("touch $tmp/log-000000001-$host"); system("touch $tmp/log-000000001-$host");
rename($tmp, "$mars/resource-$res") or ldie "cannot finalize resource '$res'\n"; finish_links();
lprint "successfully created resource '$res'\n"; lprint "successfully created resource '$res'\n";
} else { } else {
_set_replaylink($tmp, $replay_nr, $primary); _set_replaylink($tmp, $replay_nr, $primary);
symlink("0", "$tmp/syncstatus-$host") or ldie "cannot start initial sync\n"; set_link("0", "$tmp/syncstatus-$host");
system("rm -f $tmp/connect-$host"); set_link($primary, "$tmp/connect-$host");
symlink($primary, "$tmp/connect-$host") or ldie "cannot create peer connect symlink\n"; set_link($host, "$tmp/connect-$primary") unless -l "$tmp/connect-$primary";
symlink($host, "$tmp/connect-$primary") unless -l "$tmp/connect-$primary"; finish_links();
lprint "successfully joined resource '$res'\n"; lprint "successfully joined resource '$res'\n";
} }
} }
@ -809,9 +857,7 @@ sub leave_res {
my $target = get_link($tmp); my $target = get_link($tmp);
next unless $target eq $host; next unless $target eq $host;
lprint "changing '$tmp' from '$host' to '$peer'\n"; lprint "changing '$tmp' from '$host' to '$peer'\n";
unlink("$tmp.new"); set_link($peer, $tmp);
symlink($peer, "$tmp.new") or ldie "cannot create symlink '$tmp.new'\n";
rename("$tmp.new", $tmp) or ldie "cannot create symlink '$tmp'\n";
} }
unlink($peerlink); unlink($peerlink);
} }
@ -856,27 +902,30 @@ sub _get_deletable_logfiles {
return ($min, $max); return ($min, $max);
} }
my $delete_nr = -1;
sub _create_delete { sub _create_delete {
my ($target) = @_; my ($target) = @_;
my $nr = 0; if ($delete_nr < 0) { # compute only upon first call
my @paths = glob("$mars/todo-global/delete-*"); my @paths = glob("$mars/todo-global/delete-*");
foreach my $path (@paths) { foreach my $path (@paths) {
$path =~ m/-([0-9]+)/; $path =~ m/-([0-9]+)/;
if (defined($1) && $1 > $nr) { if (defined($1) && $1 > $delete_nr) {
$nr = $1; $delete_nr = $1;
}
}
my @paths2 = glob("$mars/todo-global/deleted-*");
foreach my $path (@paths2) {
my $link = get_link($path, 1);
$link =~ m/([0-9]+)/;
if (defined($1) && $1 > $delete_nr) {
$delete_nr = $1;
}
} }
} }
my @paths2 = glob("$mars/todo-global/deleted-*"); my $new = sprintf("$mars/todo-global/delete-%09d", ++$delete_nr);
foreach my $path (@paths2) {
my $link = get_link($path, 1);
$link =~ m/([0-9]+)/;
if (defined($1) && $1 > $nr) {
$nr = $1;
}
}
my $new = sprintf("$mars/todo-global/delete-%09d", $nr + 1);
lprint "create symlink $new -> $target\n"; lprint "create symlink $new -> $target\n";
symlink($target, $new); set_link($target, $new);
} }
sub logdelete_res { sub logdelete_res {
@ -1002,7 +1051,7 @@ sub set_replay_res {
ldie "you would need --force if you really know what you are doing.\n" unless $force; ldie "you would need --force if you really know what you are doing.\n" unless $force;
} }
_set_replaylink("$mars/resource-$res", $new_nr, ""); _set_replaylink("$mars/resource-$res", $new_nr, "");
symlink("$new_nr", "$mars/resource-$res/skip-check-$host"); set_link("$new_nr", "$mars/resource-$res/skip-check-$host");
} }
sub fake_local_res { sub fake_local_res {
@ -1012,17 +1061,13 @@ sub fake_local_res {
#check_status($res, "copy-syncstatus-$host", 0); #check_status($res, "copy-syncstatus-$host", 0);
my $size = get_link("$mars/resource-$res/size"); my $size = get_link("$mars/resource-$res/size");
my $target = "$mars/resource-$res/syncstatus-$host"; my $target = "$mars/resource-$res/syncstatus-$host";
symlink($size, "$target.tmp") or ldie "cannot create faked syncstatus\n"; set_link($size, $target);
rename("$target.tmp", $target) or ldie "cannot reaname symlink\n";
} }
sub _primary_res { sub _primary_res {
my ($res, $new, $old) = @_; my ($res, $new, $old) = @_;
my $tmp = "$mars/resource-$res/.tmp.primary";
my $pri = "$mars/resource-$res/primary"; my $pri = "$mars/resource-$res/primary";
system("rm -f $tmp"); set_link($new, $pri);
symlink($new, $tmp) or ldie "cannot create new primary symlink\n";
rename($tmp, $pri) or ldie "cannot install new primary symlink\n";
lprint "designated primary changed from '$old' to '$new'\n"; lprint "designated primary changed from '$old' to '$new'\n";
} }
@ -1053,12 +1098,12 @@ sub primary_res {
check_sync_finished($res, $new); check_sync_finished($res, $new);
check_todo($cmd, $res, "connect", 1, 0); check_todo($cmd, $res, "connect", 1, 0);
_primary_res($res, "(none)", $old) unless $old eq "(none)"; _primary_res($res, "(none)", $old) unless $old eq "(none)";
_trigger(); finish_links();
check_primary_gone($res); check_primary_gone($res);
check_splitbrain($res, $new, -1); check_splitbrain($res, $new, -1);
} }
_primary_res($res, $new, $old); _primary_res($res, $new, $old);
_trigger(); finish_links();
check_primary_settled($res); check_primary_settled($res);
lprint "resource '$res': designated primary successfully changed from $old to $new\n"; lprint "resource '$res': designated primary successfully changed from $old to $new\n";
} }
@ -1072,20 +1117,19 @@ sub invalidate_res {
my $was_on = get_link($repl); my $was_on = get_link($repl);
if ($was_on) { if ($was_on) {
_switch("pause-replay-local", $res, $repl, 0); _switch("pause-replay-local", $res, $repl, 0);
_trigger(); finish_links();
lprint "waiting...\n"; lprint "waiting...\n";
sleep(15); sleep(15);
} }
my $dst = "$mars/resource-$res/syncstatus-$host"; my $dst = "$mars/resource-$res/syncstatus-$host";
system("rm -f $dst"); set_link("0", $dst);
symlink("0", $dst) or ldie "cannot create invalidation symlink '$dst'\n";
my $primary = _get_designated_primary($res); my $primary = _get_designated_primary($res);
my $replay = get_link("$mars/resource-$res/replay-$primary"); my $replay = get_link("$mars/resource-$res/replay-$primary");
$replay =~ m/^log-([0-9]+)-/ or ldie "replay link '$replay' is not parsable\n"; $replay =~ m/^log-([0-9]+)-/ or ldie "replay link '$replay' is not parsable\n";
my $replay_nr = $1; my $replay_nr = $1;
_set_replaylink("$mars/resource-$res", $replay_nr, $primary); _set_replaylink("$mars/resource-$res", $replay_nr, $primary);
if ($was_on) { if ($was_on) {
_trigger(); finish_links();
lprint "waiting...\n"; lprint "waiting...\n";
sleep(15); sleep(15);
_switch("resume-replay-local", $res, $repl, 1); _switch("resume-replay-local", $res, $repl, 1);
@ -1129,15 +1173,13 @@ sub resize_res {
my $this_size = get_link($syncsize); my $this_size = get_link($syncsize);
ldie "sync on $syncsize has not yet finished: $this_size != $old_size (DANGEROUS FIX: if you know what you are doing, marsadm fake-sync can 'fix' it -- but this may need a full-sync afterwards)\n" unless $this_size == $old_size; ldie "sync on $syncsize has not yet finished: $this_size != $old_size (DANGEROUS FIX: if you know what you are doing, marsadm fake-sync can 'fix' it -- but this may need a full-sync afterwards)\n" unless $this_size == $old_size;
} }
foreach my $syncsize (@syncsizes) { if (0) {
my $this_size = get_link($syncsize); foreach my $syncsize (@syncsizes) {
unlink("$syncsize.new"); my $this_size = get_link($syncsize);
symlink($new_size, "$syncsize.new") or ldie "cannot create size symlink '$syncsize.new'\n"; set_link($new_size, $syncsize);
rename("$syncsize.new", $syncsize) or ldie "cannot create size symlink '$syncsize'\n";; }
} }
unlink("$lnk.new"); set_link($new_size, $lnk);
symlink($new_size, "$lnk.new") or ldie "cannot create size symlink '$lnk.new'\n";
rename("$lnk.new", $lnk) or ldie "cannot create size symlink '$lnk'\n";;
} }
sub role_cmd { sub role_cmd {
@ -1322,9 +1364,12 @@ my %cmd_table =
my @args; my @args;
foreach my $arg (@ARGV) { foreach my $arg (@ARGV) {
if ($arg eq "--force") { if ($arg eq "--force" || $arg eq "-f") {
$force++; $force++;
next; next;
} elsif ($arg eq "--verbose" || $arg eq "-v") {
$verbose++;
next;
} elsif ($arg =~ s/--timeout\s*=\s*([0-9]+)/$1/) { } elsif ($arg =~ s/--timeout\s*=\s*([0-9]+)/$1/) {
$timeout = $arg; $timeout = $arg;
next; next;
@ -1384,4 +1429,4 @@ if ($res eq "all" && $cmd ne "show") {
do_res($cmd, $res, @args); do_res($cmd, $res, @args);
} }
_trigger(); finish_links();