marsadm: split command "primary" into phases

This commit is contained in:
Thomas Schoebel-Theuer 2013-05-04 21:54:12 +02:00 committed by Thomas Schoebel-Theuer
parent dab60da817
commit 2dd3033ff4
4 changed files with 87 additions and 125 deletions

View File

@ -813,8 +813,8 @@ static int if_switch(struct if_brick *brick)
input->q = NULL;
}
#endif
if (atomic_read(&input->open_count) > 0) {
MARS_INF("device '%s' is open %d times, cannot shutdown\n", disk->disk_name, atomic_read(&input->open_count));
if (atomic_read(&brick->open_count) > 0) {
MARS_INF("device '%s' is open %d times, cannot shutdown\n", disk->disk_name, atomic_read(&brick->open_count));
status = -EBUSY;
goto done; // don't indicate "off" status
}
@ -861,14 +861,14 @@ static int if_open(struct block_device *bdev, fmode_t mode)
down(&brick->switch_sem);
if (unlikely(!brick->power.led_on)) {
MARS_INF("----------------------- BUSY %d ------------------------------\n", atomic_read(&input->open_count));
MARS_INF("----------------------- BUSY %d ------------------------------\n", atomic_read(&brick->open_count));
up(&brick->switch_sem);
return -EBUSY;
}
atomic_inc(&input->open_count);
atomic_inc(&brick->open_count);
MARS_INF("----------------------- OPEN %d ------------------------------\n", atomic_read(&input->open_count));
MARS_INF("----------------------- OPEN %d ------------------------------\n", atomic_read(&brick->open_count));
up(&brick->switch_sem);
return 0;
@ -877,14 +877,12 @@ static int if_open(struct block_device *bdev, fmode_t mode)
static int if_release(struct gendisk *gd, fmode_t mode)
{
struct if_input *input = gd->private_data;
struct if_brick *brick = input->brick;
int nr;
MARS_INF("----------------------- CLOSE %d ------------------------------\n", atomic_read(&input->open_count));
if (atomic_dec_and_test(&input->open_count)) {
struct if_brick *brick;
brick = input->brick;
MARS_INF("----------------------- CLOSE %d ------------------------------\n", atomic_read(&brick->open_count));
if (atomic_dec_and_test(&brick->open_count)) {
while ((nr = atomic_read(&input->flying_count)) > 0) {
MARS_INF("%d IO requests not yet completed\n", nr);
brick_msleep(1000);
@ -998,6 +996,7 @@ MARS_MAKE_STATICS(if);
/*
 * Set up the per-brick state of an if_brick: the open counter starts at
 * zero and switch_sem is initialised with a count of 1.
 * Always returns 0.
 */
static int if_brick_construct(struct if_brick *brick)
{
	atomic_set(&brick->open_count, 0);
	sema_init(&brick->switch_sem, 1);

	return 0;
}
@ -1022,7 +1021,6 @@ static int if_input_construct(struct if_input *input)
INIT_LIST_HEAD(&input->plug_anchor);
sema_init(&input->kick_sem, 1);
spin_lock_init(&input->req_lock);
atomic_set(&input->open_count, 0);
atomic_set(&input->flying_count, 0);
atomic_set(&input->read_flying_count, 0);
atomic_set(&input->write_flying_count, 0);

View File

@ -47,7 +47,6 @@ struct if_input {
struct timer_list timer;
#endif
unsigned long capacity;
atomic_t open_count;
atomic_t plugged_count;
atomic_t flying_count;
// only for statistics
@ -78,6 +77,7 @@ struct if_brick {
int readahead;
bool skip_sync;
// inspectable
atomic_t open_count;
// private
struct semaphore switch_sem;
struct say_channel *say_channel;

View File

@ -3124,7 +3124,7 @@ int make_dev(void *buf, struct mars_dent *dent)
}
switch_on =
(rot->if_brick && atomic_read(&rot->if_brick->inputs[0]->open_count) > 0) ||
(rot->if_brick && atomic_read(&rot->if_brick->open_count) > 0) ||
(rot->todo_primary &&
!rot->trans_brick->replay_mode &&
rot->trans_brick->power.led_on);
@ -3157,6 +3157,7 @@ int make_dev(void *buf, struct mars_dent *dent)
}
dev_brick->show_status = _show_brick_status;
_dev_brick = (void*)dev_brick;
__show_actual(rot->parent_path, "open-count", atomic_read(&_dev_brick->open_count));
#if 0
if (_dev_brick->has_closed) {
_dev_brick->has_closed = false;

View File

@ -267,22 +267,12 @@ sub check_primary_gone {
for (;;) {
my $pri = _get_actual_primary($res);
last if !$pri;
last if $pri eq $host;
lprint "waiting for other primary host ($pri) to disappear....\n";
sleep_timeout();
}
}
# Block until the actual primary of resource $res equals the designated
# primary. The designated primary is read once up front; the actual primary
# is re-read on every round, and a false value is treated as '(none)'.
sub check_primary_settled {
    my ($res) = @_;
    my $target_primary = _get_designated_primary($res);
    while (1) {
        my $current = _get_actual_primary($res) || '(none)';
        last if $current eq $target_primary;
        lprint "waiting for primary $target_primary to be settled\n";
        sleep_timeout(2);
    }
}
sub check_todo {
my ($cmd, $res, $key, $val, $wait, $unchecked, $inv) = @_;
my $path = "$mars/resource-$res/todo-$host/$key";
@ -405,74 +395,8 @@ sub get_minmax_replays {
return _get_minmax($res, "$mars/resource-$res/replay-*", 1);
}
sub check_splitbrain {
# check only the chain of $host (or all hosts if unset)
# check up to $sequence (or for all if < 0)
my ($res, $host, $sequence) = @_;
if ($sequence < 0) {
my $old = _get_actual_primary($res) || "(none)";
_primary_res($res, "(none)", $old) unless $old eq "(none)";
finish_links();
sleep(5);
while (!_check_files_modified_any_of("$mars/resource-$res/{log,version,replay}-*", 60)) {
lprint "resource directory $res not stable, waiting....\n";
sleep_timeout();
}
while (1) {
my ($min_log, $max_log) = get_minmax_logfiles($res);
my ($min_ver, $max_ver) = get_minmax_versions($res);
my ($min_rep, $max_rep) = get_minmax_replays($res);
if ($min_ver > $min_log || $max_ver < $max_log) {
lprint "some version links are missing...\n";
sleep_timeout();
next;
}
if ($max_log >= $max_rep) {
lprint "resource $res: logfile $max_log is present.\n";
last;
}
lprint "resource $res: logfile $max_log is not yet transferred (need $max_rep), waiting....\n";
sleep_timeout();
}
} # $sequence < 0
my $glob = "$mars/resource-$res/version-[0-9]*";
my @links = glob($glob);
if (!@links) {
@links = glob("$mars/resource-$res/version-[0-9]*-*");
ldie "no version information available\n" unless @links;
lprint "assuming that I am primary for the first time\n";
return;
}
@links = sort(@links);
foreach my $link (@links) {
my $nr = $link;
$nr =~ s:^.*[a-z]+-([0-9]+)(-[^/]*)?$:$1:;
$nr = int($nr);
next if ($sequence >= 0 && $nr > $sequence);
my $fromhost = $link;
$fromhost =~ s:^.*version-[0-9]*-(.*)$:$1:;
my $version = get_link($link);
my $otherhost = $version;
$otherhost =~ s/^[^,]*?,(?:log-[0-9]*?-|)([^,]*?),.*$/$1/;
my $otherlink = sprintf("$mars/resource-$res/version-%09d-$otherhost", $nr);
my $otherversion = get_link($otherlink);
# ignore foreign mismatches
if ($host) {
next if $fromhost ne $host;
}
# by definition, the originator of a logfile is always "right"
next if $otherhost eq $fromhost;
# final check
ldie "splitbrain at sequence $nr detected\n" unless $version eq $otherversion;
}
# Placeholder: the body is empty, so calling this currently has no effect.
# primary_phase2() calls it with its own argument list.
sub try_to_avoid_splitbrain {
# NYI (not yet implemented)
}
sub get_size {
@ -1112,41 +1036,71 @@ sub _primary_res {
lprint "designated primary changed from '$old' to '$new'\n";
}
sub primary_res {
# check whether primary/secondary switching is possible at all
# Phase 0 of the "primary" / "secondary" commands: check the preconditions
# and abort via ldie when one of them is not met.
# Arguments: $cmd (command name, compared against "primary"/"secondary"),
#            $res (resource name).
sub primary_phase0 {
my ($cmd, $res) = @_;
# NOTE(review): $sec is assigned here but not read anywhere inside this sub.
my $sec = ($cmd eq "secondary");
# we _must_ take the designated primary here, because the actual primary is a _runtime_ condition
# every command other than "secondary" requires the /proc/sys/mars directory to exist
ldie "cannot switch primary: mars kernel module is not loaded\n" unless ($cmd eq "secondary" || -d "/proc/sys/mars");
# an unforced "primary" first checks sync state and the attach / connect / allow-replay todo switches
if ($cmd eq "primary" and !$force) {
check_sync_finished($res, $host);
check_todo($cmd, $res, "attach", 1, 0);
check_todo($cmd, $res, "connect", 1, 0);
check_todo($cmd, $res, "allow-replay", 1, 0);
#check_status($cmd, $res, "replay_rate", 0, 0, 1);
}
my $old = _get_designated_primary($res);
# nothing further to check when this host is already designated primary, or when nobody is
return if ($old eq $host and $cmd eq "primary");
return if $old eq "(none)";
# open-count link published under actual-$old; any true value counts as "device in use"
my $device_in_use = get_link("$mars/resource-$res/actual-$old/open-count", 1);
if ($device_in_use) {
my $name = get_link("$mars/resource-$res/device-$old", 1) || "unknown";
lwarn "device '/dev/mars/$name' for resource '$res' is $device_in_use times in use on primary host '$old'\n";
# with $force set, only the warning above is issued and execution continues
ldie "first you must umount/close the device (on host '$old')\n" unless $force;
}
lprint "all preconditions OK for resource '$res'\n";
}
# Phase 1: when necessary, switch to secondary (intermediately).
# Does nothing for a forced "primary", for "primary" when this host is
# already the designated primary, or when the designated primary is already
# "(none)". Otherwise hands the change to "(none)" over to _primary_res().
sub primary_phase1 {
    my ($cmd, $res) = @_;
    return if $force && $cmd eq "primary";
    my $designated = _get_designated_primary($res);
    return if $designated eq $host && $cmd eq "primary";
    my $nobody = "(none)";
    return if $designated eq $nobody;
    _primary_res($res, $nobody, $designated);
}
# Phase 2: when necessary, wait.
# Only acts for an unforced "primary": calls check_primary_gone() for the
# resource, then passes the original argument list on to
# try_to_avoid_splitbrain().
sub primary_phase2 {
    my ($cmd, $res) = @_;
    return if $force || $cmd ne "primary";
    check_primary_gone($res);
    try_to_avoid_splitbrain(@_);
}
# when necessary, switch to primary
sub primary_phase3 {
my ($cmd, $res) = @_;
return unless $cmd eq "primary";
my $old = _get_designated_primary($res);
my $new = $host;
if ($sec) {
if ($old eq '(none)') {
lprint "resource '$res' is already designated as secondary everywhere\n";
return;
}
if (($old ne $host) && !$force && (_get_actual_primary($res) ne $host)) {
ldie "for safety reasons, switching to secondary is only allowed when I ($host) am designated primary or actually primary for resource '$res'\n";
}
$new = "(none)";
} elsif ($old eq $new) {
lprint "I am already designated primary on resource '$res'.\n";
return;
} elsif ($force) {
lprint "FORCING myself ($host) to be the designated primary...\n";
} elsif (! -d "/proc/sys/mars") {
ldie "cannot switch to primary: mars kernel module is not loaded\n";
} else { # try to switch myself to primary
lprint "trying to switch $new to primary...\n";
check_sync_finished($res, $new);
check_todo($cmd, $res, "connect", 1, 0);
_primary_res($res, "(none)", $old) unless $old eq "(none)";
finish_links();
check_primary_gone($res);
check_splitbrain($res, $new, -1);
}
return if $old eq $new;
_primary_res($res, $new, $old);
finish_links();
check_primary_settled($res);
lprint "resource '$res': designated primary successfully changed from $old to $new\n";
}
# Phase 4: wait for the device to appear.
# Only acts for an unforced "primary". The device name is read from this
# host's device link; the sub then polls until /dev/mars/<name> exists and
# reports its presence if it is a block device.
sub primary_phase4 {
    my ($cmd, $res) = @_;
    return if $cmd ne "primary" || $force;
    my $name = get_link("$mars/resource-$res/device-$host");
    my $dev = "/dev/mars/$name";
    until (-e $dev) {
        lprint "device '$dev' not yet present\n";
        sleep_timeout(1);
    }
    lprint "device '$dev' is present\n" if -b $dev;
}
sub invalidate_res {
@ -1375,8 +1329,17 @@ my %cmd_table =
"syncer" => \&ignore_cmd,
"up" => \&up_res,
"down" => \&up_res,
"primary" => \&primary_res,
"secondary" => \&primary_res,
"primary" => [
"check preconditions", \&primary_phase0,
"leave primary state", \&primary_phase1,
"wait when necessary", \&primary_phase2,
"switch to primary", \&primary_phase3,
"wait for device", \&primary_phase4,
],
"secondary" => [
"check preconditions", \&primary_phase0,
"leave primary state", \&primary_phase1,
],
"invalidate" => \&invalidate_res,
"invalidate-remote" => \&forbidden_cmd,
"resize" => \&resize_res,