mirror of https://github.com/schoebel/mars
marsadm: split command "primary" into phases
This commit is contained in:
parent
dab60da817
commit
2dd3033ff4
|
@ -813,8 +813,8 @@ static int if_switch(struct if_brick *brick)
|
|||
input->q = NULL;
|
||||
}
|
||||
#endif
|
||||
if (atomic_read(&input->open_count) > 0) {
|
||||
MARS_INF("device '%s' is open %d times, cannot shutdown\n", disk->disk_name, atomic_read(&input->open_count));
|
||||
if (atomic_read(&brick->open_count) > 0) {
|
||||
MARS_INF("device '%s' is open %d times, cannot shutdown\n", disk->disk_name, atomic_read(&brick->open_count));
|
||||
status = -EBUSY;
|
||||
goto done; // don't indicate "off" status
|
||||
}
|
||||
|
@ -861,14 +861,14 @@ static int if_open(struct block_device *bdev, fmode_t mode)
|
|||
down(&brick->switch_sem);
|
||||
|
||||
if (unlikely(!brick->power.led_on)) {
|
||||
MARS_INF("----------------------- BUSY %d ------------------------------\n", atomic_read(&input->open_count));
|
||||
MARS_INF("----------------------- BUSY %d ------------------------------\n", atomic_read(&brick->open_count));
|
||||
up(&brick->switch_sem);
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
atomic_inc(&input->open_count);
|
||||
atomic_inc(&brick->open_count);
|
||||
|
||||
MARS_INF("----------------------- OPEN %d ------------------------------\n", atomic_read(&input->open_count));
|
||||
MARS_INF("----------------------- OPEN %d ------------------------------\n", atomic_read(&brick->open_count));
|
||||
|
||||
up(&brick->switch_sem);
|
||||
return 0;
|
||||
|
@ -877,14 +877,12 @@ static int if_open(struct block_device *bdev, fmode_t mode)
|
|||
static int if_release(struct gendisk *gd, fmode_t mode)
|
||||
{
|
||||
struct if_input *input = gd->private_data;
|
||||
struct if_brick *brick = input->brick;
|
||||
int nr;
|
||||
|
||||
MARS_INF("----------------------- CLOSE %d ------------------------------\n", atomic_read(&input->open_count));
|
||||
|
||||
if (atomic_dec_and_test(&input->open_count)) {
|
||||
struct if_brick *brick;
|
||||
brick = input->brick;
|
||||
MARS_INF("----------------------- CLOSE %d ------------------------------\n", atomic_read(&brick->open_count));
|
||||
|
||||
if (atomic_dec_and_test(&brick->open_count)) {
|
||||
while ((nr = atomic_read(&input->flying_count)) > 0) {
|
||||
MARS_INF("%d IO requests not yet completed\n", nr);
|
||||
brick_msleep(1000);
|
||||
|
@ -998,6 +996,7 @@ MARS_MAKE_STATICS(if);
|
|||
/* Initialize the per-brick state of an if_brick.
 *
 * Sets up brick->switch_sem with an initial count of 1 and resets the
 * brick-level open_count to 0.  Always returns 0.
 */
static int if_brick_construct(struct if_brick *brick)
|
||||
{
|
||||
sema_init(&brick->switch_sem, 1);
|
||||
/* open_count is kept on the brick; start with "not opened" */
atomic_set(&brick->open_count, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1022,7 +1021,6 @@ static int if_input_construct(struct if_input *input)
|
|||
INIT_LIST_HEAD(&input->plug_anchor);
|
||||
sema_init(&input->kick_sem, 1);
|
||||
spin_lock_init(&input->req_lock);
|
||||
atomic_set(&input->open_count, 0);
|
||||
atomic_set(&input->flying_count, 0);
|
||||
atomic_set(&input->read_flying_count, 0);
|
||||
atomic_set(&input->write_flying_count, 0);
|
||||
|
|
|
@ -47,7 +47,6 @@ struct if_input {
|
|||
struct timer_list timer;
|
||||
#endif
|
||||
unsigned long capacity;
|
||||
atomic_t open_count;
|
||||
atomic_t plugged_count;
|
||||
atomic_t flying_count;
|
||||
// only for statistics
|
||||
|
@ -78,6 +77,7 @@ struct if_brick {
|
|||
int readahead;
|
||||
bool skip_sync;
|
||||
// inspectable
|
||||
atomic_t open_count;
|
||||
// private
|
||||
struct semaphore switch_sem;
|
||||
struct say_channel *say_channel;
|
||||
|
|
|
@ -3124,7 +3124,7 @@ int make_dev(void *buf, struct mars_dent *dent)
|
|||
}
|
||||
|
||||
switch_on =
|
||||
(rot->if_brick && atomic_read(&rot->if_brick->inputs[0]->open_count) > 0) ||
|
||||
(rot->if_brick && atomic_read(&rot->if_brick->open_count) > 0) ||
|
||||
(rot->todo_primary &&
|
||||
!rot->trans_brick->replay_mode &&
|
||||
rot->trans_brick->power.led_on);
|
||||
|
@ -3157,6 +3157,7 @@ int make_dev(void *buf, struct mars_dent *dent)
|
|||
}
|
||||
dev_brick->show_status = _show_brick_status;
|
||||
_dev_brick = (void*)dev_brick;
|
||||
__show_actual(rot->parent_path, "open-count", atomic_read(&_dev_brick->open_count));
|
||||
#if 0
|
||||
if (_dev_brick->has_closed) {
|
||||
_dev_brick->has_closed = false;
|
||||
|
|
|
@ -267,22 +267,12 @@ sub check_primary_gone {
|
|||
for (;;) {
|
||||
my $pri = _get_actual_primary($res);
|
||||
last if !$pri;
|
||||
last if $pri eq $host;
|
||||
lprint "waiting for other primary host ($pri) to disappear....\n";
|
||||
sleep_timeout();
|
||||
}
|
||||
}
|
||||
|
||||
# Block until the actual primary of resource $res equals the designated one.
#   $res - resource name
# The designated primary is read once up front; the actual primary is
# re-read on every iteration, with "(none)" substituted when it is empty.
sub check_primary_settled {
|
||||
my ($res) = @_;
|
||||
my $target_primary = _get_designated_primary($res);
|
||||
for (;;) {
|
||||
my $actual_primary = _get_actual_primary($res) || '(none)';
|
||||
# done as soon as designated and actual primary agree
last if ($target_primary eq $actual_primary);
|
||||
lprint "waiting for primary $target_primary to be settled\n";
|
||||
# NOTE(review): sleep_timeout is defined outside this view; presumably it
# sleeps and enforces an overall timeout - confirm.
sleep_timeout(2);
|
||||
}
|
||||
}
|
||||
|
||||
sub check_todo {
|
||||
my ($cmd, $res, $key, $val, $wait, $unchecked, $inv) = @_;
|
||||
my $path = "$mars/resource-$res/todo-$host/$key";
|
||||
|
@ -405,74 +395,8 @@ sub get_minmax_replays {
|
|||
return _get_minmax($res, "$mars/resource-$res/replay-*", 1);
|
||||
}
|
||||
|
||||
sub check_splitbrain {
|
||||
# check only the chain of $host (or all hosts if unset)
|
||||
# check up to $sequence (or for all if < 0)
|
||||
my ($res, $host, $sequence) = @_;
|
||||
if ($sequence < 0) {
|
||||
my $old = _get_actual_primary($res) || "(none)";
|
||||
_primary_res($res, "(none)", $old) unless $old eq "(none)";
|
||||
finish_links();
|
||||
sleep(5);
|
||||
while (!_check_files_modified_any_of("$mars/resource-$res/{log,version,replay}-*", 60)) {
|
||||
lprint "resource directory $res not stable, waiting....\n";
|
||||
sleep_timeout();
|
||||
}
|
||||
while (1) {
|
||||
my ($min_log, $max_log) = get_minmax_logfiles($res);
|
||||
my ($min_ver, $max_ver) = get_minmax_versions($res);
|
||||
my ($min_rep, $max_rep) = get_minmax_replays($res);
|
||||
if ($min_ver > $min_log || $max_ver < $max_log) {
|
||||
lprint "some version links are missing...\n";
|
||||
sleep_timeout();
|
||||
next;
|
||||
}
|
||||
if ($max_log >= $max_rep) {
|
||||
lprint "resource $res: logfile $max_log is present.\n";
|
||||
last;
|
||||
}
|
||||
lprint "resource $res: logfile $max_log is not yet transferred (need $max_rep), waiting....\n";
|
||||
sleep_timeout();
|
||||
}
|
||||
} # $sequence < 0
|
||||
|
||||
my $glob = "$mars/resource-$res/version-[0-9]*";
|
||||
my @links = glob($glob);
|
||||
if (!@links) {
|
||||
@links = glob("$mars/resource-$res/version-[0-9]*-*");
|
||||
ldie "no version information available\n" unless @links;
|
||||
lprint "assuming that I am primary for the first time\n";
|
||||
return;
|
||||
}
|
||||
|
||||
@links = sort(@links);
|
||||
foreach my $link (@links) {
|
||||
my $nr = $link;
|
||||
$nr =~ s:^.*[a-z]+-([0-9]+)(-[^/]*)?$:$1:;
|
||||
$nr = int($nr);
|
||||
|
||||
next if ($sequence >= 0 && $nr > $sequence);
|
||||
|
||||
my $fromhost = $link;
|
||||
$fromhost =~ s:^.*version-[0-9]*-(.*)$:$1:;
|
||||
|
||||
my $version = get_link($link);
|
||||
my $otherhost = $version;
|
||||
$otherhost =~ s/^[^,]*?,(?:log-[0-9]*?-|)([^,]*?),.*$/$1/;
|
||||
my $otherlink = sprintf("$mars/resource-$res/version-%09d-$otherhost", $nr);
|
||||
my $otherversion = get_link($otherlink);
|
||||
|
||||
# ignore foreign mismatches
|
||||
if ($host) {
|
||||
next if $fromhost ne $host;
|
||||
}
|
||||
|
||||
# by definition, the originator of a logfile is always "right"
|
||||
next if $otherhost eq $fromhost;
|
||||
|
||||
# final check
|
||||
ldie "splitbrain at sequence $nr detected\n" unless $version eq $otherversion;
|
||||
}
|
||||
sub try_to_avoid_splitbrain {
|
||||
# NYI
|
||||
}
|
||||
|
||||
sub get_size {
|
||||
|
@ -1112,41 +1036,71 @@ sub _primary_res {
|
|||
lprint "designated primary changed from '$old' to '$new'\n";
|
||||
}
|
||||
|
||||
sub primary_res {
|
||||
# check whether primary/secondary switching is possible at all
#
# Registered in %cmd_table as the "check preconditions" phase of both the
# "primary" and the "secondary" command.
#   $cmd - command name, compared against "primary" / "secondary"
#   $res - resource name
# Reads the file-level variables $force, $host and $mars (declared outside
# this view).  Dies via ldie when a precondition is violated.
sub primary_phase0 {
|
||||
my ($cmd, $res) = @_;
|
||||
my $sec = ($cmd eq "secondary");
|
||||
# we _must_ take the designated primary here, because the actual primary is a _runtime_ condition
|
||||
# only "secondary" may proceed without the /proc/sys/mars directory
ldie "cannot switch primary: mars kernel module is not loaded\n" unless ($cmd eq "secondary" || -d "/proc/sys/mars");
|
||||
# the following checks apply only to an unforced "primary"
if ($cmd eq "primary" and !$force) {
|
||||
check_sync_finished($res, $host);
|
||||
check_todo($cmd, $res, "attach", 1, 0);
|
||||
check_todo($cmd, $res, "connect", 1, 0);
|
||||
check_todo($cmd, $res, "allow-replay", 1, 0);
|
||||
#check_status($cmd, $res, "replay_rate", 0, 0, 1);
|
||||
}
|
||||
my $old = _get_designated_primary($res);
|
||||
# nothing more to check when this host is already designated primary
return if ($old eq $host and $cmd eq "primary");
|
||||
# no designated primary: there is no old primary whose device could be in use
return if $old eq "(none)";
|
||||
# open-count link of the old primary; a true value is treated as "in use"
my $device_in_use = get_link("$mars/resource-$res/actual-$old/open-count", 1);
|
||||
if ($device_in_use) {
|
||||
my $name = get_link("$mars/resource-$res/device-$old", 1) || "unknown";
|
||||
lwarn "device '/dev/mars/$name' for resource '$res' is $device_in_use times in use on primary host '$old'\n";
|
||||
# with $force set, the warning above is all that happens
ldie "first you must umount/close the device (on host '$old')\n" unless $force;
|
||||
}
|
||||
lprint "all preconditions OK for resource '$res'\n";
|
||||
}
|
||||
|
||||
# when necessary, switch to secondary (intermediately)
#
# Registered in %cmd_table as the "leave primary state" phase of both the
# "primary" and the "secondary" command.
#   $cmd - command name, compared against "primary"
#   $res - resource name
# Reads the file-level variables $force and $host (declared outside this view).
sub primary_phase1 {
|
||||
my ($cmd, $res) = @_;
|
||||
# a forced "primary" skips the intermediate step
return if ($force and $cmd eq "primary");
|
||||
my $old = _get_designated_primary($res);
|
||||
# this host is already the designated primary: nothing to leave
return if ($old eq $host and $cmd eq "primary");
|
||||
my $new = "(none)";
|
||||
# designated primary is already "(none)"
return if $old eq $new;
|
||||
# hand over to _primary_res with "(none)" as the new designated primary
_primary_res($res, $new, $old);
|
||||
}
|
||||
|
||||
# when necessary, wait
#
# Registered in %cmd_table as the "wait when necessary" phase of the
# "primary" command.
#   $cmd - command name; anything other than "primary" returns immediately
#   $res - resource name
# Does nothing at all when the file-level $force is set.
sub primary_phase2 {
|
||||
my ($cmd, $res) = @_;
|
||||
return if $force;
|
||||
return unless $cmd eq "primary";
|
||||
# wait for any other host to stop being the actual primary
check_primary_gone($res);
|
||||
# receives the unchanged argument list ($cmd, $res)
try_to_avoid_splitbrain(@_);
|
||||
}
|
||||
|
||||
# when necessary, switch to primary
|
||||
sub primary_phase3 {
|
||||
my ($cmd, $res) = @_;
|
||||
return unless $cmd eq "primary";
|
||||
my $old = _get_designated_primary($res);
|
||||
my $new = $host;
|
||||
if ($sec) {
|
||||
if ($old eq '(none)') {
|
||||
lprint "resource '$res' is already designated as secondary everywhere\n";
|
||||
return;
|
||||
}
|
||||
if (($old ne $host) && !$force && (_get_actual_primary($res) ne $host)) {
|
||||
ldie "for safety reasons, switching to secondary is only allowed when I ($host) am designated primary or actually primary for resource '$res'\n";
|
||||
}
|
||||
$new = "(none)";
|
||||
} elsif ($old eq $new) {
|
||||
lprint "I am already designated primary on resource '$res'.\n";
|
||||
return;
|
||||
} elsif ($force) {
|
||||
lprint "FORCING myself ($host) to be the designated primary...\n";
|
||||
} elsif (! -d "/proc/sys/mars") {
|
||||
ldie "cannot switch to primary: mars kernel module is not loaded\n";
|
||||
} else { # try to switch myself to primary
|
||||
lprint "trying to switch $new to primary...\n";
|
||||
check_sync_finished($res, $new);
|
||||
check_todo($cmd, $res, "connect", 1, 0);
|
||||
_primary_res($res, "(none)", $old) unless $old eq "(none)";
|
||||
finish_links();
|
||||
check_primary_gone($res);
|
||||
check_splitbrain($res, $new, -1);
|
||||
}
|
||||
return if $old eq $new;
|
||||
_primary_res($res, $new, $old);
|
||||
finish_links();
|
||||
check_primary_settled($res);
|
||||
lprint "resource '$res': designated primary successfully changed from $old to $new\n";
|
||||
}
|
||||
|
||||
# wait for device to appear
#
# Registered in %cmd_table as the "wait for device" phase of the "primary"
# command.
#   $cmd - command name; anything other than "primary" returns immediately
#   $res - resource name
# Reads the file-level variables $force, $host and $mars (declared outside
# this view).  Skipped entirely when $force is set.
sub primary_phase4 {
|
||||
my ($cmd, $res) = @_;
|
||||
return unless $cmd eq "primary";
|
||||
return if $force;
|
||||
# the device name is stored in the device-$host link of the resource
my $name = get_link("$mars/resource-$res/device-$host");
|
||||
my $dev = "/dev/mars/$name";
|
||||
# poll until the path exists (any file type satisfies -e)
while (! -e $dev) {
|
||||
lprint "device '$dev' not yet present\n";
|
||||
# NOTE(review): sleep_timeout is defined outside this view; presumably it
# sleeps and enforces an overall timeout - confirm.
sleep_timeout(1);
|
||||
}
|
||||
# the success message is printed only when the path is a block device
lprint "device '$dev' is present\n" if -b $dev;
|
||||
}
|
||||
|
||||
sub invalidate_res {
|
||||
|
@ -1375,8 +1329,17 @@ my %cmd_table =
|
|||
"syncer" => \&ignore_cmd,
|
||||
"up" => \&up_res,
|
||||
"down" => \&up_res,
|
||||
"primary" => \&primary_res,
|
||||
"secondary" => \&primary_res,
|
||||
"primary" => [
|
||||
"check preconditions", \&primary_phase0,
|
||||
"leave primary state", \&primary_phase1,
|
||||
"wait when necessary", \&primary_phase2,
|
||||
"switch to primary", \&primary_phase3,
|
||||
"wait for device", \&primary_phase4,
|
||||
],
|
||||
"secondary" => [
|
||||
"check preconditions", \&primary_phase0,
|
||||
"leave primary state", \&primary_phase1,
|
||||
],
|
||||
"invalidate" => \&invalidate_res,
|
||||
"invalidate-remote" => \&forbidden_cmd,
|
||||
"resize" => \&resize_res,
|
||||
|
|
Loading…
Reference in New Issue