mirror of https://github.com/schoebel/mars
marsadm: fix external races on resize
This commit is contained in:
parent
8888571b2c
commit
1f2680dd62
|
@ -6045,6 +6045,88 @@ reference "sec:Scripting-HOWTO"
|
|||
.
|
||||
\end_layout
|
||||
|
||||
\begin_layout Subsection
|
||||
Online Resizing during Operation
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
You should have LVM or some other means of increasing the physical size
|
||||
of your disk (e.g.
|
||||
via firmware of some RAID controllers).
|
||||
The network must be healthy.
|
||||
Do the following steps:
|
||||
\end_layout
|
||||
|
||||
\begin_layout Enumerate
|
||||
Increase your local disks (usually
|
||||
\family typewriter
|
||||
/dev/vg/mydata
|
||||
\family default
|
||||
)
|
||||
\emph on
|
||||
everywhere
|
||||
\emph default
|
||||
in the whole cluster.
|
||||
In order to avoid wasting space, increase them
|
||||
\emph on
|
||||
uniformly
|
||||
\emph default
|
||||
to the same size (when possible).
|
||||
The
|
||||
\family typewriter
|
||||
lvresize
|
||||
\family default
|
||||
tool is documented elsewhere.
|
||||
\end_layout
|
||||
|
||||
\begin_layout Enumerate
|
||||
Check that all MARS switches are on.
|
||||
If not, say
|
||||
\family typewriter
|
||||
marsadm up mydata
|
||||
\family default
|
||||
everywhere.
|
||||
\end_layout
|
||||
|
||||
\begin_layout Enumerate
|
||||
At the primary:
|
||||
\family typewriter
|
||||
marsadm resize mydata
|
||||
\end_layout
|
||||
|
||||
\begin_layout Enumerate
|
||||
If you have intermediate layers such as iSCSI, you may need some
|
||||
\family typewriter
|
||||
iscsiadm
|
||||
\family default
|
||||
update or other command.
|
||||
\end_layout
|
||||
|
||||
\begin_layout Enumerate
|
||||
Now you may increase your filesystem.
|
||||
This is specific to the filesystem type and documented elsewhere.
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
\noindent
|
||||
\begin_inset Graphics
|
||||
filename images/lightbulb_brightlit_benj_.png
|
||||
lyxscale 12
|
||||
scale 7
|
||||
|
||||
\end_inset
|
||||
|
||||
Hint: the secondaries will start syncing the increased new part of the underlyin
|
||||
g primary disk.
|
||||
In many cases, this is not really needed, because the new part contains
|
||||
only junk data which nobody cares about.
|
||||
If you are sure and if you know what you are doing, you may use
|
||||
\family typewriter
|
||||
marsadm fake-sync mydata
|
||||
\family default
|
||||
to abort such unnecessary traffic.
|
||||
\end_layout
|
||||
|
||||
\begin_layout Section
|
||||
The State of MARS
|
||||
\begin_inset CommandInset label
|
||||
|
@ -23328,28 +23410,37 @@ status open
|
|||
\begin_layout Plain Layout
|
||||
|
||||
\size scriptsize
|
||||
Precondition: all disks in the cluster participating in
|
||||
Precondition: The local host must be primary.
|
||||
All disks in the cluster participating in
|
||||
\family typewriter
|
||||
$res
|
||||
\family default
|
||||
must be physically larger than the logical resource size (e.g.
|
||||
by use of
|
||||
must be physically larger than the logical resource size (e.g., by use of
|
||||
|
||||
\family typewriter
|
||||
lvm
|
||||
\family default
|
||||
; can be checked by macros
|
||||
\family typewriter
|
||||
%disk-size{}
|
||||
\family default
|
||||
and
|
||||
\family typewriter
|
||||
%resource-size{}
|
||||
\family default
|
||||
).
|
||||
When the optional
|
||||
\family typewriter
|
||||
$size
|
||||
\family default
|
||||
argument is present, it must be smaller than the minimum of all physical
|
||||
sizes, but larger than the current logical size.
|
||||
sizes, but larger than the current logical size of the resource.
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
\size scriptsize
|
||||
Postcondition: at the (future) primary (if any), the logical size of
|
||||
Postcondition: the logical size of
|
||||
\family typewriter
|
||||
/dev/mars/$dev_name
|
||||
\family default
|
||||
|
|
|
@ -422,6 +422,13 @@ sub check_res {
|
|||
return $res;
|
||||
}
|
||||
|
||||
# Return the size in bytes of the local MARS block device belonging to
# resource $res, as currently reported by the kernel via sysfs.
#
# Arguments:
#   $cmd - not used here; accepted so that callers can pass their own @_
#   $res - name of the resource
#
# The device name is taken from the resource's device-$host symlink, and
# the number read from the sysfs "size" attribute is multiplied by 512.
#
# Returns 0 (after emitting a warning) when the attribute cannot be read
# or does not start with a decimal number.
sub _get_mars_size {
  my ($cmd, $res) = @_;
  my $dev_name = get_link("$mars/resource-$res/device-$host");
  my $info = "/sys/devices/virtual/block/mars!$dev_name/size";
  # Read the attribute directly instead of running `cat` through a shell:
  # the path contains '!' and an interpolated device name, neither of
  # which should ever be subject to shell parsing.
  my $sectors = 0;
  if (open(my $fh, "<", $info)) {
    my $line = <$fh>;
    close($fh);
    if (defined($line) && $line =~ m/^\s*(\d+)/) {
      $sectors = $1;
    } else {
      lwarn("cannot parse the contents of '$info'\n");
    }
  } else {
    lwarn("cannot read '$info': $!\n");
  }
  return $sectors * 512;
}
|
||||
|
||||
sub check_sizes {
|
||||
my ($res, $peer) = @_;
|
||||
my $logical_size = get_link("$mars/resource-$res/size");
|
||||
|
@ -2192,8 +2199,13 @@ sub invalidate_res_phase3 {
|
|||
}
|
||||
}
|
||||
|
||||
sub resize_res {
|
||||
my %resize_device_size;
|
||||
my %resize_old_size;
|
||||
my %resize_new_size;
|
||||
|
||||
sub resize_phase0 {
|
||||
my ($cmd, $res, $size_arg) = @_;
|
||||
ldie "mars kernel module is not loaded. This is needed for communication with some other hosts!\n" if !is_module_loaded();
|
||||
my $new_size = 0;
|
||||
if ($size_arg) {
|
||||
$new_size = get_size($size_arg);
|
||||
|
@ -2201,39 +2213,70 @@ sub resize_res {
|
|||
lprint "new size: $new_size bytes\n";
|
||||
}
|
||||
check_primary(@_);
|
||||
my $my_size = get_link("$mars/resource-$res/actsize-$host");
|
||||
my @actsizes = glob("$mars/resource-$res/actsize-*");
|
||||
ldie "resource $res has no actsize-* symlinks\n" unless @actsizes;
|
||||
my $lnk = "$mars/resource-$res/size";
|
||||
my $old_size = get_link($lnk);
|
||||
my $min_size = 0;
|
||||
my $possible_size = 0;
|
||||
foreach my $actsize (@actsizes) {
|
||||
my $this_size = get_link($actsize);
|
||||
if (!$min_size || $this_size < $min_size) {
|
||||
$min_size = $this_size;
|
||||
if (!$possible_size || $this_size < $possible_size) {
|
||||
$possible_size = $this_size;
|
||||
}
|
||||
}
|
||||
lprint "old_size=$old_size\n";
|
||||
lprint "min_size=$min_size\n";
|
||||
$new_size = $min_size if !$new_size;
|
||||
lprint "possible_size=$possible_size\n";
|
||||
$new_size = $possible_size if !$new_size;
|
||||
lprint "new_size=$new_size\n";
|
||||
ldie "new size $new_size is higher than the minimum size of all volumes $min_size" if $new_size > $min_size; # no override with --force possible
|
||||
# for now, disallow decreasing until some bugs are fixed
|
||||
ldie "new size $new_size is higher than the possible size (minimum of all volumes) $possible_size" if $new_size > $possible_size; # no override with --force possible
|
||||
# disallow decreasing
|
||||
ldie "only increases of the size are possible!\n" if $new_size < $old_size;
|
||||
ldie "only increases of the size are possible without --force\n" if $new_size <= $old_size && !$force;
|
||||
foreach my $switch (glob("$mars/resource-$res/todo-*/sync")) {
|
||||
my $this_switch = get_link($switch);
|
||||
ldie "sync on '$switch' is switched on -- use marsadm pause-sync to stop\n" unless !$this_switch;
|
||||
}
|
||||
my @syncsizes = glob("$mars/resource-$res/syncstatus-$host");
|
||||
foreach my $syncsize (@syncsizes) {
|
||||
my $this_size = get_link($syncsize);
|
||||
ldie "sync on $syncsize has not yet finished: $this_size != $old_size (DANGEROUS FIX: if you know what you are doing, marsadm fake-sync can 'fix' it -- but this may need a full-sync afterwards)\n" unless $this_size == $old_size;
|
||||
}
|
||||
foreach my $syncsize (@syncsizes) {
|
||||
my $this_size = get_link($syncsize);
|
||||
set_link($new_size, $syncsize);
|
||||
}
|
||||
my $waste = $my_size - $new_size;
|
||||
lwarn "You are wasting $waste bytes locally\n" if $my_size > $new_size;
|
||||
# remember values
|
||||
$resize_device_size{$res} = _get_mars_size(@_);
|
||||
$resize_old_size{$res} = $old_size;
|
||||
lwarn "internal mismatch between actual device size and resource size: $resize_device_size{$res} != $resize_old_size{$res}\n" unless $resize_device_size{$res} == $resize_old_size{$res};
|
||||
$resize_new_size{$res} = $new_size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
# Phase 1 of the resize command: publish the new size.
#
# Arguments:
#   $cmd - command name (only passed through to check_primary())
#   $res - name of the resource
#
# Uses the old / new sizes stored in %resize_old_size and %resize_new_size
# for this resource. Dies when either value is missing or zero, or when
# the new size is smaller than the old one.
sub resize_phase1 {
  my ($cmd, $res) = @_;
  my $size_before = $resize_old_size{$res};
  ldie "bad internal size value\n" unless $size_before;
  my $size_after = $resize_new_size{$res};
  ldie "bad internal size value\n" unless $size_after;
  # Paranoia: repeat the "no shrinking" check on the remembered values.
  if ($size_after < $size_before) {
    ldie "only increases of the size are possible!\n";
  }
  check_primary(@_);
  # First step: mark the primary data / its size as authoritative.
  # finish_links() is called right away to give errors a chance to pop up
  # before the resource size itself is touched.
  set_link($size_after, "$mars/resource-$res/syncstatus-$host");
  finish_links();
  # Second step: now set the new size of the resource.
  set_link($size_after, "$mars/resource-$res/size");
  finish_links();
}
|
||||
|
||||
# Phase 2 of the resize command: wait until the local MARS device shows
# the new size.
#
# Arguments:
#   $cmd - command name (only passed through to _get_mars_size())
#   $res - name of the resource
#
# Polls _get_mars_size() until it equals the size stored in
# %resize_new_size for this resource. When the device size differs from
# the value stored in %resize_device_size but does not match the new size
# exactly, this is reported as a presumable rounding problem and the
# waiting stops. Otherwise sleep_timeout() is called between two polls.
#
# Dies when the stored old or new size is missing or zero.
sub resize_phase2 {
  my ($cmd, $res) = @_;
  # The old size is only checked for presence; it is not needed below.
  ldie "bad internal size value\n" unless $resize_old_size{$res};
  my $new_size = $resize_new_size{$res} or ldie "bad internal size value\n";
  for (;;) {
    my $new_device_size = _get_mars_size(@_);
    if ($new_device_size == $new_size) {
      lprint "Device size is now $new_device_size.\n";
      last;
    }
    lprint "Device size $new_device_size has not yet reached the new size $new_size.\n";
    if ($new_device_size != $resize_device_size{$res}) {
      # Something has changed, so do not wait any longer for an exact match.
      lwarn "The size has changed, but did not reach the correct value.\n";
      lwarn "Assuming some rounding problems (which may occur at some device types)\n";
      last;
    }
    sleep_timeout();
  }
}
|
||||
|
||||
sub role_cmd {
|
||||
|
@ -4561,7 +4604,12 @@ my %cmd_table =
|
|||
"When successful, /dev/mars/\$res at the primary will be increased",
|
||||
"in size. In addition, all secondaries will start an incremental",
|
||||
"fast full-sync to get the enlarged parts from the primary.",
|
||||
\&resize_res,
|
||||
\&resize_phase0,
|
||||
"check preconditions",
|
||||
\&resize_phase1,
|
||||
"set new size",
|
||||
\&resize_phase2,
|
||||
"wait for change",
|
||||
],
|
||||
"check-resize" => \&ignore_cmd,
|
||||
"create-md" => \&senseless_cmd,
|
||||
|
|
Loading…
Reference in New Issue