diff --git a/docu/mars-manual.lyx b/docu/mars-manual.lyx index 2754ebae..8dbdb279 100644 --- a/docu/mars-manual.lyx +++ b/docu/mars-manual.lyx @@ -6045,6 +6045,88 @@ reference "sec:Scripting-HOWTO" . \end_layout +\begin_layout Subsection +Online Resizing during Operation +\end_layout + +\begin_layout Standard +You should have LVM or some other means of increasing the physical size + of your disk (e.g. + via firmware of some RAID controllers). + The network must be healthy. + Do the following steps: +\end_layout + +\begin_layout Enumerate +Increase your local disks (usually +\family typewriter +/dev/vg/mydata +\family default +) +\emph on +everywhere +\emph default + in the whole cluster. + In order to avoid wasting space, increase them +\emph on +uniformly +\emph default + to the same size (when possible). + The +\family typewriter +lvresize +\family default + tool is documented elsewhere. +\end_layout + +\begin_layout Enumerate +Check that all MARS switches are on. + If not, say +\family typewriter +marsadm up mydata +\family default + everywhere. +\end_layout + +\begin_layout Enumerate +At the primary: +\family typewriter +marsadm resize mydata +\end_layout + +\begin_layout Enumerate +If you have intermediate layers such as iSCSI, you may need some +\family typewriter +iscsiadm +\family default + update or other command. +\end_layout + +\begin_layout Enumerate +Now you may increase your filesystem. + This is specific to the filesystem type and documented elsewhere. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Hint: the secondaries will start syncing the increased new part of the underlyin +g primary disk. + In many cases, this is not really needed, because the new area contains only + junk data whose content does not matter. 
+ If you are sure and if you know what you are doing, you may use +\family typewriter +marsadm fake-sync mydata +\family default + to abort such unnecessary traffic. +\end_layout + \begin_layout Section The State of MARS \begin_inset CommandInset label @@ -23328,28 +23410,37 @@ status open \begin_layout Plain Layout \size scriptsize -Precondition: all disks in the cluster participating in +Precondition: The local host must be primary. + All disks in the cluster participating in \family typewriter $res \family default - must be physically larger than the logical resource size (e.g. - by use of + must be physically larger than the logical resource size (e.g. by use of + \family typewriter lvm \family default +; can be checked by macros +\family typewriter +%disk-size{} +\family default + and +\family typewriter +%resource-size{} +\family default ). When the optional \family typewriter $size \family default argument is present, it must be smaller than the minimum of all physical - sizes, but larger than the current logical size. + sizes, but larger than the current logical size of the resource. 
\end_layout \begin_layout Plain Layout \size scriptsize -Postcondition: at the (future) primary (if any), the logical size of +Postcondition: the logical size of \family typewriter /dev/mars/$dev_name \family default diff --git a/userspace/marsadm b/userspace/marsadm index a5a2db81..5c02a0cf 100755 --- a/userspace/marsadm +++ b/userspace/marsadm @@ -422,6 +422,13 @@ sub check_res { return $res; } +sub _get_mars_size { + my ($cmd, $res) = @_; + my $dev_name = get_link("$mars/resource-$res/device-$host"); + my $info = "/sys/devices/virtual/block/mars!$dev_name/size"; + return `cat $info` * 512; +} + sub check_sizes { my ($res, $peer) = @_; my $logical_size = get_link("$mars/resource-$res/size"); @@ -2192,8 +2199,13 @@ sub invalidate_res_phase3 { } } -sub resize_res { +my %resize_device_size; +my %resize_old_size; +my %resize_new_size; + +sub resize_phase0 { my ($cmd, $res, $size_arg) = @_; + ldie "mars kernel module is not loaded. This is needed for communication with some other hosts!\n" if !is_module_loaded(); my $new_size = 0; if ($size_arg) { $new_size = get_size($size_arg); @@ -2201,39 +2213,70 @@ sub resize_res { lprint "new size: $new_size bytes\n"; } check_primary(@_); + my $my_size = get_link("$mars/resource-$res/actsize-$host"); my @actsizes = glob("$mars/resource-$res/actsize-*"); ldie "resource $res has no actsize-* symlinks\n" unless @actsizes; my $lnk = "$mars/resource-$res/size"; my $old_size = get_link($lnk); - my $min_size = 0; + my $possible_size = 0; foreach my $actsize (@actsizes) { my $this_size = get_link($actsize); - if (!$min_size || $this_size < $min_size) { - $min_size = $this_size; + if (!$possible_size || $this_size < $possible_size) { + $possible_size = $this_size; } } lprint "old_size=$old_size\n"; - lprint "min_size=$min_size\n"; - $new_size = $min_size if !$new_size; + lprint "possible_size=$possible_size\n"; + $new_size = $possible_size if !$new_size; lprint "new_size=$new_size\n"; - ldie "new size $new_size is higher than the minimum 
size of all volumes $min_size" if $new_size > $min_size; # no override with --force possible - # for now, disallow decreasing until some bugs are fixed + ldie "new size $new_size is higher than the possible size (minimum of all volumes) $possible_size" if $new_size > $possible_size; # no override with --force possible + # disallow decreasing ldie "only increases of the size are possible!\n" if $new_size < $old_size; - ldie "only increases of the size are possible without --force\n" if $new_size <= $old_size && !$force; - foreach my $switch (glob("$mars/resource-$res/todo-*/sync")) { - my $this_switch = get_link($switch); - ldie "sync on '$switch' is switched on -- use marsadm pause-sync to stop\n" unless !$this_switch; - } - my @syncsizes = glob("$mars/resource-$res/syncstatus-$host"); - foreach my $syncsize (@syncsizes) { - my $this_size = get_link($syncsize); - ldie "sync on $syncsize has not yet finished: $this_size != $old_size (DANGEROUS FIX: if you know what you are doing, marsadm fake-sync can 'fix' it -- but this may need a full-sync afterwards)\n" unless $this_size == $old_size; - } - foreach my $syncsize (@syncsizes) { - my $this_size = get_link($syncsize); - set_link($new_size, $syncsize); - } + my $waste = $my_size - $new_size; + lwarn "You are wasting $waste bytes locally\n" if $my_size > $new_size; + # remember values + $resize_device_size{$res} = _get_mars_size(@_); + $resize_old_size{$res} = $old_size; + lwarn "internal mismatch between actual device size and resource size: $resize_device_size{$res} != $resize_old_size{$res}\n" unless $resize_device_size{$res} == $resize_old_size{$res}; + $resize_new_size{$res} = $new_size; + return 0; +} + +sub resize_phase1 { + my ($cmd, $res) = @_; + my $old_size = $resize_old_size{$res} or ldie "bad internal size value\n"; + my $new_size = $resize_new_size{$res} or ldie "bad internal size value\n"; + # for safety, check again + ldie "only increases of the size are possible!\n" if $new_size < $old_size; + 
check_primary(@_); + # Mark the primary data / its size as authoritative + my $act_lnk = "$mars/resource-$res/syncstatus-$host"; + set_link($new_size, $act_lnk); + finish_links(); # Chance for errors to pop up + # Now set the new resource size + my $lnk = "$mars/resource-$res/size"; set_link($new_size, $lnk); + finish_links(); +} + +sub resize_phase2 { + my ($cmd, $res) = @_; + my $old_size = $resize_old_size{$res} or ldie "bad internal size value\n"; + my $new_size = $resize_new_size{$res} or ldie "bad internal size value\n"; + for (;;) { + my $new_device_size = _get_mars_size(@_); + if ($new_device_size == $resize_new_size{$res}) { + lprint "Device size is now $new_device_size.\n"; + last; + } + lprint "Device size $new_device_size has not yet reached the new size $resize_new_size{$res}.\n"; + if ($new_device_size != $resize_device_size{$res}) { + lwarn "The size has changed, but did not reach the correct value."; + lwarn "Assuming some rounding problems (which may occur at some device types)\n"; + last; + } + sleep_timeout(); + } } sub role_cmd { @@ -4561,7 +4604,12 @@ my %cmd_table = "When successful, /dev/mars/\$res at the primary will be increased", "in size. In addition, all secondaries will start an incremental", "fast full-sync to get the enlarged parts from the primary.", - \&resize_res, + \&resize_phase0, + "check preconditions", + \&resize_phase1, + "set new size", + \&resize_phase2, + "wait for change", ], "check-resize" => \&ignore_cmd, "create-md" => \&senseless_cmd,