diff --git a/docu/mars-for-programmers.lyx b/docu/mars-for-programmers.lyx new file mode 100644 index 00000000..54139c6a --- /dev/null +++ b/docu/mars-for-programmers.lyx @@ -0,0 +1,43288 @@ +#LyX 2.3 created this file. For more info see http://www.lyx.org/ +\lyxformat 544 +\begin_document +\begin_header +\save_transient_properties true +\origin unavailable +\textclass scrreprt +\begin_preamble +\usepackage{listings} +\end_preamble +\options abstracton,dvipsnames +\use_default_options true +\begin_modules +customHeadersFooters +enumitem +fixltx2e +\end_modules +\maintain_unincluded_children false +\language english +\language_package default +\inputencoding auto +\fontencoding global +\font_roman "default" "default" +\font_sans "default" "default" +\font_typewriter "default" "default" +\font_math "auto" "auto" +\font_default_family rmdefault +\use_non_tex_fonts false +\font_sc false +\font_osf false +\font_sf_scale 100 100 +\font_tt_scale 100 100 +\use_microtype false +\use_dash_ligatures false +\graphics default +\default_output_format default +\output_sync 0 +\bibtex_command default +\index_command default +\paperfontsize 10 +\spacing single +\use_hyperref true +\pdf_title "MARS Manual" +\pdf_author "Thomas Schöbel-Theuer" +\pdf_bookmarks true +\pdf_bookmarksnumbered false +\pdf_bookmarksopen false +\pdf_bookmarksopenlevel 1 +\pdf_breaklinks true +\pdf_pdfborder true +\pdf_colorlinks true +\pdf_backref false +\pdf_pdfusetitle true +\papersize a4paper +\use_geometry true +\use_package amsmath 1 +\use_package amssymb 1 +\use_package cancel 1 +\use_package esint 1 +\use_package mathdots 1 +\use_package mathtools 1 +\use_package mhchem 1 +\use_package stackrel 1 +\use_package stmaryrd 1 +\use_package undertilde 1 +\cite_engine basic +\cite_engine_type default +\biblio_style plain +\use_bibtopic false +\use_indices false +\paperorientation portrait +\suppress_date false +\justification true +\use_refstyle 1 +\use_minted 0 +\index Index +\shortcut idx +\color #008000 
+\end_index +\leftmargin 3.7cm +\topmargin 2.7cm +\rightmargin 2.8cm +\bottommargin 2.3cm +\secnumdepth 3 +\tocdepth 3 +\paragraph_separation indent +\paragraph_indentation default +\is_math_indent 0 +\math_numbering_side default +\quotes_style english +\dynamic_quotes 0 +\papercolumns 1 +\papersides 2 +\paperpagestyle headings +\tracking_changes false +\output_changes false +\html_math_output 0 +\html_css_as_file 0 +\html_be_strict false +\end_header + +\begin_body + +\begin_layout Title + +\family typewriter +MARS User Manual +\begin_inset Newline newline +\end_inset + + +\begin_inset space ~ +\end_inset + + +\end_layout + +\begin_layout Subtitle +Multiversion Asynchronous Replicated Storage +\begin_inset Newline newline +\end_inset + + +\begin_inset space ~ +\end_inset + + +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/earth-mars-transfer.fig + width 70col% + +\end_inset + + +\end_layout + +\begin_layout Author +Thomas Schöbel-Theuer ( +\family typewriter +tst@1und1.de +\family default +) +\end_layout + +\begin_layout Date +Version 0.1a-72 +\end_layout + +\begin_layout Lowertitleback +\noindent +Copyright (C) 2013-16 Thomas Schöbel-Theuer +\begin_inset Newline newline +\end_inset + +Copyright (C) 2013-16 1&1 Internet AG (see +\begin_inset Flex URL +status open + +\begin_layout Plain Layout + +http://www.1und1.de +\end_layout + +\end_inset + + shortly called 1&1 in the following). +\begin_inset Newline newline +\end_inset + + +\size footnotesize +Permission is granted to copy, distribute and/or modify this document under + the terms of the GNU Free Documentation License, Version 1.3 or any later + version published by the Free Software Foundation; with no Invariant Sections, + no Front-Cover Texts, and no Back-Cover Texts. 
+ A copy of the license is included in the section entitled +\begin_inset Quotes eld +\end_inset + + +\begin_inset CommandInset ref +LatexCommand nameref +reference "chap:GNU-FDL" + +\end_inset + + +\begin_inset Quotes erd +\end_inset + +. +\end_layout + +\begin_layout Abstract + +\family typewriter +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +sloppy +\end_layout + +\end_inset + + MARS +\family default + is a block-level storage replication system for long distances / flaky + networks under GPL. + It runs as a Linux kernel module. + The sysadmin interface is similar to DRBD +\begin_inset Foot +status open + +\begin_layout Plain Layout +Registered trademarks are the property of their respective owner. +\end_layout + +\end_inset + +, but its internal engine is completely different from DRBD: it works with + +\series bold +transaction logging +\series default +, similar to some database systems. +\end_layout + +\begin_layout Abstract +Therefore, MARS can provide stronger +\series bold +consistency guarantees +\series default +. + Even in case of network bottlenecks / problems / failures, the secondaries + may become outdated (reflect an elder state), but never become inconsistent. + In contrast to DRBD, MARS preserves the +\series bold +order of write operations +\series default + even when the network is flaky ( +\series bold +Anytime Consistency +\series default +). +\end_layout + +\begin_layout Abstract +The current version of MARS supports +\begin_inset Formula $k>2$ +\end_inset + + replicas and works +\series bold +asynchronously +\series default +. + Therefore, application performance is completely decoupled from any network + problems. + Future versions are planned to also support synchronous or near-synchronous + modes. +\end_layout + +\begin_layout Abstract +MARS supports a new method for building Cloud Storage / Software Defined + Storage, called +\series bold +LV Football +\series default +. 
+\end_layout + +\begin_layout Abstract +It comes with some automation scripts, leading to functionality similar + to that of Kubernetes, but devoted to stateful LVs over +\series bold +virtual LVM pools +\series default + in the petabyte range. +\end_layout + +\begin_layout Abstract +\paragraph_spacing double +\noindent +\begin_inset space ~ +\end_inset + + +\begin_inset Newline newline +\end_inset + + +\begin_inset space ~ +\end_inset + + +\begin_inset Newline newline +\end_inset + + +\begin_inset Box Frameless +position "c" +hor_pos "c" +has_inner_box 1 +inner_pos "c" +use_parbox 0 +use_makebox 1 +width "100col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/earth-mars-transfer.fig + width 70col% + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset CommandInset toc +LatexCommand tableofcontents + +\end_inset + + +\end_layout + +\begin_layout Chapter +Quick Start Guide +\begin_inset CommandInset label +LatexCommand label +name "chap:Quick-Start-Guide" + +\end_inset + + +\end_layout + +\begin_layout Standard +This chapter is for impatient but experienced sysadmins who already know + DRBD. + For more complete information, refer to chapter +\begin_inset CommandInset ref +LatexCommand nameref +reference "chap:The-Sysadmin-Interface" + +\end_inset + +. +\end_layout + +\begin_layout Section +Preparation: What you Need +\begin_inset CommandInset label +LatexCommand label +name "sec:Preparation:-What-you" + +\end_inset + + +\end_layout + +\begin_layout Standard +Typically, you will use MARS at servers in a datacenter for replication + of big masses of data. 
+\end_layout + +\begin_layout Standard +Typically, you will use MARS for replication +\emph on +between +\emph default + multiple datacenters, when the distances are greater than +\begin_inset Formula $\approx50$ +\end_inset + + km. + Many other solutions, even from commercial storage vendors, will not work + reliably over large distances when your network is not +\emph on +extremely +\emph default + reliable, or when you try to push huge masses of data from high-performance + applications through a network bottleneck. + If you have ever encountered such problems (or want to avoid them in advance), + MARS is for you. +\end_layout + +\begin_layout Standard +You can use MARS either at dedicated storage servers (e.g. + for serving Windows clients), or at standalone Linux servers where CPU + and storage are not separated. +\end_layout + +\begin_layout Standard +In order to protect your data from low-level disk failures, you should use + a hardware RAID controller with BBU. + Software RAID is explicitly +\emph on +not +\emph default + recommended, because it generally provides worse performance due to the + lack of a hardware BBU (for some benchmark comparisons with/without BBU, see + +\begin_inset Flex URL +status collapsed + +\begin_layout Plain Layout + +https://github.com/schoebel/blkreplay/raw/master/doc/blkreplay.pdf +\end_layout + +\end_inset + +). +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + Don't set your hardware BBU cache to +\begin_inset Quotes eld +\end_inset + +writethrough +\begin_inset Quotes erd +\end_inset + + mode. + This may lead to tremendous performance degradation. + Use the +\begin_inset Quotes eld +\end_inset + +writeback +\begin_inset Quotes erd +\end_inset + + strategy instead. 
+ It should be operationally safe, because in case of power loss the BBU + cache content will be preserved thanks to the battery, and/or thanks to + goldcaps for saving the cache content into some flash chips. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +For better performance, use newer MARS versions from branch +\family typewriter +mars0.1a.y +\family default + or later. + Check the tips and tricks from sections +\begin_inset CommandInset ref +LatexCommand vref +reference "sec:IO-Performance-Tuning" +plural "false" +caps "false" +noprefix "false" + +\end_inset + + and +\begin_inset CommandInset ref +LatexCommand vref +reference "subsec:Tuning-Network-Performance" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +. + You may also play around with +\family typewriter +/proc/sys/mars/aio_sync_mode +\family default + when actuality is less important. + Further tuning of +\family typewriter +/proc/sys/mars/io_tuning/ +\family default + and many more tunables is currently only recommended for experts. + Future versions of MARS are planned to provide better performance with + software RAID. +\end_layout + +\begin_layout Standard +Typically, you will need more than one RAID set +\begin_inset Foot +status open + +\begin_layout Plain Layout +For low-cost storage, RAID-5 is no longer regarded as safe for today's typical + storage sizes, because the error rate is regarded as too high. + Therefore, use RAID-6. + If you need more than 15 disks in total, create multiple RAID sets (each + having at most 15 disks, better about 12 disks) and stripe them via LVM + (or via your hardware RAID controller if it supports RAID-60). +\end_layout + +\end_inset + + for big masses of data. 
+ Therefore, use of LVM is also recommended +\begin_inset Foot +status open + +\begin_layout Plain Layout +You may also combine MARS with commercial storage boxes connected via Fibrechann +el or iSCSI, but we have not yet operational experiences at 1&1 with such + setups. +\end_layout + +\end_inset + + for your data. +\end_layout + +\begin_layout Standard +MARS' tolerance of networking problems comes with some cost. + You will need some extra space for the transaction logfiles of MARS, residing + at the +\family typewriter +/mars/ +\family default + filesystem. +\end_layout + +\begin_layout Standard +The exact space requirements for +\family typewriter +/mars/ +\family default + depend on the +\emph on +average write rate +\emph default + of your application, not on the size of your data. + We found that only few applications are writing more than 1 TB per day. + Most are writing even less than 100 GB per day. + Usually, you want to dimension +\family typewriter +/mars/ +\family default + such that you can survive a network loss lasting 3 days / about one weekend. + This can be achieved with current technology rather easily: as a simple + rule of thumb, just use one +\series bold +dedicated disk +\series default + having a capacity of 4 TB or more. + Typically, that will provide you with plenty of headroom even for bigger + networking incidents. +\end_layout + +\begin_layout Standard +Dedicated disks for +\family typewriter +/mars/ +\family default + have another advantage: their mechanical head movement is completely independen +t from your data head movements. + For best performance, attach that dedicated disk to your hardware RAID + controller with BBU, building a separate RAID set (even if it consists + only of a single disk – notice that the +\series bold +hardware BBU +\series default + is the crucial point). +\end_layout + +\begin_layout Standard +If you are concerned about reliability, use two disks switched together + as a relatively small RAID-1 set. 
+ For extremely high performance demands, you may consider (and check) RAID-10. +\end_layout + +\begin_layout Standard +Since the transaction logfiles are highly sequential in their access pattern, + a cheap but high-capacity SATA disk (or nearline-SAS disk) is usually sufficien +t. + At the time of this writing, standard SATA SSDs have shown to be +\emph on +not +\emph default + (yet) preferable. + Although they offer high random IOPS rate, their sequential throughput + is worse, and their long-term stability is questioned by many people at + the time of this writing. + However, as technology evolves and becomes more mature, this could change + in future. +\end_layout + +\begin_layout Standard +Use +\family typewriter +ext4 +\family default + for +\family typewriter +/mars/ +\family default +. + Avoid +\family typewriter +ext3 +\family default +, and don't use +\family typewriter +xfs +\family default + +\begin_inset Foot +status open + +\begin_layout Plain Layout +It seems that the late internal resource allocation strategy of +\family typewriter +xfs +\family default + (or another currently unknown reason) could be the reason for some resource + deadlocks which appear only with +\family typewriter +xfs +\family default + and only under +\emph on +extremely +\emph default + high IO load in combination with high memory pressure. +\end_layout + +\end_inset + + at all. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + Notice that the filesystem +\family typewriter +/mars/ +\family default + has nothing to do with an ordinary filesystem. + It is completely reserved for MARS internal purposes, namely as a +\series bold +storage container +\series default + for MARS' persistent data. 
+ It does not obey any userspace rules like FHS (filesystem hierarchy standard), + and it should not be accessed by any userspace tool except the official + +\family typewriter +marsadm +\family default + tool. + Its internal data format should be regarded as a +\series bold +blackbox +\series default + by you. + The internal data format may change in the future, or the complete +\family typewriter +/mars/ +\family default + filesystem may even be replaced by a totally different container format, + while the official +\family typewriter +marsadm +\family default + interface is supposed to remain stable. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +That said, you might look into its contents +\emph on +by hand +\emph default + for curiosity or for +\emph on +debugging purposes +\emph default +, and only as root. + But don't program any tools / monitoring scripts / etc bypassing the official + +\family typewriter +marsadm +\family default + tool. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + Like DRBD, the current version of MARS has +\series bold +no security +\series default + built in. + MARS assumes that it is running in a +\series bold +trusted network +\series default +. + Anyone who can connect to the MARS ports (default 7777 to 7779) can potentially + break in and become root! Therefore, you +\series bold +must +\series default + protect your network by appropriate means, such as firewalling and/or encrypted + VPN. +\end_layout + +\begin_layout Standard +Currently, MARS provides no shared secret like DRBD, because a simple shared + secret is way too weak to provide any real security (potentially misleading + people about the real level of security). 
+ Future versions of MARS should provide at least 2-factor authorization, + and encryption via dynamic session keys. + Until that is implemented, use a secured VPN instead! And don't forget + to +\emph on +audit +\emph default + it for security holes! +\end_layout + +\begin_layout Section +Setup Primary and Secondary Cluster Nodes +\begin_inset CommandInset label +LatexCommand label +name "sec:Setup-Primary-and" + +\end_inset + + +\end_layout + +\begin_layout Standard +If you already use DRBD, you may migrate to MARS (or even back from MARS + to DRBD) if you use +\emph on +external +\begin_inset Foot +status open + +\begin_layout Plain Layout + +\emph on +Internal +\emph default + DRBD metadata should also work as long as the filesystem inside your block + device / disk already exists and is not re-created. + The latter would destroy the DRBD metadata, but even that will not hurt + you really: you can always switch back to DRBD using +\emph on +external +\emph default + metadata, as long as you have some small spare space somewhere. +\end_layout + +\end_inset + + +\emph default + DRBD metadata (which is not touched by MARS). + +\end_layout + +\begin_layout Subsection +Kernel and MARS Module +\end_layout + +\begin_layout Standard +The MARS kernel module should be available or can be built via one of the + following methods: +\end_layout + +\begin_layout Enumerate +As an external Debian or rpm kernel module, as provided by a package contributor + (or hopefully by standard distros in the future). +\end_layout + +\begin_layout Enumerate +As a separate kernel module, only for experienced +\begin_inset Foot +status open + +\begin_layout Plain Layout +You should be familiar with the problems arising from orthogonal combination + of different kernel versions with different MARS module versions and with + different +\family typewriter +marsadm +\family default + userspace tool versions at the package management level. 
+ Hint: +\family typewriter +modinfo +\family default + is your friend. +\end_layout + +\end_inset + + sysadmins: see file +\family typewriter +Makefile.dist +\family default + (tested with some older versions of Debian; may need some extra work with + other distros). +\end_layout + +\begin_layout Enumerate +Build for senior sysadmins or developers, inplace in the kernel source tree: + first apply +\family typewriter +0001-mars-minimum-pre-patch-for-mars.patch +\family default + and +\family typewriter +0001-mars-SPECIAL-for-in-tree-build.patch +\family default + or similar, then +\family typewriter +cd block/ && git clone --recurse-submodules https://github.com/schoebel/mars +\family default +. + Then +\family typewriter +cd .. + +\family default + and build your kernel as usual. + Config options for MARS should appear under +\begin_inset Quotes eld +\end_inset + +Enable the block layer +\begin_inset Quotes erd +\end_inset + +. + Just activate MARS as a +\series bold +kernel module +\series default + via +\begin_inset Quotes eld +\end_inset + +m +\begin_inset Quotes erd +\end_inset + + (don't try a fixed compile-in), and leave all other MARS config options + at the default (unless you know what you are doing). +\end_layout + +\begin_layout Standard +Further / more accurate / latest instructions can be found in +\family typewriter +README +\family default + and in +\family typewriter +INSTALL +\family default +. + You must not only install the kernel and the +\family typewriter +mars.ko +\family default + kernel module to all of your cluster nodes, but also the +\family typewriter +marsadm +\family default + userspace tool. +\end_layout + +\begin_layout Standard +Starting with +\family typewriter +mars0.1stable38 +\family default + and other branches having merged this feature, a prepatch for vanilla kernels + 3.2 through 4.4 is no longer needed. 
+ However, +\series bold +IO performance +\series default + is currently somewhat worse when the pre-patch is not applied. + This will be addressed in a later release. +\end_layout + +\begin_layout Standard +Therefore, application of the pre-patch to the kernel is +\emph on +recommended +\emph default + for large-scale production systems for now. +\end_layout + +\begin_layout Standard +Kernel pre-patches can be found in the +\family typewriter +pre-patches/ +\family default + subdirectory of the MARS source tree. + Following are the types of pre-patches: +\end_layout + +\begin_layout Itemize + +\family typewriter +0001-mars-minimum-pre-patch-for-mars.patch +\family default + or similar. + Please prefer this one (when present for your kernel version) in front + of +\family typewriter +0001-mars-generic-pre-patch-for-mars.patch +\family default + or similar. + The latter should not be used anymore, except for testing or as an emergency + fallback. +\end_layout + +\begin_layout Itemize + +\family typewriter +0001-mars-SPECIAL-for-in-tree-build.patch +\family default + or similar. + This is +\emph on +only +\emph default + needed when building the MARS kernel module together with all other kernel + modules in a single +\family typewriter +make +\family default + pass. + For separate external module builds, this patch +\emph on +must not +\emph default + be applied (but the pre-patch +\emph on +should +\emph default + when possible). + When using this patch, please apply the aforementioned pre-patch also, + because your kernel is patched anyway. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Starting from version +\family typewriter +mars0.1stable56 +\family default + or +\family typewriter +mars0.1abeta8 +\family default +, +\series bold +submodules +\series default + have been added to the github repo of MARS. 
+ If you have an old checkout, please say +\family typewriter +git pull --recurse-submodules=yes +\family default + or similar. + Otherwise you may be missing an important future part of the MARS release, + without notice (depending on your local +\family typewriter +git +\family default + version and its local configuration). +\end_layout + +\begin_layout Subsection +Setup your Cluster Nodes +\begin_inset CommandInset label +LatexCommand label +name "subsec:Setup-your-Cluster" + +\end_inset + + +\end_layout + +\begin_layout Standard +For your cluster, you need at least two nodes. + In the following, they will be called A and B. + In the beginning, A will have the +\family typewriter +primary +\family default + role, while B will be your initial +\family typewriter +secondary +\family default +. + The roles may change later. +\end_layout + +\begin_layout Enumerate +You must be +\family typewriter +root +\family default +. +\end_layout + +\begin_layout Enumerate +On each of A and B, create the +\family typewriter +/mars/ +\family default + mountpoint. +\end_layout + +\begin_layout Enumerate +On each node, create an +\family typewriter +ext4 +\family default + filesystem on your separate disk / RAID set via +\family typewriter +mkfs.ext4 +\family default + (for requirements on size etc see section +\begin_inset CommandInset ref +LatexCommand nameref +reference "sec:Preparation:-What-you" + +\end_inset + +). +\end_layout + +\begin_layout Enumerate +On each node, mount that filesystem to +\family typewriter +/mars/ +\family default +. + It is advisable to add an entry to +\family typewriter +/etc/fstab +\family default +. +\end_layout + +\begin_layout Enumerate +For security reasons, execute +\family typewriter +chmod 0700 /mars +\family default + everywhere after +\family typewriter +/mars/ +\family default + has been mounted. 
+ If you forget this step, any following +\family typewriter +marsadm +\family default + command will drop you a warning, but will fix the problem for you. +\end_layout + +\begin_layout Enumerate +On node A, say +\family typewriter +marsadm create-cluster +\family default +. +\begin_inset Newline newline +\end_inset + +This must be done +\emph on +exactly once +\emph default +, on exactly one node of your cluster. + Never do this twice or on different nodes, because that would create two + different clusters which would have nothing to do with each other. + The +\family typewriter +marsadm +\family default + tool protects you against accidentally joining / merging two different + clusters. + If you accidentally created two different clusters, just umount that +\family typewriter +/mars/ +\family default + partition and start over with step 3 at that node. +\end_layout + +\begin_layout Enumerate +On node B, you must have a working +\family typewriter +ssh +\family default + connection to node A (as +\family typewriter +root +\family default +). + Test it by saying +\family typewriter +ssh A w +\family default + on node B. + It should work without entering a password (otherwise, use +\family typewriter +ssh-agent +\family default + to achieve that). + In addition, +\family typewriter +rsync +\family default + must be installed. +\end_layout + +\begin_layout Enumerate +On node B, say +\family typewriter +marsadm join-cluster A +\end_layout + +\begin_layout Enumerate +Only +\emph on +after +\begin_inset Foot +status open + +\begin_layout Plain Layout +In fact, you may already +\family typewriter +modprobe mars +\family default + at node A after the +\family typewriter +marsadm create-cluster +\family default +. + Just don't do any of the +\family typewriter +*-cluster +\family default + operations when the kernel module is loaded. + All other operations should have no such restriction. 
+\end_layout + +\end_inset + + +\emph default + that, do +\family typewriter +modprobe mars +\family default + on each node. +\end_layout + +\begin_layout Section +Creating and Maintaining Resources +\begin_inset CommandInset label +LatexCommand label +name "sec:Creating-and-Maintaining" + +\end_inset + + +\end_layout + +\begin_layout Standard +In the following example session, a block device +\family typewriter +/dev/lv-x/mydata +\family default + (shortly called +\emph on +disk +\emph default +) must already exist on both nodes A and B, respectively, having the same +\begin_inset Foot +status open + +\begin_layout Plain Layout +Actually, the disk at the initially secondary side may be larger than that + at the initially primary side. + This will waste space and is therefore not recommended. +\end_layout + +\end_inset + + size. + For the sake of simplicity, the disk (underlying block device) as well + as its later logical resource name as well as its later virtual device + name will all be named uniformly by the same suffix +\family typewriter +mydata +\family default +. + In general, you might name each of them differently, but that is not recommende +d since it may easily lead to confusion in larger installations. +\end_layout + +\begin_layout Standard +You may have already some data inside your disk +\family typewriter +/dev/lv-x/mydata +\family default + at the initially primary side A. + Before using it for MARS, it must be unused for any other purpose (such + as being mounted, or used by DRBD, etc). + MARS will require +\series bold +exclusive access +\series default + to it. +\end_layout + +\begin_layout Enumerate +On node A, say +\family typewriter +marsadm create-resource mydata /dev/lv-x/mydata +\family default +. +\begin_inset Newline newline +\end_inset + +As a result, a directory +\family typewriter +/mars/resource-mydata/ +\family default + will be created on node A, containing some symlinks. 
+ Node A will automatically start in the primary role for this resource. + Therefore, a new pseudo-device +\family typewriter +/dev/mars/mydata +\family default + will also appear after a few seconds. +\begin_inset Newline newline +\end_inset + +Note that the initial contents of +\family typewriter +/dev/mars/mydata +\family default + will be exactly the same as in your pre-existing disk +\family typewriter +/dev/lv-x/mydata +\family default +. +\begin_inset Newline newline +\end_inset + +If you like, you may already use +\family typewriter +/dev/mars/mydata +\family default + for mounting your already pre-existing data, or for creating a fresh filesystem +, or for exporting via iSCSI, and so on. + You may even do so before any other cluster node has joined the resource + (so-called +\begin_inset Quotes eld +\end_inset + +standalone mode +\begin_inset Quotes erd +\end_inset + +). + But you can also do so later after setup of (one or many) secondaries. +\end_layout + +\begin_layout Enumerate +Wait a few seconds until the directory +\family typewriter +/mars/resource-mydata/ +\family default + and its symlink contents also appear on cluster node B. + The command +\family typewriter +marsadm wait-cluster +\family default + may be helpful. +\end_layout + +\begin_layout Enumerate +On node B, say +\family typewriter +marsadm join-resource mydata /dev/lv-x/mydata +\family default +. +\begin_inset Newline newline +\end_inset + +As a result, the initial full-sync from node A to node B should start automatica +lly. +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + Of course, your old contents of your disk +\family typewriter +/dev/lv-x/mydata +\family default + at side B (and +\emph on +only +\emph default + there!) is overwritten by the version from side A. 
+ Since you are an experienced sysadmin, you knew that, and it was just the + effect you deliberately wanted to achieve. + If you didn't check that your old contents didn't contain any valuable + data (or if you accidentally provided a wrong disk device argument), it + is too late now. + The +\family typewriter +marsadm +\family default + command checks that the disk device argument is really a block device, + and that exclusive access to it is possible (as well as some further safety + checks, e.g. + matching sizes). + However, MARS cannot know the +\emph on +purpose +\emph default + of your generic block device. + MARS (as well as DRBD) is completely ignorant of the +\emph on +contents +\emph default + of a generic block device; it does not interpret it in any way. + Therefore, you may use MARS (as well as DRBD) for mirroring Windows filesystems +, or raw devices from databases, or virtual machines, or whatever. +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + Check that state +\family typewriter +Orphan +\family default + is left after a while on B. + Notice that +\family typewriter +join-resource +\family default + is only +\emph on +starting +\emph default + a new replica, but does not wait for its completion. +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Hint: by default, MARS uses the so-called +\begin_inset Quotes eld +\end_inset + +fast fullsync +\begin_inset Quotes erd +\end_inset + + algorithm. + It works similar to +\family typewriter +rsync +\family default +, first reading the data on both sides and computing an md5 checksum for + each block. + Heavy-weight data is only transferred over the long-distance network upon + checksum mismatch. + This is extremely fast if your data is already (almost) identical on both + sides. 
+ Conversely, if you know in advance that your initial data is completely + different on both sides, you may choose to switch off the fast fullsync + algorithm via +\family typewriter +echo 0 > /proc/sys/mars/do_fast_fullsync +\family default + in order to save the additional IO overhead and network latencies introduced + by the separate checksum comparison steps. +\end_layout + +\begin_layout Enumerate +Optionally, only for experienced sysadmins who +\emph on +really +\emph default + know what they are doing: if you will create a +\emph on +new +\emph default + filesystem on +\family typewriter +/dev/mars/mydata +\family default + +\emph on +after(!) +\emph default + having created the MARS resource as well as +\emph on +after +\emph default + having already joined it on every replica, you may abandon the fast fullsync + phase +\emph on +before +\emph default + creating the fresh filesystem, because the old content of +\family typewriter +/dev/mars/mydata +\family default + will then be just garbage not used by the freshly created filesystem +\begin_inset Foot +status open + +\begin_layout Plain Layout +It is +\emph on +vital +\emph default + that the transaction logfile contents created by +\family typewriter +mkfs +\family default + is +\emph on +fully +\emph default + propagated to the secondaries and then replayed there. +\end_layout + +\begin_layout Plain Layout +Analogously, another exception is also possible, but at your own risk (be + careful, really!): when migrating your data from DRBD to MARS, and you + have ensured that (1) at the end of using DRBD both your replicas were + really equal (you should have checked that), and (2) before and after setting + up any side of MARS ( +\family typewriter +create-resource +\family default + as well as +\family typewriter +join-resource +\family default +) nothing has been written at all to it (i.e. 
+ no usage, neither of +\family typewriter +/dev/lv-x/mydata +\family default + nor of +\family typewriter +/dev/mars/mydata +\family default + has occurred in any way), the first transaction logfile +\family typewriter +/mars/resource-mydata/log-000000001-$primary +\family default + created by MARS will be empty. + Check whether this is really true! Then, and only then, you may also issue + a +\family typewriter +fake-sync +\family default +. +\end_layout + +\end_inset + +. + Then, and only then, you may say +\family typewriter +marsadm fake-sync mydata +\family default + in order to abort the sync operation. +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + Never do a +\family typewriter +fake-sync +\family default + unless you are +\series bold +absolutely sure +\series default + that you really don't need to sync the data! Otherwise, you are +\emph on +guaranteed +\emph default + to have produced harmful inconsistencies. + If you accidentally issued +\family typewriter +fake-sync +\family default +, you may start over the fast full sync at your secondary side by saying + +\family typewriter +marsadm invalidate mydata +\family default + (analogously to the corresponding DRBD command). +\end_layout + +\begin_layout Section +Keeping Resources Operational +\end_layout + +\begin_layout Subsection +Logfile Rotation / Deletion +\begin_inset CommandInset label +LatexCommand label +name "subsec:Logfile-Rotation" + +\end_inset + + +\end_layout + +\begin_layout Standard +As explained in section +\begin_inset CommandInset ref +LatexCommand nameref +reference "sec:The-Transaction-Logger" + +\end_inset + +, all changes to your resource data are recorded in transaction logfiles + residing on the +\family typewriter +/mars/ +\family default + filesystem. + These files are always growing over time.
+ In order to avoid filesystem overflow, the following must be done in regular + time intervals: +\end_layout + +\begin_layout Enumerate + +\family typewriter +marsadm log-rotate all +\family default + +\begin_inset Newline newline +\end_inset + +This starts appending to a new logfile on all of your resources. + The logfiles are automatically numbered by an increasing 9-digit logfile + number. + This will suffice for many centuries even if you would logrotate once a + minute. + Practical frequencies for logfile rotation are more like once an hour, + or every 10 minutes when having highly-loaded storage servers. +\end_layout + +\begin_layout Enumerate + +\family typewriter +marsadm log-delete-all all +\family default + +\begin_inset Newline newline +\end_inset + +This determines all logfiles from all resources which are no longer needed + (i.e. + which are +\emph on +fully +\emph default + replayed, on +\emph on +all +\emph default + relevant secondaries). + All superfluous logfiles are then deleted, including all copies on all + secondaries. +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + The current version of MARS deletes either +\emph on +all +\emph default + replicas of a logfile everywhere, or +\emph on +none +\emph default + of the replicas. + This is a simple rule, but has the drawback that one node may hinder other + nodes from freeing space in +\family typewriter +/mars/ +\family default +. + In particular, the command +\family typewriter +marsadm pause-replay $res +\family default + (as well as +\family typewriter +marsadm disconnect $res +\family default +) will freeze the space reclamation in the whole cluster when the pause + is lasting very long. 
+\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + During such space accumulation, also the number of so-called deletions + will accumulate in /mars/todo-global/ and sibling directories. + In very big installations consisting of thousands of nodes, it is a good + idea to regularly monitor the number of deletions similarly to the following: + +\family typewriter +$(find /mars/ -name +\begin_inset Quotes eld +\end_inset + +delete-* +\begin_inset Quotes erd +\end_inset + + | wc -l) +\family default + should not exceed a limit of ~150 entries. +\end_layout + +\begin_layout Standard +Please prefer the short form +\family typewriter +marsadm cron +\family default + as an equivalent to scripting two separate commands +\family typewriter +marsadm log-rotate all +\family default + and +\family typewriter +marsadm log-delete-all all +\family default +. + The short form is not only easier to remember, but also future-proof in + case some new MARS features should be implemented in future. +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Best practice is to run +\family typewriter +marsadm cron +\family default + in a +\family typewriter +cron +\family default + job, such as +\family typewriter +/etc/cron.d/mars +\family default +. + An example cronjob can be found in the +\family typewriter +userspace/cron.d/ +\family default + subdirectory of the git repo. +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +In addition, you should establish some regular monitoring of the free space + present in the +\family typewriter +/mars/ +\family default + filesystem. 
+\end_layout + +\begin_layout Standard +More detailed information about avoidance of +\family typewriter +/mars/ +\family default + overflow is in section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:Defending-Overflow" + +\end_inset + +. +\end_layout + +\begin_layout Subsection +Switch Primary / Secondary Roles +\end_layout + +\begin_layout Standard +\noindent +\align center +\begin_inset Graphics + filename images/switching.fig + width 90col% + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +In contrast to DRBD, MARS distinguishes between +\emph on +intended +\emph default + and +\emph on +forced +\emph default + switching. + This distinction is necessary due to differences in the communication architect +ure (asynchronous communication vs synchronous communication, see sections + +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:The-Lamport-Clock" + +\end_inset + + and +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:The-Symlink-Tree" + +\end_inset + +). +\end_layout + +\begin_layout Standard +Asynchronous communication means that (in worst case) a message may take + (almost) arbitrary time in a distorted network to propagate to another + node. + As a consequence, the risk for accidentally creating an (unintended) split + brain is increased (compared to a synchronous system like DRBD). +\end_layout + +\begin_layout Standard +In order to minimize this risk, MARS has invested a lot of effort into an + internal handover protocol when you start an +\emph on +intended +\emph default + primary switch.
+\end_layout + +\begin_layout Subsubsection +Intended Switching / Planned Handover +\begin_inset CommandInset label +LatexCommand label +name "subsec:Intended-Switching" + +\end_inset + + +\end_layout + +\begin_layout Standard +Before starting a planned handover from your old primary +\family typewriter +A +\family default + to a new primary +\family typewriter +B +\family default +, you should check the replication of the resource. + As a human, use +\family typewriter +marsadm view mydata +\family default +. + For scripting, use the macros from section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Predefined-Trivial-Macros" + +\end_inset + + (see also section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:Scripting-HOWTO" + +\end_inset + +; an example can be found in +\begin_inset Flex URL +status collapsed + +\begin_layout Plain Layout + +contrib/example-scripts/check-mars-switchable.sh +\end_layout + +\end_inset + +). + The network should be OK, and the amount of replication delay should be + as low as possible. + Otherwise, handover may take a very long time. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Best practice is to +\series bold +prepare a planned handover +\series default + by the following steps: +\end_layout + +\begin_layout Enumerate +Check the network and the replication lag. + It should be low (a few hundred megabytes, or a low number of gigabytes + - see also the rough time forecast shown by +\family typewriter +marsadm view mydata +\family default + when there is a larger replication delay, or directly access the forecast + by +\family typewriter +marsadm view-replinfo +\family default +). 
+\end_layout + +\begin_layout Enumerate +Only when the +\family typewriter +systemd +\family default + method from section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:systemd-Templates" + +\end_inset + + is +\emph on +not +\emph default + used: stop your application, then umount +\family typewriter +/dev/mars/mydata +\family default + on host +\family typewriter +A +\family default +. +\end_layout + +\begin_layout Enumerate +Optionally: when the +\family typewriter +systemd +\family default + method from section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:systemd-Templates" + +\end_inset + + is +\emph on +not +\emph default + used, and when scripting something else, or when typing extremely fast + by hand, or for better safety: say +\family typewriter +marsadm wait-umount mydata +\family default + on host +\family typewriter +B +\family default +. + When your network is OK, the propagation of the device usage state +\begin_inset Foot +status open + +\begin_layout Plain Layout +Notice that the usage check for +\family typewriter +/dev/mars/mydata +\family default + on host +\family typewriter +B +\family default + is based on the +\emph on +open count +\emph default + transferred from +\emph on +another +\emph default + node +\family typewriter +A +\family default +. + Since MARS is operating asynchronously (in contrast to DRBD), it may take + some time until our node +\family typewriter +B +\family default + knows that the device is no longer used at +\family typewriter +A +\family default +. 
+ This can lead to a race condition if you automate an intended takeover + with a script like +\family typewriter +ssh root@A +\begin_inset Quotes eld +\end_inset + +umount /dev/mars/mydata +\begin_inset Quotes erd +\end_inset + +; ssh root@B +\begin_inset Quotes eld +\end_inset + +marsadm primary mydata +\begin_inset Quotes erd +\end_inset + + +\family default + because your second ssh command may be faster than the internal MARS symlink + tree propagation (cf section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:The-Symlink-Tree" + +\end_inset + +). + In order to prevent such races, you are strongly advised to use the command +\end_layout + +\begin_layout Itemize + +\family typewriter +marsadm wait-umount mydata +\end_layout + +\begin_layout Plain Layout +on node +\family typewriter +B +\family default + before trying to become primary. + See also section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:Scripting-HOWTO" + +\end_inset + +. +\end_layout + +\end_inset + + should take only a few seconds. + Otherwise, check for any network problems or any other problems. +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +This step is not really necessary, because +\family typewriter +marsadm primary +\family default + will also wait for the +\family typewriter +umount +\family default + before it will proceed. + However, scripting this intermediate step gives you some more options: + if the +\family typewriter +umount +\family default + takes too long, you may program a different action, like re-starting at + the old primary, or its contrary, some forced umount, or even continuing + with a forceful failover instead (see section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Forced-Switching" + +\end_inset + +). 
+\end_layout + +\begin_layout Enumerate +Optionally, and when the +\family typewriter +systemd +\family default + method from section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:systemd-Templates" + +\end_inset + + is +\emph on +not +\emph default + used: on host +\family typewriter +B +\family default +, wait until +\family typewriter +marsadm view mydata +\family default + (or +\family typewriter +view-diskstate +\family default +) shows +\family typewriter +UpToDate +\family default +. + It is possible to omit this step, but then you have no control on the duration + of the handover, and in case of any transfer problems, disk space problems, + etc you are potentially risking to produce a split brain (although +\family typewriter +marsadm +\family default + will do its best to avoid it). + Doing the wait by yourself, +\emph on +before +\emph default + starting +\family typewriter +marsadm primary +\family default +, has a big advantage: you can abort the handover cycle at any time, just + by re-mounting the device +\family typewriter +/dev/mars/mydata +\family default + at the old primary +\family typewriter +A +\family default + again, and by re-starting your application. + Once you have started +\family typewriter +marsadm primary +\family default + on host +\family typewriter +B +\family default +, you might have to switch back, or possibly even via +\family typewriter +primary --force +\family default + (see sections +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Forced-Switching" + +\end_inset + + and +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Split-Brain-Resolution" + +\end_inset + +). +\end_layout + +\begin_layout Standard +Switching the roles is very similar to DRBD: just issue the command +\end_layout + +\begin_layout Itemize + +\family typewriter +marsadm primary mydata +\end_layout + +\begin_layout Standard +on your formerly secondary node +\family typewriter +B +\family default +. 
+ In combination with a properly set-up +\family typewriter +systemd +\family default + method (see section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:systemd-Templates" + +\end_inset + +), this will even automatically start your application at the new site. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +The most important difference to DRBD: don't use an intermediate +\family typewriter +marsadm secondary mydata +\family default + anywhere. + Although it would be possible, it has some +\emph on +disadvantages +\emph default +. + Always switch +\emph on +directly +\emph default +! +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +In contrast to DRBD, MARS remembers the designated primary, even when your + system crashes and reboots. + While in case of a crash you have to re-setup DRBD with commands like +\family typewriter +drbdadm up +\begin_inset Formula $\ldots$ +\end_inset + +; drbdadm primary +\begin_inset Formula $\ldots$ +\end_inset + + +\family default +, MARS will automatically resume its former roles just by saying +\family typewriter +modprobe mars +\family default +. + In combination with a properly set-up +\family typewriter +systemd +\family default + method (see section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:systemd-Templates" + +\end_inset + +), this will even automatically re-start your application. 
+\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Another fundamental difference to DRBD: when the network is healthy, there + can only exist +\emph on +one +\emph default + designated primary at a time (modulo some communication delays caused by + the +\begin_inset Quotes eld +\end_inset + +eventually consistent +\begin_inset Quotes erd +\end_inset + + communication model, see section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:The-Lamport-Clock" + +\end_inset + +). + By saying +\family typewriter +marsadm primary mydata +\family default + on host +\family typewriter +B +\family default +, +\series bold +all other +\series default + hosts (including +\family typewriter +A +\family default +) will +\series bold +automatically go into secondary role +\series default + after a while! +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +You simply +\emph on +don't need +\emph default + an intermediate +\family typewriter +marsadm secondary mydata +\family default + for planned handover! +\end_layout + +\begin_layout Standard +Precondition for a plain +\family typewriter +marsadm primary +\family default + (without +\family typewriter +systemd +\family default +) is that you are up, that means in attached and connected state (cf. + +\family typewriter +marsadm up +\family default +), that you are no sync target anymore, and (only when +\family typewriter +systemd +\family default + isn't configured to automatically stop the application at the old site) + that any old primary (in this case +\family typewriter +A +\family default +) does not use its +\family typewriter +/dev/mars/mydata +\family default + device any longer, and that the network is healthy. 
+ If some (parts of) logfiles are not yet (fully) transferred to the new + primary, you will need enough space on +\family typewriter +/mars/ +\family default + at the target side. + If one of the preconditions described in section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Operation-of-the" + +\end_inset + + is violated, +\family typewriter +marsadm primary +\family default + may refuse to start. +\end_layout + +\begin_layout Standard +These preconditions try to protect you from doing silly things, such as + accidentally provoking a split brain error state. + We try to avoid split brain as best as we can. + Therefore, we distinguish between +\emph on +intended +\emph default + and +\emph on +emergency +\emph default + switching. + Intended switching will try to avoid split brain +\emph on +as best as it can +\emph default +. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + Don't +\emph on +rely +\emph default + on split brain avoidance, in particular when scripting any higher-level + applications such as cluster managers (cf. + section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:Scripting-HOWTO" + +\end_inset + +). + +\family typewriter +marsadm +\family default + does its best, but at least in case of (unnoticed) network outages / partitions + (or +\emph on +extremely, really extremely +\emph default + slow / overloaded networks), an attempt to become +\family typewriter +UpToDate +\family default + may fail.
+ If you want to +\emph on +ensure +\emph default + that no split brain can result from intended primary switching, please + obey the best practices from above, and please give the +\family typewriter +primary +\family default + command only after your secondary is +\emph on +known +\begin_inset Foot +status open + +\begin_layout Plain Layout +As noted in many places in this manual, checking this cannot be done by + looking at the local state of a single cluster node. + You have to check several nodes. + +\family typewriter +marsadm +\family default + can only check the +\emph on +local +\emph default + node reliably! +\end_layout + +\end_inset + + +\emph default + to be +\emph on +really +\emph default + +\family typewriter +UpToDate +\family default + (see +\family typewriter +marsadm wait-cluster +\family default + and +\family typewriter +marsadm view +\family default + and other macros described in section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:Inspecting-the-State" + +\end_inset + +). +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + A +\emph on +very rough +\emph default + estimation of the time to become +\family typewriter +UpToDate +\family default + is displayed by +\family typewriter +marsadm view mydata +\family default + or other macros (e.g. + +\family typewriter +view-replinfo +\family default +). + However, on very flaky networks, the estimation may not only flicker much, + but also be inaccurate. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + Planned handover is refused +\emph on +by default +\emph default + when some sync is running somewhere.
+ By adding the option +\family typewriter +--ignore-sync +\family default +, you are no longer protected by this +\emph on +safety measure +\emph default +, and you are willing to accept that any already running syncs will restart + from point 0, in order to ensure consistency. +\end_layout + +\begin_layout Subsubsection +Forced Switching +\begin_inset CommandInset label +LatexCommand label +name "subsec:Forced-Switching" + +\end_inset + + +\end_layout + +\begin_layout Standard +In case the connection to the old primary is lost for whatever reason, we + just don't know anything about its +\emph on +current +\emph default + state (which may deviate from its +\emph on +last known +\emph default + state). + The following command sequence will skip many checks (essentially you just + need to be attached and you must not be a current sync target) and tell + your node to become primary forcefully: +\end_layout + +\begin_layout Itemize + +\family typewriter +marsadm pause-fetch mydata +\family default + +\begin_inset Separator latexpar +\end_inset + + +\end_layout + +\begin_deeper +\begin_layout Standard +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + notice that this is similar to +\family typewriter +drbdadm disconnect mydata +\family default + as you are probably used from DRBD. + For better compatibility with DRBD, you may use the alternate syntax +\family typewriter +marsadm disconnect mydata +\family default + instead. + However, there is a subtle difference to DRBD: DRBD will drop +\emph on +both +\emph default + sides of its single bi-directional connection and no longer try to re-connect + from any of both sides, while +\family typewriter +pause-fetch +\family default + is equivalent to +\family typewriter +pause-fetch-local +\family default +, which instructs only the +\emph on +local +\emph default + host to stop fetching logfiles. 
+ Other members of the cluster, including the former primary, are +\emph on +not +\emph default + instructed to do so. + They may continue fetching logfiles over their own private TCP connections, + potentially using many connections in parallel, and potentially even from + any +\emph on +other +\emph default + member of the resource, if they think they can get the data from there. + In order to instruct +\begin_inset Foot +status open + +\begin_layout Plain Layout +Notice that not all such instructions may arrive at all sites when the network + is interrupted (or extremely slow). +\end_layout + +\end_inset + + +\emph on +all +\emph default + members of the resource to stop fetching logfiles, you may use +\family typewriter +marsadm pause-fetch-global mydata +\family default + instead (cf section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Operation-of-the" + +\end_inset + +). +\end_layout + +\end_deeper +\begin_layout Itemize + +\family typewriter +marsadm primary mydata --force +\family default + +\begin_inset Separator latexpar +\end_inset + + +\end_layout + +\begin_deeper +\begin_layout Standard +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + this is the forceful failover. + Depending on the current replication lag, you may lose some data. + Use +\family typewriter +--force +\family default + only if you know what you are doing! +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + When +\family typewriter +systemd +\family default + is configured properly (see section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:systemd-Templates" + +\end_inset + +), your application will start automatically at the new primary site.
+\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + when the network is interrupted, the old primary site cannot know this, + and will continue running. + Once the metadata exchange is working again (by default on port 7777), + the old site will be automatically shut down by its local +\family typewriter +systemd +\family default + configuration, when configured properly (see section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:systemd-Templates" + +\end_inset + +). + In contrast to the +\emph on +planned +\emph default + handover from section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Intended-Switching" + +\end_inset + +, this may happen much later. + In case of long-lasting network outages, even days or weeks! +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + +Running both sites in parallel for a long time may seriously damage your + business. + Ensure that any +\series bold +customer traffic +\series default + cannot go to the old site! Be sure to configure your BGP in a proper way, + such that +\emph on +only +\emph default +, and +\emph on +only +\emph default + the new site will receive any customer traffic from both inside and outside + networks, like the internet. +\end_layout + +\end_deeper +\begin_layout Itemize + +\family typewriter +marsadm resume-fetch mydata +\family default + +\begin_inset Separator latexpar +\end_inset + + +\end_layout + +\begin_deeper +\begin_layout Standard +As such, the new primary does not really need this, because primaries are + producing their own logfiles without need for fetching.
+ This is only to undo the previous +\family typewriter +pause-fetch +\family default +, in order to avoid future surprises when the new primary will somewhen + change to secondary mode again (in the far-distant future), and you have + forgotten to remember the fact that fetching had been switched off. + +\end_layout + +\end_deeper +\begin_layout Standard +When using +\family typewriter +--force +\family default +, many precondition checks and other internal checks are skipped, and in + particular the internal handover protocol for split brain avoidance. +\end_layout + +\begin_layout Standard +Therefore, use of +\family typewriter +--force +\family default + is +\emph on +likely +\emph default + to +\series bold +provoke a split brain +\series default +. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + +\series bold +Split brain +\series default + is always an +\series bold +erroneous state +\series default + which should be never entered deliberately! Once you have entered it accidental +ly, you +\series bold +must +\series default + resolve it ASAP (see section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Split-Brain-Resolution" + +\end_inset + +), otherwise you cannot operate your resource in the long term. +\end_layout + +\begin_layout Standard +In order to impede you from giving an accidental +\family typewriter +--force +\family default +, the precondition is different: +\family typewriter +--force +\family default + works only in +\emph on +locally disconnected +\emph default + state. + This is similar to DRBD. +\end_layout + +\begin_layout Standard +Remember: +\family typewriter +marsadm primary +\family default + without +\family typewriter +--force +\family default + tries to prevent split brain as best as it can. 
+ Use of the +\family typewriter +--force +\family default + option will almost +\emph on +certainly +\emph default + provoke a split brain, at least if the old primary continues to operate + on its local +\family typewriter +/dev/mars/mydata +\family default + device. + Therefore, you are +\series bold +strongly advised +\series default + to do this +\series bold +only +\series default + after +\end_layout + +\begin_layout Enumerate + +\family typewriter +marsadm primary +\family default + without +\family typewriter +--force +\family default + has failed +\emph on +for no good reason +\emph default + +\begin_inset Foot +status open + +\begin_layout Plain Layout +Most reasons will be displayed by +\family typewriter +marsadm +\family default + when it is rejecting the planned handover. +\end_layout + +\end_inset + +, and +\end_layout + +\begin_layout Enumerate +You are sure you +\emph on +really +\emph default + want to switch, even when that eventually leads to a split brain. + You also declare that you are willing to do +\emph on +manual +\emph default + split-brain resolution as described in section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Split-Brain-Resolution" + +\end_inset + +, or even destruction / reconstruction of a damaged node as described in + section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Final-Destroy-of" + +\end_inset + +. +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + Notice: in case of +\emph on +connection loss +\emph default + (e.g. + networking problems / network partitions), you may not be able to reliably + detect whether a split brain actually resulted, or not. +\end_layout + +\begin_layout Paragraph +Some Background +\end_layout + +\begin_layout Standard +In contrast to DRBD, split brain situations are handled differently by MARS + .
+ When two primaries are accidentally active at the same time, each of them + writes into different logfiles +\family typewriter +/mars/resource-mydata/log-000000001-A +\family default + and +\family typewriter +/mars/resource-mydata/log-000000001-B +\family default + where the +\emph on +origin +\emph default + host is always recorded in the filename. + Therefore, both nodes +\emph on +can theoretically +\emph default + run in primary mode independently from each other, at least for some time. + They +\emph on +might +\emph default + even +\family typewriter +log-rotate +\family default + independently from each other. + However, this is really not a good idea. + The replication to third nodes will likely get stuck, and your +\family typewriter +/mars/ +\family default + filesystem(s) will eventually run out of space. + Any further secondary node (when having +\begin_inset Formula $k>2$ +\end_inset + + replicas) will certainly get into serious problems: it simply does not + know which split-brain version it should follow. + Therefore, you will certainly lose the actuality of your redundancy. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + +\family typewriter +marsadm secondary +\family default + is +\emph on +strongly discouraged +\emph default +. + It tells the whole cluster that +\emph on +nobody +\emph default + is designated as primary any more. + +\emph on +All +\emph default + nodes should go into secondary mode, globally. + In the current version of MARS, the secondaries will no longer fetch any + logfiles, since they don't know which version is the +\begin_inset Quotes eld +\end_inset + +right +\begin_inset Quotes erd +\end_inset + + one. + Syncing is also not possible. + When the device +\family typewriter +/dev/mars/mydata +\family default + is in use somewhere, it will remain in +\emph on +actual +\emph default + primary mode during that time.
+ As soon as the local +\family typewriter +/dev/mars/mydata +\family default + is released, the node will +\emph on +actually +\emph default + go into secondary mode if it is no longer designated as primary. + You should avoid it in advance by always +\emph on +directly +\emph default + switching over from one primary to another one, without intermediate +\family typewriter +secondary +\family default + command. + This is different from DRBD. +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + Split brain situations are detected +\emph on +passively +\emph default + by secondaries. + Whenever a secondary detects that somewhere a split brain has happened, + it refuses to replay any logfiles behind the split point (and also to fetch + them when possible), or anywhere where something appears suspect or ambiguous. + This tries to keep its local disk state always being consistent, but outdated + with respect to any of the split brain versions. + As a consequence, becoming primary may be impossible, because it cannot + always know which logfiles are the correct ones to replay before +\family typewriter +/dev/mars/mydata +\family default + can appear. + The ambiguity must be resolved first. +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + If you +\emph on +really +\emph default + need the local device +\family typewriter +/dev/mars/mydata +\family default + to disappear +\emph on +everywhere +\emph default + in a split brain situation, you don't need a +\emph on +strongly discouraged +\emph default + +\family typewriter +marsadm secondary +\family default + command for this. + +\family typewriter +marsadm detach +\family default + or +\family typewriter +marsadm down +\family default + can do it also, without destroying knowledge about the former designated + primary.
+\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + +\family typewriter +marsadm primary --force +\family default + is rejected in newer +\begin_inset Foot +status open + +\begin_layout Plain Layout +Beware: older versions before +\family typewriter +mars0.1stable52 +\family default + did deliberately skip this check because a few years ago somebody at 1&1 + did place a +\emph on +requirement +\emph default + on this. + Fortunately, the requirement now has gone, so a more safe behaviour could + be implemented. + The new behaviour is for your safety, to prevent you from doing +\begin_inset Quotes eld +\end_inset + +silly +\begin_inset Quotes erd +\end_inset + + things in case you are under pressure during an incident (try to safeguard + human error as best as possible). +\end_layout + +\end_inset + + marsadm versions if your replica is a current sync target. + This is not a bug: it should prevent you from forcing an inconsistent replica + into primary mode, which will +\emph on +certainly +\emph default + lead to inconsistent data. + However, in extremely rare cases of severe damage of +\emph on +all +\emph default + of your replicas, you may be desperate. + Only in such a rare case, and only then, you might decide to force any + of your replicas (e.g. + based on their last sync progress bar) into primary role although none + of the re-syncs had finished before. + In such a case, and only if you really know what you are doing, you may + use +\family typewriter +marsadm fake-sync +\family default + to first mark your inconsistent replica as UpToDate (which is a +\series bold +lie +\series default +) and then force it to primary as explained above. + Afterwards, you will certainly need an +\family typewriter +fsck +\family default + or similar repair before you can restart your application. + Good luck!
And don't forget to check the size of +\family typewriter +lost+found +\family default + afterwards. + This is really your +\emph on +very last +\emph default + chance if nothing else had succeeded before. +\end_layout + +\begin_layout Subsection +Split Brain Resolution +\begin_inset CommandInset label +LatexCommand label +name "subsec:Split-Brain-Resolution" + +\end_inset + + +\end_layout + +\begin_layout Standard +Split brain can naturally occur during a long-lasting network outage (aka + network partition) when you (forcefully) switch primaries inbetween, or + due to final loss of your old primary node (fatal node crash) when not + all logfile data had been transferred immediately before the final crash. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + Remember that split brain is an +\series bold +erroneous state +\series default + which must be resolved as soon as possible! +\end_layout + +\begin_layout Standard +Whenever split brain occurs for whatever reason, you have two choices for + resolution: either destroy one of your versions, or retain it under a different + resource name. +\end_layout + +\begin_layout Standard +In any of both cases, do the following steps ASAP: +\end_layout + +\begin_layout Enumerate + +\series bold +Manually +\series default + check which (surviving) version is the +\begin_inset Quotes eld +\end_inset + +right +\begin_inset Quotes erd +\end_inset + + one. + Any error is up to you: destroying the wrong version is +\emph on +your +\emph default + fault, not the fault of MARS. +\end_layout + +\begin_layout Enumerate +If you did not already switch your primary to the final destination determined + in the previous step, do it now (see description in section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Forced-Switching" + +\end_inset + +). 
+ Don't use an intermediate +\family typewriter +marsadm secondary +\family default + command (as known from DRBD): +\emph on +directly +\emph default + switch to the new designated primary! +\end_layout + +\begin_layout Enumerate +Unless +\family typewriter +systemd +\family default + is configured properly (see section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:systemd-Templates" + +\end_inset + +), do the following manually: on each non-right version (which you don't + want to retain) which had been primary before, umount your +\family typewriter +/dev/mars/mydata +\family default + or otherwise stop using it (e.g. + stop iSCSI or other users of the device). + Wait until each of them has actually left primary state and until their + local logfile(s) have been fully written back to the underlying disk. +\end_layout + +\begin_layout Enumerate +Wait until the network works again. + All your (surviving) cluster nodes +\emph on +must +\emph default + +\begin_inset Foot +status open + +\begin_layout Plain Layout +If you are a MARS expert and you really know what you are doing (in particular, + you can anticipate the effects of the Lamport clock and of the symlink + update protocol including the +\begin_inset Quotes eld +\end_inset + +eventually consistent +\begin_inset Quotes erd +\end_inset + + behaviour including the not-yet-consistent intermediate states, see sections + +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:The-Lamport-Clock" + +\end_inset + + and +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:The-Symlink-Tree" + +\end_inset + +), you may deviate from this requirement. +\end_layout + +\end_inset + + be able to communicate with each other. + If that is not possible, or if it takes too long, you may fall back to + the method described in section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Final-Destroy-of" + +\end_inset + +, but do this only as far as necessary. 
+\end_layout + +\begin_layout Standard +The next steps are different for different use cases: +\end_layout + +\begin_layout Paragraph +Destroying a Wrong Split Brain Version +\end_layout + +\begin_layout Standard +Continue with the following steps, each on those cluster node(s) where you + do not want to retain its split-brain version. + In preference, start with the old +\begin_inset Quotes eld +\end_inset + +wrong +\begin_inset Quotes erd +\end_inset + + primaries first (see advice at the end of this section): +\end_layout + +\begin_layout Enumerate-Resume + +\family typewriter +marsadm invalidate mydata +\family default + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +setcounter{enumi}{4} +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +When no split brain is reported anymore after that (via +\family typewriter +marsadm view all +\family default +), you are done. + You need to repeat this on other secondaries only when necessary. +\end_layout + +\begin_layout Standard +In very rare cases when things are screwed up very heavily (e.g. + a partly destroyed +\family typewriter +/mars/ +\family default + partition), you may try an alternate method described in appendix +\begin_inset CommandInset ref +LatexCommand ref +reference "chap:Alternative-Methods-for" + +\end_inset + +. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + Check that state +\family typewriter +Orphan +\family default + is left after a while. + Notice that +\family typewriter +invalidate +\family default + is only +\emph on +restarting +\emph default + an existing replica, but does not wait for its completion. +\end_layout + +\begin_layout Paragraph +Keeping a Split Brain Version +\end_layout + +\begin_layout Standard +On those cluster node(s) where you want to retain the version (e.g. 
+ for inspection purposes): +\end_layout + +\begin_layout Enumerate-Resume + +\family typewriter +marsadm leave-resource mydata +\end_layout + +\begin_layout Enumerate-Resume +After having done this on +\emph on +all +\emph default + those cluster nodes, check that the split brain is gone (e.g. + by saying +\family typewriter +marsadm view mydata +\family default +), as documented above. + In very rare cases, you might also need a +\family typewriter +log-purge-all +\family default + (see page +\begin_inset CommandInset ref +LatexCommand pageref +reference "log-purge-all$res" + +\end_inset + +). +\end_layout + +\begin_layout Enumerate-Resume +Rename the underlying local disk +\family typewriter +/dev/lv-x/mydata +\family default + into something like +\family typewriter +/dev/lv-x/mynewdata +\family default + (see +\family typewriter +man lvrename +\family default +). This is +\emph on +extremely +\emph default + recommended to avoid confusion with the old resource name! +\end_layout + +\begin_layout Enumerate-Resume +Check that each underlying local disk +\family typewriter +/dev/lv-x/mynewdata +\family default + is really usable afterwards, e.g. + by test-mounting it (or +\family typewriter +fsck +\family default + if you can afford it). + If all is OK, don't forget to umount it before proceeding with the next + step. +\end_layout + +\begin_layout Enumerate-Resume +Create a completely new MARS resource out of the underlying disk +\family typewriter +/dev/lv-x/mynewdata +\family default + having a different name, best is +\family typewriter +mynewdata +\family default + (see description in section +\begin_inset CommandInset ref +LatexCommand vref +reference "sec:Creating-and-Maintaining" + +\end_inset + +).
+\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + Generally: +\series bold + best practice +\series default + is to always keep your LV names equal to your MARS resource names. + This can avoid a +\emph on +lot +\emph default + of unnecessary confusion. +\end_layout + +\begin_layout Paragraph +Keeping a Good Version +\end_layout + +\begin_layout Standard +When you had a secondary which did not participate in the split brain, but + just got confused and therefore stopped replaying logfiles immediately + before the split-brain point, it may very well happen +\begin_inset Foot +status open + +\begin_layout Plain Layout +In general, such a +\begin_inset Quotes eld +\end_inset + +good +\begin_inset Quotes erd +\end_inset + + behaviour cannot be guaranteed for all secondaries. + Race conditions in complex networks may asynchronously transfer +\begin_inset Quotes eld +\end_inset + +wrong +\begin_inset Quotes erd +\end_inset + + logfile data to a secondary much earlier than conflicting +\begin_inset Quotes eld +\end_inset + +good +\begin_inset Quotes erd +\end_inset + + logfile data which will be marked +\begin_inset Quotes eld +\end_inset + +good +\begin_inset Quotes erd +\end_inset + + only in the +\emph on +future. + +\emph default + It is impossible to predict this in advance. +\end_layout + +\end_inset + + that you don't need to do any action for it. + When all wrong versions have disappeared from the cluster (by +\family typewriter +invalidate +\family default + or +\family typewriter +leave-resource +\family default + as described before), the confusion should be over, and the secondary should + automatically resume tracking of the new unique version. +\end_layout + +\begin_layout Standard +Please check that +\emph on +all +\emph default + of your secondaries are no longer stuck. 
+ You need to execute split brain resolution only for +\emph on +stuck +\emph default + nodes. +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + Hint / advice for +\begin_inset Formula $k>2$ +\end_inset + + replicas: it is a good idea to start split brain resolution +\emph on +first +\emph default + with those (few) nodes which had been (accidentally) primary before, but + are not the new designated primary. + Usually, you had 2 primaries during split brain, so this will apply only + to +\emph on +one +\emph default + of them. + Leave the other one intact, by not umounting +\family typewriter +/dev/mars/mydata +\family default + at all, and keeping your applications running. + Even during emergency mode, see section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Emergency-Mode" + +\end_inset + +. + +\emph on +First +\emph default + resolve the problem of the +\begin_inset Quotes eld +\end_inset + +wrong +\begin_inset Quotes erd +\end_inset + + primary(s) via +\family typewriter +invalidate +\family default + or +\family typewriter +leave-resource +\family default +. + Wait for a short while. + Then check the rest of your secondaries, whether they now are already following + the new (unique) primary, and finally check whether the split brain warning + reported by +\family typewriter +marsadm view all +\family default + is gone everywhere. + This way, you can often skip unnecessary invalidations of replicas. +\end_layout + +\begin_layout Subsection +Final Destruction of a Damaged Node +\begin_inset CommandInset label +LatexCommand label +name "subsec:Final-Destroy-of" + +\end_inset + + +\end_layout + +\begin_layout Standard +When a node has eventually died, do the following steps ASAP: +\end_layout + +\begin_layout Enumerate + +\emph on +Physically +\emph default + remove the dead node from your network. + Unplug all network cables! 
Failing to do so might provoke a disaster in + case it somehow resurrects in an uncontrolled manner, such as a partly-damaged + +\family typewriter +/mars/ +\family default + filesystem, a half-defective kernel, RAM / kernel memory corruption, disk + corruption, or whatever. + Don't risk any such unpredictable behaviour! +\end_layout + +\begin_layout Enumerate + +\series bold +Manually +\series default + check which of the surviving versions will be the +\begin_inset Quotes eld +\end_inset + +right +\begin_inset Quotes erd +\end_inset + + one. + Any error is up to you: resurrecting an unnecessarily old / outdated version + and/or destroying the newest / best version is +\emph on +your +\emph default + fault, not the fault of MARS. +\end_layout + +\begin_layout Enumerate +If you did not already switch your primary to the final destination determined + in the previous step, do it now (see description in section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Forced-Switching" + +\end_inset + +). +\end_layout + +\begin_layout Enumerate +On a surviving node, but preferably +\emph on +not +\emph default + the new designated primary, give the following commands: +\begin_inset Separator latexpar +\end_inset + + +\end_layout + +\begin_deeper +\begin_layout Enumerate + +\family typewriter +marsadm --host=your-damaged-host down mydata +\end_layout + +\begin_layout Enumerate + +\family typewriter +marsadm --host=your-damaged-host leave-resource mydata +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + Check for misspellings, in particular the hostname of the dead node, and + check the command syntax before typing return! Otherwise, you may forcefully + destroy the wrong +\begin_inset Foot +status open + +\begin_layout Plain Layout +That said, MARS is rather tolerant of human error. 
+ Once a sysadmin accidentally destroyed a cluster while it was continuously + running as primary. + Fortunately, the problem was detected early enough for a correction without + causing any extraordinary customer downtime outside of accepted tolerances, + and no data loss at all. +\end_layout + +\end_inset + + node! +\end_layout + +\end_deeper +\begin_layout Enumerate +In case any of the previous commands should fail (which is rather likely), + repeat it with an additional +\family typewriter +--force +\family default + option. + Don't use +\family typewriter +--force +\family default + in the first place, always try first without it! +\end_layout + +\begin_layout Enumerate +Repeat the same with +\emph on +all +\emph default + resources which were formerly present at +\family typewriter +your-damaged-host +\family default +. +\end_layout + +\begin_layout Enumerate +Finally, say +\family typewriter +marsadm --host=your-damaged-host leave-cluster +\family default + (optionally augmented with +\family typewriter +--force +\family default +). +\end_layout + +\begin_layout Standard +Now your surviving nodes should +\emph on +believe +\emph default + that the old node +\family typewriter +your-damaged-host +\family default + no longer exists, and that it no longer participates in any resource. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + Even if your dead node comes to life again in some way: always ensure that + the mars kernel module cannot run any more. + +\emph on +Never +\emph default + do a +\family typewriter +modprobe mars +\family default + on a node marked as dead this way!
+\end_layout + +\begin_layout Standard +Further instructions for complicated cases are in appendix +\begin_inset CommandInset ref +LatexCommand ref +reference "chap:Alternative-De--and" + +\end_inset + + and +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Cleanup-in-case" + +\end_inset + +. +\end_layout + +\begin_layout Subsection +Online Resizing during Operation +\end_layout + +\begin_layout Standard +You should have LVM or some other means of increasing the physical size + of your disk (e.g. + via firmware of some RAID controllers). + The network must be healthy. + Do the following steps: +\end_layout + +\begin_layout Enumerate +Increase your local disks (usually +\family typewriter +/dev/vg/mydata +\family default +) +\emph on +everywhere +\emph default + in the whole cluster. + In order to avoid wasting space, increase them +\emph on +uniformly +\emph default + to the same size (when possible). + The +\family typewriter +lvresize +\family default + tool is documented elsewhere. +\end_layout + +\begin_layout Enumerate +Check that all MARS switches are on. + If not, say +\family typewriter +marsadm up mydata +\family default + everywhere. +\end_layout + +\begin_layout Enumerate +At the primary: +\family typewriter +marsadm resize mydata +\end_layout + +\begin_layout Enumerate +If you have intermediate layers such as iSCSI, you may need some +\family typewriter +iscsiadm +\family default + update or other command. +\end_layout + +\begin_layout Enumerate +Now you may increase your filesystem. + This is specific for the filesystem type and documented elsewhere. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Hint: the secondaries will start syncing the increased new part of the underlyin +g primary disk. + In many cases, this is not really needed, because the new junk data just + does not care. 
+ If you are sure and if you know what you are doing, you may use +\family typewriter +marsadm fake-sync mydata +\family default + to abort such unnecessary traffic. +\end_layout + +\begin_layout Section +The State of MARS +\begin_inset CommandInset label +LatexCommand label +name "sec:The-State-of" + +\end_inset + + +\end_layout + +\begin_layout Standard +In general, MARS tries to +\emph on +hide +\emph default + any network failures from you as best as it can. + After a network problem, any internal low-level socket connections are + +\emph on +transparently +\emph default + tried to re-open ASAP, without need for sysadmin intervention. + In difference to DRBD, network failures will +\emph on +not +\emph default + automatically alter the state of MARS, such as switching to +\family typewriter +disconnected +\family default + after a +\family typewriter +ko_timeout +\family default + or similar. + From a high-level sysadmin viewpoint, communication may just take a very + long time to succeed. +\end_layout + +\begin_layout Standard +When the behaviour of MARS is different from DRBD, it is usually intended + as a feature. +\end_layout + +\begin_layout Standard +MARS is not only an +\series bold +asynchronous +\series default + system at block IO level, but also +\series bold +at control level +\series default +. +\end_layout + +\begin_layout Standard +This is +\emph on +necessary +\emph default + because in a widely distributed long-distance system running on slow or + even temporarily failing networks, actions may take a long time, and there + may be many actions +\series bold +started in parallel +\series default +. +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Synchronous concepts are generally not sufficient for expressing that. 
+ Because of inherent asynchronicity and of dynamic creation / joining of + resources, it is neither possible to comprehensively depict a complex distribut +ed MARS system, nor a comprehensive standalone snippet of MARS, as a finite + state transition diagram +\begin_inset Foot +status open + +\begin_layout Plain Layout +Probably it could be possible to formally model MARS as a Petri net. + However, complete Petri nets are tending to become very complex, and to + describe lots of low-level details. + Expressing hierarchy, in a top-down fashion, is cumbersome. + We find no clue in trying to do so. +\end_layout + +\end_inset + +. +\end_layout + +\begin_layout Standard +Although MARS tries to +\emph on +approximate +\emph default + / +\emph on +emulate +\emph default + the synchronous control behaviour of DRBD at the interface level ( +\family typewriter +marsadm +\family default +) in many situations as best as it can, the +\emph on +internal +\emph default + control model is necessarily asynchronous. + As an experienced sysadmin, you will be curious how it works in principle. + When you know something about it, you will no longer be surprised when + some (detail) behaviour is different from DRBD. +\end_layout + +\begin_layout Standard +The general principle is an asynchronous 2-edge handshake protocol, which + is used almost everywhere in MARS: +\begin_inset Separator latexpar +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +\align center +\begin_inset Graphics + filename images/handshake.fig + width 80col% + +\end_inset + + +\end_layout + +\begin_layout Standard +We have a binary todo switch, which can be either in state +\begin_inset Quotes eld +\end_inset + +on +\begin_inset Quotes erd +\end_inset + + or +\begin_inset Quotes eld +\end_inset + +off +\begin_inset Quotes erd +\end_inset + +. + In addition, we have an actual response indicator, which is similar to + an LED indicating the actual status.
+ In our example, we imagine that both are used for controlling a big ventilator, + having a huge inert mass. + Imagine a big machine from a power plant, which is as tall as a human. +\end_layout + +\begin_layout Standard +We start in a situation where the binary switch is off, and the ventilator + is stopped. + At point 1, we turn on the switch. + At that moment, a big contactor will sound like +\begin_inset Quotes eld +\end_inset + +zonggg +\begin_inset Quotes erd +\end_inset + +, and a big motor will start to hum. + At first you won't hear anything else. + It will take a while, say 1 minute, until the big wheel will have reached + its final operating RPM, due to the huge inert mass. + During that spin-up, the lights in your room will become slightly darker. + When having reached the full RPM at point 2, your workplace will then be + noisier, but in exchange your room lights will be back at ordinary strength, + and the actual response LED will light up in order to indicate that + the big fan is now operational. +\end_layout + +\begin_layout Standard +Assume we want to turn the system off. + When turning the todo switch to +\begin_inset Quotes eld +\end_inset + +off +\begin_inset Quotes erd +\end_inset + + at point 3, first nothing will seem to happen at all. + The big wheel will keep spinning due to its heavy inert mass, and the RPM + as well as the sound will go down only slowly. + During spin-down, the actual response LED will stay illuminated, in order + to warn you that you should not touch the wheel, otherwise you may get + injured +\begin_inset Foot +status open + +\begin_layout Plain Layout +Notice that it is only safe to access the wheel when +\emph on +both +\emph default + the switch and the LED are off. + Conversely, if at least one of them is on, something is going on inside + the machine. + Transferred to MARS: always look at +\emph on +both +\emph default + the todo switch and the corresponding actual indicator in order to not miss + something.
+\end_layout + +\end_inset + +. + The LED will only go off after, say, 2 minutes, when the wheel has actually + stopped at point 4. + After that, the cycle may potentially start over again. +\end_layout + +\begin_layout Standard +As you can see, all four possible cartesian product combinations between + two boolean values are occurring in the diagram. +\end_layout + +\begin_layout Standard +The same handshake protocol is used in MARS for communication between userspace + and kernelspace, as well as for communication in the widely distributed + system. +\end_layout + +\begin_layout Section +Inspecting the State of MARS +\begin_inset CommandInset label +LatexCommand label +name "sec:Inspecting-the-State" + +\end_inset + + +\end_layout + +\begin_layout Standard +The main command for viewing the current state of MARS is +\end_layout + +\begin_layout Itemize + +\family typewriter +marsadm view mydata +\end_layout + +\begin_layout Standard +or its more specialized variant +\end_layout + +\begin_layout Itemize + +\family typewriter +marsadm view- +\emph on +$macroname +\emph default + mydata +\end_layout + +\begin_layout Standard +where +\family typewriter +\emph on +$macroname +\family default +\emph default + is one of the macros described in chapter +\begin_inset CommandInset ref +LatexCommand ref +reference "chap:The-Macro-Processor" + +\end_inset + +, or a macro which has been written by yourself. +\end_layout + +\begin_layout Standard +As always, you may replace the resource name +\family typewriter +mydata +\family default + with the special keyword +\family typewriter +all +\family default + in order to get the state of all locally joined resources, as well as a + list of all those resources. 
+\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +When using the variant +\family typewriter +marsadm view all +\family default +, additionally the global communication status will be displayed. + This helps humans in diagnosing problems. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Hint: use the compound command +\family typewriter +watch marsadm view all +\family default + for continuous display of the current state of MARS. + When starting this side-by-side in +\family typewriter +ssh +\family default + terminal windows for all your cluster nodes, you can easily watch what's + going on in the whole cluster. +\end_layout + +\begin_layout Chapter +Basic Working Principle +\end_layout + +\begin_layout Standard +Even if you are impatient, please read this chapter. + At the +\emph on +surface +\emph default +, MARS appears to be very similar to DRBD. + It looks like almost being a drop-in replacement for DRBD. +\end_layout + +\begin_layout Standard +When taking this naïvely, you could easily step into some trivial pitfalls, + because the internal working principle of MARS is totally different from + DRBD. + Please forget (almost) anything you already know about the internal working + principles of DRBD, and look at the very different working principles of + MARS. 
+\end_layout + +\begin_layout Section +The Transaction Logger +\begin_inset CommandInset label +LatexCommand label +name "sec:The-Transaction-Logger" + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +\align center +\begin_inset Graphics + filename images/MARS_Data_Flow.pdf + lyxscale 60 + width 100text% + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +The basic idea of MARS is to record all changes made to your block device + in a so-called +\series bold +transaction logfile +\series default +. + +\emph on +Any +\emph default + write request is treated like a transaction which changes the contents + of your block device. +\end_layout + +\begin_layout Standard +This is similar in concept to some database systems, but there exists no + separate +\begin_inset Quotes eld +\end_inset + +commit +\begin_inset Quotes erd +\end_inset + + operation: +\emph on +any +\emph default + write request is acting like a commit. +\end_layout + +\begin_layout Standard +The picture shows the flow of write requests. + Let's start with the primary node. +\end_layout + +\begin_layout Standard +Upon submission of a write request on +\family typewriter +/dev/mars/mydata +\family default +, it is first buffered in a +\emph on +temporary +\emph default + memory buffer. +\end_layout + +\begin_layout Standard +The temporary memory buffer serves multiple purposes: +\end_layout + +\begin_layout Itemize +It keeps track of the order of write operations. +\end_layout + +\begin_layout Itemize +Additionally, it keeps track of the positions in the underlying disk +\family typewriter +/dev/lv-x/mydata +\family default +. + In particular, it detects when the same block is overwritten multiple times. +\end_layout + +\begin_layout Itemize +During pending write operation, any concurrent reads are served from the + memory buffer. 
+\end_layout + +\begin_layout Standard +After the write has been buffered in the temporary memory buffer, the main + logger thread of the transaction logger creates a so-called +\emph on +log entry +\emph default + and starts an +\begin_inset Quotes eld +\end_inset + +append +\begin_inset Quotes erd +\end_inset + + operation on the transaction logfile. + The log entry contains vital information such as the logical block number + in the underlying disk, the length of the data, a timestamp, some header + magic in order to detect corruption, the log entry sequence number, of + course the data itself, and optional information like a checksum or compression + information. +\end_layout + +\begin_layout Standard +Once the log entry has been written through to the +\family typewriter +/mars/ +\family default + filesystem via fsync(), the application waiting for the write operation + at +\family typewriter +/dev/mars/mydata +\family default + is signalled that the write was successful. +\end_layout + +\begin_layout Standard +This may happen even +\emph on +before +\emph default + the writeback to the underlying disk +\family typewriter +/dev/lv-x/mydata +\family default + has started. + Even when you power off the system right now, the information is not lost: + it is present in the logfile, and can be reconstructed from there. +\end_layout + +\begin_layout Standard +Notice that the order of log records present in the transaction log defines + a total order among the write requests which is +\emph on +compatible +\emph default + to the partial order of write requests issued on +\family typewriter +/dev/mars/mydata +\family default +. +\end_layout + +\begin_layout Standard +Also notice that despite its sequential nature, the transaction logfile + is typically +\emph on +not +\emph default + the performance bottleneck of the system: since appending to a logfile + is almost purely sequential IO, it runs much faster than random IO on typical + datacenter workloads. 
+\end_layout + +\begin_layout Standard +In order to reclaim the temporary memory buffer, its content must be written + back to the underlying disk +\family typewriter +/dev/lv-x/mydata +\family default + at some later point. + After writeback, the temporary space is freed. + The writeback can do the following optimizations: +\end_layout + +\begin_layout Enumerate +writeback may be in +\emph on +any +\emph default + order; in particular, it may be +\emph on +sorted +\emph default + according to ascending sector numbers. + This will reduce the average seek distances of magnetic disks in general. +\end_layout + +\begin_layout Enumerate +when the same sector is overwritten multiple times, only the +\begin_inset Quotes eld +\end_inset + +last +\begin_inset Quotes erd +\end_inset + + version needs to be written back, skipping some intermediate versions. +\end_layout + +\begin_layout Standard +In case the primary node crashes during writeback, it suffices to replay + the log entries from some point in the past until the end of the transaction + logfile. + It does no harm if you accidentally replay some log entries twice or even + more often: since the replay is in the original total order, any temporary + inconsistency is +\emph on +healed +\emph default + by the logfile application. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +In mathematics, the property that you can apply your logfile twice to your + data (or even as often as you want), is called +\series bold +idempotence +\series default +. + This is a very desirable property: it ensures that nothing goes wrong when + replaying +\begin_inset Quotes eld +\end_inset + +too much +\begin_inset Quotes erd +\end_inset + + / starting your replay +\begin_inset Quotes eld +\end_inset + +too early +\begin_inset Quotes erd +\end_inset + +. 
+ Idempotence is even more beneficial: in case anything should go wrong with + your data on your disk (e.g. + IO errors), replaying your logfile once more may +\begin_inset Foot +status open + +\begin_layout Plain Layout +Miracles cannot be guaranteed, but +\emph on +higher chances +\emph default + and +\emph on +improvements +\emph default + can be expected (e.g. + better chances for +\family typewriter +fsck +\family default +). +\end_layout + +\end_inset + + even +\series bold +heal +\series default + some defects. + Good news for desperate sysadmins forced to work with flaky hardware! +\end_layout + +\begin_layout Standard +The basic idea of the asynchronous replication of MARS is rather simple: + just transfer the logfiles to your secondary nodes, and replay them onto + their copy of the disk data (also called +\emph on +mirror +\emph default +) in the same order as the total order defined by the primary. +\end_layout + +\begin_layout Standard +Therefore, a mirror of your data on any secondary may be outdated, but it + always corresponds to some version which was valid in the past. + This property is called +\series bold +anytime consistency +\begin_inset Foot +status open + +\begin_layout Plain Layout +Your secondary nodes are always consistent in themselves. + Notice that this kind of consistency is a +\emph on +local +\emph default + consistency model. + There exists no global consistency in MARS. + Global consistency would be practically impossible in long-distance replication + where Einstein's law of the speed of light is limiting global consistency. + The front-cover picture showing the planets Earth and Mars tries to lead + your imagination away from global consistency models as used in +\begin_inset Quotes eld +\end_inset + +DRBD Think(tm) +\begin_inset Quotes erd +\end_inset + +, and tries to prepare you mentally for local consistency as in +\begin_inset Quotes eld +\end_inset + +MARS Think(tm) +\begin_inset Quotes erd +\end_inset + +. 
+\end_layout + +\end_inset + +. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +As you can see in the picture, the process of transferring the logfiles is + +\emph on +independent +\emph default + from the process which replays the logfiles onto the data at some secondary + site. + Both processes can be switched on / off separately (see commands +\family typewriter +marsadm {dis,}connect +\family default + and +\family typewriter +marsadm {pause,resume}-replay +\family default + in section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Operation-of-the" + +\end_inset + +). + This may be +\emph on +exploited +\emph default +: for example, you may replicate your logfiles as soon as possible (to protect + against catastrophic failures), but deliberately wait one hour until it + is replayed (under regular circumstances). + If your data inside your filesystem +\family typewriter +/mydata/ +\family default + at the primary site is accidentally destroyed by +\family typewriter +rm -rf /mydata/ +\family default +, you have an old copy at the secondary site. + This way, you can substitute +\emph on +some parts +\begin_inset Foot +status open + +\begin_layout Plain Layout +Please note that MARS cannot +\emph on +fully +\emph default + substitute a backup system, because it can keep only +\emph on +physical +\emph default + copies, and does not create logical copies. +\end_layout + +\end_inset + + +\emph default + of conventional backup functionality by MARS. + In case you need the actual version, just replay in +\begin_inset Quotes eld +\end_inset + +fast-forward +\begin_inset Quotes erd +\end_inset + + mode (similar to old-fashioned video tapes). 
+\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Future versions of MARS Full are planned to also allow +\begin_inset Quotes eld +\end_inset + +fast-backward +\begin_inset Quotes erd +\end_inset + + rewinding, of course at some cost. +\end_layout + +\begin_layout Section +The Lamport Clock +\begin_inset CommandInset label +LatexCommand label +name "sec:The-Lamport-Clock" + +\end_inset + + +\end_layout + +\begin_layout Standard +MARS is always +\emph on +asynchronously +\emph default + communicating in the distributed system on +\emph on +any +\emph default + topics, even strategic decisions. +\end_layout + +\begin_layout Standard +If there were a +\emph on +strict +\emph default + global consistency model, which would be roughly equivalent to a standalone + model, we would need +\emph on +locking +\emph default + in order to serialize conflicting requests. + It has been known for many decades that +\emph on +distributed locks +\emph default + do not only suffer from performance problems, but they are also cumbersome + to get them working reliably in scenarios where nodes or network links + may fail at any time. +\end_layout + +\begin_layout Standard +Therefore, MARS uses a very different consistency model: +\series bold +Eventually Consistent +\series default +. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Notice that the network bottleneck problems described in section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:Network-Bottlenecks" + +\end_inset + + are +\emph on +demanding +\emph default + an +\begin_inset Quotes eld +\end_inset + +eventually consistent +\begin_inset Quotes erd +\end_inset + + model. + You have +\series bold +no chance +\series default + against natural laws, like Einstein's laws. 
+ In order to cope with the problem area, you have to +\emph on +invest some additional effort +\emph default +. + Unfortunately, asynchronous communication models are more tricky to program + and to debug than simple strictly consistent models. + In particular, you +\emph on +have to cope with +\emph default + additional +\series bold +race conditions +\series default + +\emph on +inherent +\emph default + +\emph on +to +\emph default + the +\begin_inset Quotes eld +\end_inset + +eventually consistent +\begin_inset Quotes erd +\end_inset + + model. + In the face of the laws of the universe, motivate yourself by looking at + the graphics at the cover page: the planets are a +\emph on +symbol +\emph default + for what you have to do! +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + Example: the asynchronous communication protocol of MARS leads to a different + behaviour from DRBD in case of +\series bold +network partitions +\series default + (temporary interruption of communication between some cluster nodes), because + MARS +\emph on +remembers +\emph default + the old state of remote nodes over long periods of time, while DRBD knows + absolutely nothing about its peers in disconnected state. + Sysadmins familiar with DRBD might find the following behaviour unusual: +\begin_inset Separator latexpar +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +\align center + +\size tiny +\begin_inset Tabular + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +Event +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +DRBD Behaviour +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +MARS Behaviour +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +1. 
+ the network partitions +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +automatic disconnect +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +nothing happens, but replication lags behind +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +2. + on A: +\family typewriter +umount $device +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +works +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +works +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +3. + on A: +\family typewriter +{drbd,mars}adm secondary +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +works +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +works +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +4. + on B: +\family typewriter +{drbd,mars}adm primary +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +works, split brain happens +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\series bold +\size tiny +refused +\series default + because B believes that A is primary +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +5. 
+ the network resumes +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +automatic connect attempt fails +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +communication automatically resumes +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +If you intentionally want to switch over (and to produce a split brain as + a side effect), the following variant must be used with MARS: +\begin_inset Separator latexpar +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +\align center + +\size tiny +\begin_inset Tabular + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +Event +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +DRBD Behaviour +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +MARS Behaviour +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +1. + the network partitions +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +automatic disconnect +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +nothing happens, but replication lags behind +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +2. + on A: +\family typewriter +umount $device +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +works +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +works +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +3. 
+ on A: +\family typewriter +{drbd,mars}adm secondary +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +works +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +works (but +\emph on +not recommended! +\emph default +) +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +4. + on B: +\family typewriter +{drbd,mars}adm primary +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +split brain, but nobody knows +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\series bold +\size tiny +refused +\series default + because B believes that A is primary +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +5. + on B: +\family typewriter +marsadm disconnect +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +works, nothing happens +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +6. + on B: +\family typewriter +marsadm primary --force +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +works, split brain happens on B, but A doesn't know +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +7. + on B: +\family typewriter +marsadm connect +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +works, nothing happens +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +8. 
+ the network resumes +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +automatic connect attempt fails +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size tiny +communication resumes, A now detects the split brain +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +In order to implement the consistency model +\begin_inset Quotes eld +\end_inset + +eventually consistent +\begin_inset Quotes erd +\end_inset + +, MARS uses a so-called Lamport +\begin_inset Foot +status open + +\begin_layout Plain Layout +Published in the late 1970s by Leslie Lamport, also known as inventor of + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +LaTeX +\end_layout + +\end_inset + +. +\end_layout + +\end_inset + + clock. + MARS uses a special variant called +\begin_inset Quotes eld +\end_inset + +physical Lamport clock +\begin_inset Quotes erd +\end_inset + +. +\end_layout + +\begin_layout Standard +The physical Lamport clock is another almost-realtime clock which +\emph on +can +\emph default + run independently from the Linux kernel system clock. + However, the Lamport clock tries to remain as near as possible to the system + clock. +\end_layout + +\begin_layout Standard +Both clocks can be queried at any time via +\family typewriter +cat /proc/sys/mars/lamport_clock +\family default +. + The result will show both clocks in parallel, in units of seconds since + the Unix epoch, with nanosecond resolution. +\end_layout + +\begin_layout Standard +When there are no network messages at all, both the system clock and the + Lamport clock will show almost the same time (except some minor differences + of a few nanoseconds resulting from the finite processor clock speed). 
+\end_layout + +\begin_layout Standard +The physical Lamport clock works rather simply: +\emph on +any +\emph default + message on the network is augmented with a Lamport time stamp telling when + the message was +\emph on +sent +\emph default + according to the local Lamport clock of the sender. + Whenever that message is received by some receiver, it checks whether the + time ordering relation would be violated: whenever the Lamport timestamp + in the message would claim that the sender had sent it +\emph on +after +\emph default + it arrived at the receiver (according to drifts in their respective local + clocks), something must be wrong. + In this case, the local Lamport clock of the +\emph on +receiver +\emph default + is advanced shortly after the sender Lamport timestamp, such that the time + ordering relation is no longer violated. +\end_layout + +\begin_layout Standard +As a consequence, any local Lamport clock may precede the corresponding + local system clock. + In order to avoid accumulation of deltas between the Lamport and the system + clock, the Lamport clock will run slower after that, possibly until it + reaches the system clock again (if no other message arrives which sets + it forward again). + After having reached the system clock, the Lamport clock will continue + with +\begin_inset Quotes eld +\end_inset + +normal +\begin_inset Quotes erd +\end_inset + + speed. +\end_layout + +\begin_layout Standard +MARS uses the local Lamport clock for anything where other systems would + use the local system clock: for example, timestamp generation in the +\family typewriter +/mars/ +\family default + filesystem. + Even symlinks created there are timestamped according to the Lamport clock. + Both the kernel module and the userspace tool +\family typewriter +marsadm +\family default + are always operating in the timescale of the Lamport clock. + Most importantly, all timestamp comparisons are always carried out with + respect to Lamport time. 
+\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + Bigger differences between the Lamport and the system clock can be annoying + from a human point of view: when typing +\family typewriter +ls -l /mars/resource-mydata/ +\family default + many timestamps may appear as if they were created in the +\begin_inset Quotes eld +\end_inset + +future +\begin_inset Quotes erd +\end_inset + +, because the +\family typewriter +ls +\family default + command compares the output formatting against the system clock (it does + not even know of the existence of the MARS Lamport clock). +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + Always use +\family typewriter +ntp +\family default + (or another clock synchronization service) in order to pre-synchronize + your system clocks as close as possible. + Bigger differences are not only annoying, but may lead some people to wrong + conclusions and therefore even lead to bad human decisions! +\end_layout + +\begin_layout Standard +In a professional datacenter, you should use +\family typewriter +ntp +\family default + anyway, and you should monitor its effectiveness anyway. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Hint: many internal logfiles produced by the MARS kernel module contain + Lamport timestamps written as numerical values. + In order to convert them into human-readable form, use the command +\family typewriter +marsadm cat /mars/5.total.status +\family default + or similar. 
+\end_layout + +\begin_layout Section +The Symlink Tree +\begin_inset CommandInset label +LatexCommand label +name "sec:The-Symlink-Tree" + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + The symlink tree as described here will be replaced by another representation + in future versions of MARS. + Therefore, don't do any scripting by directly accessing symlinks! Use the + primitive macros described in section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Predefined-Trivial-Macros" + +\end_inset + +. +\end_layout + +\begin_layout Standard +The current +\family typewriter +/mars/ +\family default + filesystem container format contains not only transaction logfiles, but + also acts as a generic storage for (persistent) state information. + Both configuration information and runtime state information are currently + stored in symlinks. + Symlinks are +\begin_inset Quotes eld +\end_inset + +misused +\begin_inset Foot +status open + +\begin_layout Plain Layout +This means, the symlink targets need not be other files or directories, + but just any values like integers or strings. +\end_layout + +\end_inset + + +\begin_inset Quotes erd +\end_inset + + in order to represent some +\family typewriter +key -> value +\family default + pairs. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +It is not yet clear / decided, but there is a +\emph on +chance +\emph default + that the +\emph on +concept +\emph default + of +\family typewriter +key -> value +\family default + pairs will be retained in future versions of MARS. 
+ Instead of being represented by symlinks, another representation will be + used, such that hopefully the +\family typewriter +key +\family default + part will remain in the form of a pathname, even if there were no longer + a physical representation in an actual filesystem. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + A fundamentally different behaviour than DRBD: when your DRBD primary crashed + some time ago, and now comes up again, you have to set up DRBD again by + a sequence of commands like +\family typewriter +modprobe drbd; drbdadm up all; drbdadm primary all +\family default + or similar. + In contrast, MARS needs only +\family typewriter +modprobe mars +\family default + (after +\family typewriter +/mars/ +\family default + has been mounted by +\family typewriter +/etc/fstab +\family default +). + The +\emph on +persistence +\emph default + of the symlinks residing in +\family typewriter +/mars/ +\family default + will automatically remember your previous state, even if some of your resources + were primary while others were secondary (mixed operations). + You don't need to do any actions in order to +\begin_inset Quotes eld +\end_inset + +restore +\begin_inset Quotes erd +\end_inset + + a previous state, no matter how +\begin_inset Quotes eld +\end_inset + +complex +\begin_inset Quotes erd +\end_inset + + it was. 
+\end_layout + +\begin_layout Standard +(Almost) all symlinks appearing in the +\family typewriter +/mars/ +\family default + directory tree are automatically replicated throughout the whole cluster, + provided that the cluster +\family typewriter +uuid +\family default +s are equal +\begin_inset Foot +status open + +\begin_layout Plain Layout +This is protection against accidental +\begin_inset Quotes eld +\end_inset + +merging +\begin_inset Quotes erd +\end_inset + + of two unrelated clusters which had been created at different times with + different +\family typewriter +uuids +\family default +. +\end_layout + +\end_inset + + at all sites. + Thus the +\family typewriter +/mars/ +\family default + directory forms some kind of +\emph on +global namespace +\emph default +. +\end_layout + +\begin_layout Standard +In order to avoid name clashes, each pathname created at node A follows + a convention: the node name A should be a suffix of the pathname. + Typically, internal MARS names follow the scheme +\family typewriter +/mars/ +\emph on +something +\emph default +/myname-A +\family default +. + When using the expert command +\family typewriter +marsadm {get,set}-link +\family default + (which will likely be replaced by something else in future MARS releases), + you should follow the best practice of systematically using pathnames like + +\family typewriter +/mars/userspace/myname-A +\family default + or similar. + As a result, each node will automatically get informed about the state + at any other node, like B when the corresponding information is recorded + on node B under the name +\family typewriter +/mars/userspace/myname-B +\family default + (context-dependent names). +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + Experts only: the symlink replication works generically. 
+ You might use the +\family typewriter +/mars/userspace/ +\family default + directory in order to place your own symlink there (for whatever purpose, + which need not have to do with MARS). + However, the symlinks are likely to disappear. + Use +\family typewriter +marsadm {get,set}-link +\family default + instead. + There is a chance that these abstract commands (or variants thereof) will + be retained, by acting on the new data representation in future, even if + the old symlink format will vanish some day. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Important: the convention of placing the +\series bold +creator host name +\series default + inside your pathnames should be used wherever possible. + The name part is a kind of +\begin_inset Quotes eld +\end_inset + +ownership indicator +\begin_inset Quotes erd +\end_inset + +. + It is crucial that no other host writes any symlink not +\begin_inset Quotes eld +\end_inset + +belonging +\begin_inset Quotes erd +\end_inset + + to him. + Other hosts may read foreign information as often as they want, but never + modify them. + This way, your cluster nodes are able to +\emph on +communicate +\emph default + with each other via symlink / information updates. 
+\end_layout + +\begin_layout Standard +Although experts might create (and change) the current symlinks with userspace + tools like +\family typewriter +ln -s +\family default +, you should use the following marsadm commands instead: +\end_layout + +\begin_layout Itemize + +\family typewriter +marsadm set-link myvalue /mars/userspace/mykey-A +\end_layout + +\begin_layout Itemize + +\family typewriter +marsadm delete-file /mars/userspace/mykey-A +\end_layout + +\begin_layout Standard +There are many reasons for this: first, the +\family typewriter +marsadm set-link +\family default + command will automatically use the Lamport clock for symlink creation, + and therefore will avoid any errors resulting from a +\begin_inset Quotes eld +\end_inset + +wrong +\begin_inset Quotes erd +\end_inset + + system clock (as in +\family typewriter +ln -s +\family default +). + Second, the +\family typewriter +marsadm delete-file +\family default + (which also deletes symlinks) works on the +\emph on +whole cluster +\emph default +. + And finally, there is a chance that this will work in future versions of + MARS even after the symlinks have vanished. +\end_layout + +\begin_layout Standard +What's the difference? If you would try to remove your symlink locally by + hand via +\family typewriter +rm -f +\family default +, you will be surprised: since the symlink has been replicated to the other + cluster nodes, it will be re-transferred from there and will be resurrected + locally after some short time. + This way, you cannot delete any object reliably, because your whole cluster + (which may consist of many nodes) remembers all your state information + and will +\begin_inset Quotes eld +\end_inset + +correct +\begin_inset Quotes erd +\end_inset + + it whenever +\begin_inset Quotes eld +\end_inset + +necessary +\begin_inset Quotes erd +\end_inset + +. 
+\end_layout + +\begin_layout Standard +In order to solve the deletion problem, MARS uses some internal deletion + protocol using auxiliary symlinks residing in +\family typewriter +/mars/todo-global/. + +\family default + The deletion protocol ensures that all replicas get deleted in the whole + cluster, and only thereafter the auxiliary symlinks in +\family typewriter +/mars/todo-global/ +\family default + are also deleted eventually. +\end_layout + +\begin_layout Standard +You may update your already existing symlink via +\family typewriter +marsadm set-link some-other-value /mars/userspace/mykey-A +\family default + . + The new value will be propagated throughout the cluster according to a + +\series bold +timestamp comparison protocol +\series default +: whenever node B notices that A has a +\emph on +newer +\emph default + version of some symlink (according to the Lamport timestamp), it will replace + its older version by the newer one. + The opposite does +\emph on +not +\emph default + work: if B notices that A has an older version, just nothing happens. + This way, the timestamps of symlinks can only progress in forward direction, + but never backwards in time. +\end_layout + +\begin_layout Standard +As a consequence, symlink updates made +\begin_inset Quotes eld +\end_inset + +by hand +\begin_inset Quotes erd +\end_inset + + via +\family typewriter +ln -sf +\family default + may get lost when the local system clock is much earlier than the + Lamport clock. +\end_layout + +\begin_layout Standard +When your cluster is fully connected by the network, the last timestamp + will finally win everywhere. + Only in case of network outages leading to +\emph on +network partitions +\emph default +, some information may be +\emph on +temporarily inconsistent +\emph default +, but only for the duration of the network outage.
+ The timestamp comparison protocol in combination with the Lamport clock + and with the persistence of the +\family typewriter +/mars/ +\family default + filesystem will automatically heal any temporary inconsistencies as soon + as possible, even in case of temporary node shutdown. +\end_layout + +\begin_layout Standard +The meaning of some internal MARS symlinks residing in +\family typewriter +/mars/ +\family default + will be hopefully documented in section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:Documentation-of-the" + +\end_inset + + some day. +\end_layout + +\begin_layout Section +Defending Overflow of +\family typewriter +/mars/ +\begin_inset CommandInset label +LatexCommand label +name "sec:Defending-Overflow" + +\end_inset + + +\end_layout + +\begin_layout Standard +This section describes an important difference to DRBD. + The metadata of DRBD is allocated +\emph on +statically +\emph default + at +\emph on +creation +\emph default + +\emph on +time +\emph default + of the resource. + In contrast, the MARS transaction logfiles are allocated +\emph on +dynamically +\emph default + at +\emph on +runtime +\emph default +. +\end_layout + +\begin_layout Standard +This leads to a potential risk from the perspective of a sysadmin: what + happens if the +\family typewriter +/mars/ +\family default + filesystem runs out of space? +\end_layout + +\begin_layout Standard +No risk, no fun. + If you want a system which survives long-lasting network outages while + keeping your replicas always consistent (anytime consistency), you +\emph on +need +\emph default + dynamic memory for that. + It is +\emph on +impossible +\emph default + to solve that problem using static memory +\begin_inset Foot +status open + +\begin_layout Plain Layout +The bitmaps used by DRBD don't preserve the +\emph on +order +\emph default + of write operations. 
+ They cannot do that, because their space is +\begin_inset Formula $O(k)$ +\end_inset + + for some constant +\begin_inset Formula $k$ +\end_inset + +. + In contrast, MARS preserves the order. + Preserving the order as such (even when only +\emph on +facts +\emph default + about the order were recorded without recording the actual data contents) + requires +\begin_inset Formula $O(n)$ +\end_inset + + space where +\begin_inset Formula $n$ +\end_inset + + is infinitely growing over time. +\end_layout + +\end_inset + +. +\end_layout + +\begin_layout Standard +Therefore, DRBD and MARS have different application areas. + If you just want a simple system for mirroring your data over short distances + like a crossover cable, DRBD will be a suitable choice. + However, if you need to replicate over longer distances, or if you need + higher levels of reliability even when multiple failures may accumulate + (such as network loss during a +\emph on +re +\emph default +sync of DRBD), the transaction logs of MARS can solve that, but at some + +\emph on +cost +\emph default +. +\end_layout + +\begin_layout Subsection +Countermeasures +\end_layout + +\begin_layout Subsubsection +Dimensioning of +\family typewriter +/mars/ +\begin_inset CommandInset label +LatexCommand label +name "subsec:Dimensioning-of-/mars/" + +\end_inset + + +\end_layout + +\begin_layout Standard +The first (and most important) measure against overflow of +\family typewriter +/mars/ +\family default + is simply to dimension it large enough to survive longer-lasting problems, + at least one weekend. +\end_layout + +\begin_layout Standard +Recommended size is at least one dedicated disk, residing at a hardware + RAID controller with BBU (see section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:Preparation:-What-you" + +\end_inset + +). + During normal operation, that size is needed only for a small fraction, + typically a few percent or even less than one percent. 
+ However, it is your +\series bold +safety margin +\series default +. + Keep it high enough! +\end_layout + +\begin_layout Subsubsection +Monitoring +\end_layout + +\begin_layout Standard +The next (equally important) measure is +\series bold +monitoring in userspace +\series default +. +\end_layout + +\begin_layout Standard +Following is a list of countermeasures both in userspace and in kernelspace, + in the order of +\begin_inset Quotes eld +\end_inset + +defensive walling +\begin_inset Quotes erd +\end_inset + +: +\end_layout + +\begin_layout Enumerate +Regular userspace monitoring must throw an INFO if a certain freespace limit + +\begin_inset Formula $l_{1}$ +\end_inset + + of +\family typewriter +/mars/ +\family default + is undershot. + Typical values for +\begin_inset Formula $l_{1}$ +\end_inset + + are 30%. + Typical actions are automated calls of +\family typewriter +marsadm cron +\family default + (or +\family typewriter +marsadm log-rotate all +\family default + followed by +\family typewriter +marsadm log-delete-all all +\family default +). + You have to implement that yourself in sysadmin space. +\end_layout + +\begin_layout Enumerate +Regular userspace monitoring must throw a WARNING if a certain freespace + limit +\begin_inset Formula $l_{2}$ +\end_inset + + of +\family typewriter +/mars/ +\family default + is undershot. + Typical values for +\begin_inset Formula $l_{2}$ +\end_inset + + are 20%. + Typical actions are (in addition to +\family typewriter +log-rotate +\family default + and +\family typewriter +log-delete-all +\family default +) alarming human supervisors via SMS and/or further stronger automated actions. +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Frequently large space is occupied by files stemming from debugging output, + or from other programs or processes. 
+ A hot candidate is +\begin_inset Quotes eld +\end_inset + +forgotten +\begin_inset Quotes erd +\end_inset + + removal of debugging output to +\family typewriter +/mars/ +\family default +. + Sometimes, an +\family typewriter +rm -rf $(find /mars/ -name +\begin_inset Quotes eld +\end_inset + +*.log +\begin_inset Quotes erd +\end_inset + +) +\family default + can work miracles. +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Another source of space hogging is a +\begin_inset Quotes eld +\end_inset + +forgotten +\begin_inset Quotes erd +\end_inset + + +\family typewriter +pause-sync +\family default + or +\family typewriter +disconnect +\family default +. + Therefore, a simple +\family typewriter +marsadm connect-global all +\family default + followed by +\family typewriter +marsadm resume-replay-global all +\family default + may also work miracles (if you didn't want to freeze some mirror deliberately). +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +If you just wanted to freeze a mirror at an outdated state for a very long + time, you simply +\emph on +cannot +\emph default + do that without causing infinite growth of space consumption in +\family typewriter +/mars/ +\family default +. + Therefore, a +\family typewriter +marsadm leave-resource $res +\family default + at +\emph on +exactly that(!) +\emph default + secondary site where the mirror is frozen, can also work miracles. + If you want to automate this in userspace, be careful. + It is easy to get unintended effects when choosing the wrong site for +\family typewriter +leave-resource +\family default +.
+\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Hint: you can / should start some of these measures even earlier at the + INFO level (see item 1), or even earlier. +\end_layout + +\begin_layout Enumerate +Regular userspace monitoring must throw an ERROR if a certain freespace + limit +\begin_inset Formula $l_{3}$ +\end_inset + + of +\family typewriter +/mars/ +\family default + is undershot. + Typical values for +\begin_inset Formula $l_{3}$ +\end_inset + + are 10%. + Typical actions are alarming the CEO via SMS and/or even stronger automated + actions. + For example, you may choose to automatically call +\family typewriter +marsadm leave-resource $res +\family default + on some or all secondary nodes, such that the primary will be left alone + and now has a chance to really delete its logfiles because no one else + is any longer potentially needing it. +\end_layout + +\begin_layout Enumerate +First-level kernelspace action, automatically executed when +\family typewriter + +\begin_inset Flex URL +status open + +\begin_layout Plain Layout + +/proc/sys/mars/required_free_space_4_gb +\end_layout + +\end_inset + + +\family default + + +\family typewriter + +\begin_inset Flex URL +status open + +\begin_layout Plain Layout + +/proc/sys/mars/required_free_space_3_gb +\end_layout + +\end_inset + + +\family default + + +\family typewriter + +\begin_inset Flex URL +status open + +\begin_layout Plain Layout + +/proc/sys/mars/required_free_space_2_gb +\end_layout + +\end_inset + + +\family default + + +\family typewriter + +\begin_inset Flex URL +status open + +\begin_layout Plain Layout + +/proc/sys/mars/required_free_space_1_gb +\end_layout + +\end_inset + + +\family default + is undershot: +\begin_inset Newline newline +\end_inset + +a warning will be issued. 
+\end_layout + +\begin_layout Enumerate +Second-level kernelspace action, automatically executed when +\family typewriter + +\begin_inset Flex URL +status open + +\begin_layout Plain Layout + +/proc/sys/mars/required_free_space_3_gb +\end_layout + +\end_inset + + +\family default + + +\family typewriter + +\begin_inset Flex URL +status open + +\begin_layout Plain Layout + +/proc/sys/mars/required_free_space_2_gb +\end_layout + +\end_inset + + +\family default + + +\family typewriter + +\begin_inset Flex URL +status open + +\begin_layout Plain Layout + +/proc/sys/mars/required_free_space_1_gb +\end_layout + +\end_inset + + +\family default + is undershot: +\begin_inset Newline newline +\end_inset + +all locally secondary resources will delete local copies of transaction + logfiles which are no longer needed locally. + This is a desperate action of the kernel module. +\end_layout + +\begin_layout Enumerate +Third-level kernelspace action, automatically executed when +\family typewriter + +\begin_inset Flex URL +status open + +\begin_layout Plain Layout + +/proc/sys/mars/required_free_space_2_gb +\end_layout + +\end_inset + + +\family default + + +\family typewriter + +\begin_inset Flex URL +status open + +\begin_layout Plain Layout + +/proc/sys/mars/required_free_space_1_gb +\end_layout + +\end_inset + + +\family default + is undershot: +\begin_inset Newline newline +\end_inset + +all locally secondary resources will stop fetching transaction logfiles. + This is a more desperate action of the kernel module. + You don't want to get there (except for testing). 
+\end_layout + +\begin_layout Enumerate +Last desperate kernelspace action when all else has failed and +\family typewriter + +\begin_inset Flex URL +status open + +\begin_layout Plain Layout + +/proc/sys/mars/required_free_space_1_gb +\end_layout + +\end_inset + + +\family default + is undershot: +\begin_inset Newline newline +\end_inset + +all locally primary resources will enter +\series bold +emergency mode +\series default + (see description below in section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Emergency-Mode" + +\end_inset + +). + This is the most desperate action of the kernel module. + You don't want to get there (except for testing). +\end_layout + +\begin_layout Standard +In addition, the kernel module obeys a general global limit +\family typewriter + +\begin_inset Flex URL +status open + +\begin_layout Plain Layout + +/proc/sys/mars/required_total_space_0_gb +\end_layout + +\end_inset + + + +\family default + the sum of all of the above limits. + When the +\emph on +total size +\emph default + of +\family typewriter +/mars/ +\family default + falls below that sum, the kernel module refuses to start at all, because + it assumes that it is senseless to try to operate MARS on a system with + such low memory resources. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +The current level of emergency kernel actions may be viewed at any time + via +\family typewriter + +\begin_inset Flex URL +status collapsed + +\begin_layout Plain Layout + +/proc/sys/mars/mars_emergency_mode +\end_layout + +\end_inset + + +\family default +. +\end_layout + +\begin_layout Subsubsection +Throttling +\end_layout + +\begin_layout Standard +The last measure for defense against overflow is +\series bold +throttling your performance pigs +\series default +.
+\end_layout + +\begin_layout Standard +Motivation: in rare cases, some users with +\family typewriter +ssh +\family default + access can do +\emph on +very +\emph default + silly things. + For example, some of them are creating their own backups via user-cron + jobs, and they do it every 5 minutes. + Some example guy created a zip archive (almost 1GB) by regularly copying + his old zip archive into a new one, then appending deltas to the new one, + and finally deleting the old archive. + Every 5 minutes. + Yes, every 5 minutes, although almost never any new files were added to + the archive. + Essentially, he copied over his archive, for nothing. + This led to massive bulk write requests, for ridiculous reasons. +\end_layout + +\begin_layout Standard +In general, your hard disks (or even RAID systems) allow much higher write + IO rates than you can ever transport over a standard TCP network from your + primary site to your secondary, at least over longer distances (see use + cases for MARS in chapter +\begin_inset CommandInset ref +LatexCommand ref +reference "chap:Use-Cases-for" + +\end_inset + +). + Therefore, it is easy to create a such a high write load that it will be + +\emph on +impossible +\emph default + to replicate it over the network, +\emph on +by construction +\emph default +. +\end_layout + +\begin_layout Standard +Therefore, we +\emph on +need +\emph default + some mechanism for throttling bulk writers whenever the network is weaker + than your IO subsystem. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Notice that DRBD will +\emph on +always +\emph default + throttle your writes whenever the network forms a bottleneck, due to its + synchronous operation mode. + In contrast, MARS allows for buffering of performance peaks in the transaction + logfiles. 
+ +\emph on +Only when +\emph default + your buffer in +\family typewriter +/mars/ +\family default + runs short (cf subsection +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Dimensioning-of-/mars/" + +\end_inset + +), MARS will start to throttle your application writes. +\end_layout + +\begin_layout Standard +There are a lot of tuning knobs named +\family typewriter +/proc/sys/mars/write_throttle_* +\family default + with the following meaning: +\end_layout + +\begin_layout Description + +\family typewriter +write_throttle_start_percent +\family default + Whenever the used space in +\family typewriter +/mars/ +\family default + is below this threshold, no throttling will occur at all. + Only when this threshold is exceeded, throttling will start +\emph on +slowly +\emph default +. + Typical values for this are 60%. +\end_layout + +\begin_layout Description + +\family typewriter +write_throttle_end_percent +\family default + Maximum throttling will occur once this space threshold is reached, i.e. + the throttling is now at its maximum effect. + Typical values for this are 90%. + When the actual space in +\family typewriter +/mars/ +\family default + lies between +\family typewriter +write_throttle_start_percent +\family default + and +\family typewriter +write_throttle_end_percent +\family default +, the strength of throttling will be interpolated linearly between the extremes. + In practice, this should lead to an equilibrium between new input flow into + +\family typewriter +/mars/ +\family default + and output flow over the network to secondaries. +\end_layout + +\begin_layout Description + +\family typewriter +write_throttle_size_threshold_kb +\family default + (readonly) This parameter shows the internal strength calculation of the + throttling. + Only write +\begin_inset Foot +status open + +\begin_layout Plain Layout +Read requests are never throttled at all.
+\end_layout + +\end_inset + + requests exceeding this size (in KB) are throttled at all. + Typically, this will hurt the bulk performance pigs first, while leaving + ordinary users (issuing small requests) unaffected. +\end_layout + +\begin_layout Description + +\family typewriter +write_throttle_ratelimit_kb +\family default + Set the global IO rate in KB/s for those write requests which are throttled. + In case of strongest +\begin_inset Foot +status open + +\begin_layout Plain Layout +In case of lighter throttling, the input flow into +\family typewriter +/mars/ +\family default + may be higher because small requests are not throttled. +\end_layout + +\end_inset + + throttling, this parameter determines the input flow into +\family typewriter +/mars/ +\family default +. + The default value is 5000 KB/s. + Please adjust this value to your application needs and to your environment. +\end_layout + +\begin_layout Description + +\family typewriter +write_throttle_rate_kb +\family default + (readonly) Shows the current rate of exactly those requests which are actually + throttled (in contrast to +\emph on +all +\emph default + requests). +\end_layout + +\begin_layout Description + +\family typewriter +write_throttle_cumul_kb +\family default + (logically readonly) Same as before, but the cumulative sum of all throttled + requests since startup / reset. + This value can be reset from userspace in order to prevent integer overflow. +\end_layout + +\begin_layout Description + +\family typewriter +write_throttle_count_ops +\family default + (logically readonly) Shows the cumulative number of throttled requests. + This value can be reset from userspace in order to prevent integer overflow. +\end_layout + +\begin_layout Description + +\family typewriter +write_throttle_maxdelay_ms +\family default + Each request is delayed at most for this timespan.
+ Smaller values will improve the responsiveness of your userspace application, + but at the cost of potentially retarding the requests not sufficiently. +\end_layout + +\begin_layout Description + +\family typewriter +write_throttle_minwindow_ms +\family default + Set the minimum length of the measuring window. + The measuring window is the timespan for which the average (throughput) + rate is computed (see +\family typewriter +write_throttle_rate_kb +\family default +). + Lower values can increase the responsiveness of the controller algorithm, + but at the cost of accuracy. +\end_layout + +\begin_layout Description + +\family typewriter +write_throttle_maxwindow_ms +\family default + This parameter must be set sufficiently much greater than +\family typewriter +write_throttle_minwindow_ms +\family default +. + In case the flow of throttled operations pauses for some natural reason + (e.g. + switched off, low load, etc), this parameter determines when a completely + new rate calculation should be started over +\begin_inset Foot +status open + +\begin_layout Plain Layout +Motivation: if requests would pause for one hour, the measuring window could + become also an hour. + Of course, that would lead to completely meaningless results. + Two requests in one hour is +\begin_inset Quotes eld +\end_inset + +incorrect +\begin_inset Quotes erd +\end_inset + + from a human point of view: we just have to ensure that averages are computed + with respect to a reasonable maximum time window in the magnitude of 10s. +\end_layout + +\end_inset + +. 
+\end_layout + +\begin_layout Subsection +Emergency Mode and its Resolution +\begin_inset CommandInset label +LatexCommand label +name "subsec:Emergency-Mode" + +\end_inset + + +\end_layout + +\begin_layout Standard +When +\family typewriter +/mars/ +\family default + is almost full and there is really absolutely no chance of getting rid + of any local transaction logfile (or freeing some space in any other way), + there is only one exit strategy: stop creating new logfile data. +\end_layout + +\begin_layout Standard +This means that the ability for replication gets lost. +\end_layout + +\begin_layout Standard +When entering emergency mode, the kernel module will execute the following + steps for all resources where the affected host is acting as a primary: +\end_layout + +\begin_layout Enumerate +Do a kind of +\begin_inset Quotes eld +\end_inset + +logrotate +\begin_inset Quotes erd +\end_inset + +, but create a +\emph on +hole +\emph default + in the sequence of transaction logfile numbers. + The +\begin_inset Quotes eld +\end_inset + +new +\begin_inset Quotes erd +\end_inset + + logfile is left empty, i.e. + no data is written to it (for now). + The hole in the numbering will prevent any secondaries from replaying any + logfiles behind the hole (should they ever contain some data, e.g. + because the emergency mode has been left again). + This works because the secondaries are regularly checking the logfile numbers + for contiguity, and they will refuse to replay anything which is not contiguous. + As a result, the secondaries will be left in a consistent, but outdated + state (at least if they already were consistent before that). +\end_layout + +\begin_layout Enumerate +The kernel module writes back all data present in the temporary memory buffer + (see figure in section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:The-Transaction-Logger" + +\end_inset + +).
+ This may lead to a (short) delay of user write requests until that has + finished (typically fractions of a second or a few seconds). + The reason is that the temporary memory buffer must not be increased in + parallel during this phase (race conditions). +\end_layout + +\begin_layout Enumerate +After the temporary memory buffer is empty, all local IO requests (whether + reads or writes) are directly going to the underlying disk. + This has the same effect as if MARS would not be present anymore. + Transaction logging no longer takes place. +\end_layout + +\begin_layout Enumerate +Any sync from any secondary is stopped ASAP. + In case they are resuming their sync sometime later, they will start over + from the beginning (position +\begin_inset Formula $0$ +\end_inset + +). +\end_layout + +\begin_layout Standard +In order to leave emergency mode, the sysadmin should do the following steps: +\end_layout + +\begin_layout Enumerate +Free enough space. + For example, delete any foreign files on +\family typewriter +/mars/ +\family default + which have nothing to do with MARS, or resize the +\family typewriter +/mars/ +\family default + filesystem, or whatever. +\end_layout + +\begin_layout Enumerate +If +\family typewriter + +\begin_inset Flex URL +status open + +\begin_layout Plain Layout + +/proc/sys/mars/mars_reset_emergency +\end_layout + +\end_inset + + +\family default + is not set, now it is time to set it. + Normally, it should be already set. +\end_layout + +\begin_layout Enumerate +Notice: as long as not enough space has been freed, a message containing + +\family typewriter + +\begin_inset Quotes eld +\end_inset + +EMEGENCY MODE HYSTERESIS +\begin_inset Quotes erd +\end_inset + + +\family default + (or similar) will be displayed by +\family typewriter +marsadm view all +\family default +. + As a consequence, any sync will be automatically halted.
+ This applies to freshly invoked syncs also, for example created by +\family typewriter +invalidate +\family default + or +\family typewriter +join-resource +\family default +. +\end_layout + +\begin_layout Enumerate +On the secondaries, use +\family typewriter +marsadm invalidate $res +\family default + in order to request updating your outdated mirrors. +\end_layout + +\begin_layout Enumerate +On the primary: +\family typewriter +marsadm log-delete-all all +\end_layout + +\begin_layout Enumerate +As soon as enough space has been freed everywhere to leave the +\family typewriter +EMEGENCY MODE HYSTERESIS +\family default +, sync should really start. + Until then, it remains halted. +\end_layout + +\begin_layout Enumerate +Recommendation: check at secondaries that state +\family typewriter +Orphan +\family default + has been left after a while. +\end_layout + +\begin_layout Standard +Alternatively, there is another method by roughly following the instructions + from appendix +\begin_inset CommandInset ref +LatexCommand ref +reference "chap:Alternative-Methods-for" + +\end_inset + +, but in a slightly different order. + In this case, do +\family typewriter +leave-resource +\family default + everywhere on +\emph on +all +\emph default + secondaries, but +\emph on +don't +\emph default + start the +\family typewriter +join-resource +\family default + phase +\emph on +for now +\emph default +. + Then clean up all your secondaries via +\family typewriter +log-purge-all +\family default +, and finally +\family typewriter +log-delete-all all +\family default + at the primary, and wait until the emergency has vanished everywhere. + Only after that, re- +\family typewriter +join-resource +\family default + your secondaries.
+\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Expert advice for +\begin_inset Formula $k=2$ +\end_inset + + replicas: this means you had only 1 mirror per resource before the overflow + happened. + Provided that you have enough space on your LVMs and on +\family typewriter +/mars/ +\family default +, and provided that transaction logging has automatically restarted after + +\family typewriter +leave-resource +\family default + and +\family typewriter +log-purge-all +\family default +, you can recover redundancy by creating a +\emph on +new +\emph default + replica via +\family typewriter +marsadm join-resource $res +\family default + on a +\emph on +third +\emph default + node. + Only after the initial full sync has finished there, run +\family typewriter +join-resource +\family default + at your original mirror. + This way, you will always retain at least one +\series bold +consistent mirror +\series default + somewhere. + After all is up-to-date, you can delete the superfluous mirror by +\family typewriter +marsadm leave-resource $res +\family default + and reclaim the disk space from its underlying LVM disk. +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +If you already have +\begin_inset Formula $k>2$ +\end_inset + + replicas in total, it may be a wise idea to prefer the +\family typewriter +leave-resource ; log-purge-all ; join-resource +\family default + method over +\family typewriter +invalidate +\family default + because it does not invalidate +\emph on +all +\emph default + your replicas at the same time (when handled properly in the right order).
+\end_layout + +\begin_layout Chapter +The Macro Processor +\begin_inset CommandInset label +LatexCommand label +name "chap:The-Macro-Processor" + +\end_inset + + +\end_layout + +\begin_layout Standard + +\family typewriter +marsadm +\family default + comes with a customizable macro processor. + It can be used for high-level complex display of the state of MARS (so-called + +\emph on +complex macros +\emph default +), as well as for low-level display of lots of individual state values (so-calle +d +\emph on +primitive macros +\emph default +). +\end_layout + +\begin_layout Standard +From the commandline, any macro can be called via +\family typewriter +marsadm view- +\emph on +$macroname +\emph default + mydata +\family default +. + The short form +\family typewriter +marsadm view mydata +\family default + is equivalent to +\family typewriter +marsadm view-default mydata +\family default +. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +In general, the command +\family typewriter +marsadm view- +\emph on +$macroname +\emph default + all +\family default + will first call the macro +\family typewriter +\emph on +$macroname +\family default +\emph default + in a loop for +\emph on +all +\emph default + resources we are a +\emph on +member locally +\emph default +. + Finally, a trailing macro +\family typewriter +\emph on +$macroname +\emph default +-global +\family default + will be called with an empty +\family typewriter +%{res} +\family default + argument, provided that such a macro is defined. + This way, you can produce per-resource output followed by global output + which does not depend on a particular resource. +\end_layout + +\begin_layout Section +Predefined Macros +\end_layout + +\begin_layout Standard +The macro processor is a very flexible and versatile tool for +\series bold +customizing +\series default +. 
+ You can create your own macros, but probably the rich set of predefined + macros is already sufficient for your needs. +\end_layout + +\begin_layout Subsection +Predefined Complex and High-Level Macros +\begin_inset CommandInset label +LatexCommand label +name "subsec:Predefined-Complex-and" + +\end_inset + + +\end_layout + +\begin_layout Standard +The following predefined complex macros try to address the information needs + of humans. + Use them in scripts only when you are prepared for the fact that the + output format may change during development of MARS. +\end_layout + +\begin_layout Standard +Notice: the definitions of predefined complex macros may be updated in the + course of the MARS project. + However, the primitive macros recursively called by the complex ones will + be hopefully rather stable in future (with the exception of bugfixes). + If you want to retain an old / outdated version of a complex macro, just + check it out from git, follow the instructions in section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Creating-your-own" + +\end_inset + +, and preferably give it a different name in order to avoid confusion with + the newer version. + In general, it should be possible to use old macros with newer versions + of +\family typewriter +marsadm +\family default + +\begin_inset Foot +status open + +\begin_layout Plain Layout +You might need to check out also old versions of further macros and adapt + their names, whenever complex macros call each other. +\end_layout + +\end_inset + +. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +default +\family default + This is equivalent to +\family typewriter +marsadm view mydata +\family default + without +\family typewriter +\emph on +-macroname +\family default +\emph default + suffix.
+ It shows a one-line status summary for each resource, optionally followed + by informational lines such as progress bars whenever a sync or a fetch + of logfiles is currently running. + The status line has the following fields: +\begin_inset Separator latexpar +\end_inset + + +\end_layout + +\begin_deeper +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +%{res} +\family default + resource name. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +[ +\emph on +this_count +\emph default +/ +\emph on +total_count +\emph default +] +\family default + total number of replicas of this resource, out of total number of cluster + members. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +%include{diskstate} +\family default + see +\family typewriter +diskstate +\family default + macro below. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +%include{replstate} +\family default + see +\family typewriter +replstate +\family default + macro below. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +%include{flags} +\family default + see +\family typewriter +flags +\family default + macro below. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +%include{role} +\family default + see +\family typewriter +role +\family default + macro below. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +%include{primarynode} +\family default + see +\family typewriter +primarynode +\family default + macro below. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +%include{commstate} +\family default + see +\family typewriter +commstate +\family default + macro below. 
+\end_layout + +\end_deeper +\begin_layout Labeling +\labelwidthstring 00.00.0000 +\begin_inset space ~ +\end_inset + + After that, optional lines such as progress bars are appearing only when + something unusual is happening. + These lines are subject to future changes. + For example, wasted disk space due to missing +\family typewriter +resize +\family default + is reported when +\family typewriter +%{threshold} +\family default + is exceeded. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +1and1 +\family default + +\begin_inset space ~ +\end_inset + +or +\begin_inset space ~ +\end_inset + + +\family typewriter +default-1and1 +\family default + A variant of +\family typewriter +default +\family default + for internal use by 1&1 Internet AG. + You may call this complex macro by saying +\family typewriter +marsadm view-1and1 all +\family default +. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Note: the +\family typewriter +marsadm view-1and1 +\family default + command has been intensely tested in Spring 2014 to produce exactly the + same output as the 1&1 internal +\begin_inset Foot +status open + +\begin_layout Plain Layout +In addition to allowing for customization, the macro processor is also meant + as an exit strategy for removing dependencies from non-free software. + +\series bold +Please put your future macros also under GPL!
+\end_layout + +\end_inset + + tool +\family typewriter +marsview +\family default + +\begin_inset Foot +status open + +\begin_layout Plain Layout +There are some subtle differences: numbers are displayed in a different + precision, some bug fixes in the macro version (which might have occurred + +\emph on +in the meantime +\emph default + ) may lead to different output as a side effect from bug fixes in +\emph on +predefined +\emph default + macros, because the original +\family typewriter +marsview +\family default + command is currently not actively maintained. + Documentation of +\family typewriter +marsview +\family default + can be found in the corresponding manpage, see +\family typewriter +man marsview +\family default +. + By construction, this is also the (unmaintained) documentation of +\family typewriter +marsadm view-1and1 +\family default + and other +\family typewriter +-1and1 +\family default + macros. + Notice that all +\family typewriter +*-1and1 +\family default + macros are not officially supported by the developer of MARS, and they + may disappear in a future major release. + However, they could be useful for your own customization macros. +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Customization via your own macros (see section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Creating-your-own" + +\end_inset + +) is explicitly encouraged by the developer. + It would be nice if a vibrant user community would emerge, helping each + other by exchange of macros. 
+ +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Hint: in order to produce your own customized inspection / monitoring tools, + you may ask the author for an official reservation of a macro sub-namespace + such as +\family typewriter +*- +\emph on +yourcompanyname +\family default +\emph default +. + You will be fully responsible for your own reserved namespace and can do + with it whatever you want. + The official MARS release will guarantee that +\emph on +no name clashes +\emph default + with your reserved sub-namespace will occur in future. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +default-global +\family default + Currently, this just calls +\family typewriter +comminfo +\family default + (see below). + May be extended in future. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +diskstate +\family default + Shows the status of the underlying disk device, in the following order + of precedence +\begin_inset Foot +status open + +\begin_layout Plain Layout +When an earlier list item is displayed, no combinations with following items + are possible. + This kind of +\begin_inset Quotes eld +\end_inset + +hiding effect +\begin_inset Quotes erd +\end_inset + + can lead to an +\emph on +information loss +\emph default +. + In order to get a non-lossy picture from the state of your system, please + look at the +\family typewriter +flags +\family default + which are able to display cartesian combinations of more detailed internal + states. +\end_layout + +\end_inset + +: +\begin_inset Separator latexpar +\end_inset + + +\end_layout + +\begin_deeper +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +NotJoined +\family default + (cf +\family typewriter +%get-disk{} +\family default +) No underlying disk device is configured. 
+\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +NotPresent +\family default + (cf +\family typewriter +%disk-present{} +\family default +) The underlying disk device (as configured, see +\family typewriter +marsadm view-get-disk +\family default +) does not exist or the device node is not accessible. + Therefore MARS cannot work. + Check that LVM or other software is properly configured and running. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +Detached +\family default + (cf +\family typewriter +InConsistent +\family default +, +\family typewriter +NeedsReplay +\family default +, +\family typewriter +%todo-attach{} +\family default +, +\family typewriter +%is-attach{} +\family default +) The underlying disk is willingly switched off (see +\family typewriter +marsadm detach +\family default +), and it actually is no longer opened by MARS. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +Detaching +\family default + (cf +\family typewriter +%todo-attach{} +\family default + and +\family typewriter +%is-attach{} +\family default +) Access to the underlying disk is switched off, but actually not yet +\family typewriter +close() +\family default +d by MARS. + This can happen for a long time on a primary when other secondaries are + accessing the disk remotely for syncing. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +DefectiveLog[ +\emph on +description-text +\emph default +] +\family default + (cf +\family typewriter +%replay-code{} +\family default +) Typically this indicates an +\family typewriter +md5 +\family default + checksum error in a transaction logfile, or another (hardware / filesystem) + defect.
+ This occurs extremely rarely in practice, but has been observed more frequently + during a massive failure of air conditioning in a datacenter, when disk + temperatures rose to more than 80° Celsius. + Notice that a secondary +\series bold +refuses +\series default + to apply any knowingly defective logfile data to the disk. + Although this message is +\emph on +not directly +\emph default + referring to the underlying disk, it is mentioned here because of its superior + +\series bold +relevance +\series default + for the diskstate. + A damaged transaction logfile will always affect the +\emph on +actuality +\emph default + of the disk, but not its +\emph on +integrity +\emph default + (by itself). + What to do in such a case? +\begin_inset Separator latexpar +\end_inset + + +\end_layout + +\begin_deeper +\begin_layout Enumerate +When the damage is only at one of your secondaries, you should first ensure + that the primary has a good logfile after a +\family typewriter +marsadm log-rotate +\family default +, then try +\family typewriter +marsadm invalidate +\family default + at the damaged secondary. + It is crucial that the primary has a fresh correct logfile behind the error + position, and that it is continuing to operate correctly. +\end_layout + +\begin_layout Enumerate +When +\emph on +all +\emph default + of your secondaries are reporting +\family typewriter +DefectiveLog +\family default +, the primary could have +\emph on +produced +\emph default + a damaged logfile (e.g. + in RAM, in a DMA channel, etc) while continuing to operate, and all of + your secondaries got that defective logfile. + After +\family typewriter +marsadm log-delete-all all +\family default +, you can check this by comparing the +\family typewriter +md5sum +\family default + of the first primary logfile (having the lowest serial number) with the + versions on your replicas.
+ The problem is that you don't know whether the primary side has a silent + corruption on any of its disks, or not. + You will need to take an operational decision whether to switchover to + a secondary via +\family typewriter +primary --force +\family default +, or whether to continue operation at the primary and +\family typewriter +invalidate +\family default + your secondaries. +\end_layout + +\begin_layout Enumerate +When the original primary is affected in a very bad way, such that it crashed + badly and afterwards even recovery of the +\emph on +primary +\emph default + is impossible +\begin_inset Foot +status open + +\begin_layout Plain Layout +In such a rare case, the +\emph on +original primary +\emph default + (but not any other host) +\series bold +refuses +\series default + to come up during recovery with +\emph on +his own +\emph default + logfile originally produced by +\emph on +himself +\emph default +. + This is not a bug, but saves you from incorrectly assuming that your original + primary disk were consistent - it is +\emph on +known +\emph default + to be inconsistent, but recovery is impossible due to the damaged logfile. + Thus +\emph on +this one +\emph default + replica is trapped by defective hardware. + The other replicas shouldn't. +\end_layout + +\end_inset + + due to this error (which typically occurs extremely rarely, observed two + times during 7 millions of operating hours on defective hardware), you + need to take an operational decision between the following alternatives: +\begin_inset Separator latexpar +\end_inset + + +\end_layout + +\begin_deeper +\begin_layout Enumerate +switchover to a former secondary via +\family typewriter +primary --force +\family default +, producing a split brain, and producing some (typically small) data loss. + However, integrity is more important than actuality in such an extreme + case. 
+\end_layout + +\begin_layout Enumerate +deconstruction of the resource at +\emph on +all +\emph default + replicas via +\family typewriter +leave-resource --force +\family default +, running +\family typewriter +fsck +\family default + or similar tools by hand at the underlying disks, selecting the best replica + out of them, and finally re-constructing the resource again. +\end_layout + +\begin_layout Enumerate +restore your backup. +\end_layout + +\end_deeper +\end_deeper +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +Orphan +\family default + The secondary cannot replay data anymore, because it has been kicked out + for avoidance of emergency mode. + The data is not recent anymore. + Typically, +\family typewriter +marsadm invalidate +\family default + needs to be done. +\begin_inset Newline newline +\end_inset + +There is an exception: shortly after +\family typewriter +join-resource +\family default + or +\family typewriter +invalidate +\family default +, it may take some time until state +\family typewriter +Orphan +\family default + may be left, and until the newest logfile has appeared at your secondary + site (depending on the size of logfiles, and on your network). + In case of network problems, this may take very long. +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + This state tells you that your replica is not current, and currently not + being updated at all. + Don't forget to +\series bold +monitor +\series default + for longer occurrences of this state! Otherwise you may get a big surprise + when you need a forceful emergency failover, but your replica is very old + or even does not really exist at all.
+\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +NoAttach +\family default + (cf +\family typewriter +%is-attach{} +\family default +) The underlying disk is currently not opened by MARS. + Reasons may be that the kernel module is not loaded, or an exclusive +\family typewriter +open() +\family default + is currently not possible because somebody else has already opened it. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +InConsistent +\family default + (cf +\family typewriter +%is-consistent{} +\family default +) A logfile replay and/or sync is known to be needed / or to complete (e.g. + after +\family typewriter +invalidate +\family default + has started) in order to restore local consistency (for details, look at + +\family typewriter +flags +\family default +). +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Hint: in the current implementation of MARS, this will never happen on secondari +es during ordinary replay (but only when either sync has not yet finished, + or when the +\emph on +initial +\emph default + logfile replay after the sync has not yet finished), because the ordinary + logfile replay always maintains anytime consistency once a consistent state + had been reached. +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + +\emph on +Only +\emph default + in case of a primary node crash, and +\emph on +only +\emph default + after attempts have failed to become primary again (e.g. + IO errors, etc), this +\emph on +can +\emph default + (but need not) mean that something went wrong. 
+ Even in such an extremely unlikely event, chances are high that +\family typewriter +fsck +\family default + can fix any remaining problems (and, of course, you can also switchover + to a former secondary). +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +When this message appears, simply start MARS again (e.g. + +\family typewriter +modprobe mars; marsadm up all +\family default +), in whatever role you are intending. + This will +\emph on +automatically +\emph default + try to replay any necessary transaction logfile(s) in order to fix the + inconsistency. + Only if the automatic fix fails and this message persists for a long time + without progress, you +\emph on +might +\emph default + have a problem. + Typically, as observed at a large installation at 1&1, this happens extremely + rarely, and then typically indicates that your hardware is likely to be + defective. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +OutDated[FR] +\family default + (cf +\family typewriter +%work-reached{} +\family default +) Only at secondaries. + Tells whether it is +\emph on +currently known +\emph default + that the disk has any lag-behind when compared to the +\emph on +currently known +\emph default + state of the current designated primary (if there exists one). + Only meaningful if a current designated primary exists. + Notice that this kind of status display is subject to +\emph on +natural races +\emph default +, for example when new logfile data has been produced in parallel, or network + propagation is very slow. + Additional information is in brackets: +\begin_inset Separator latexpar +\end_inset + + +\end_layout + +\begin_deeper +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +[F] +\family default + Fetch is known to be needed.
+\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +[R] +\family default + Replay is known to be needed. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +[FR] +\family default + Both are known to be needed. +\end_layout + +\end_deeper +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +WriteBack[ +\emph on +amount +\emph default +] +\family default + (cf +\family typewriter +%is-primary{} +\family default + and amount via +\family typewriter +%writeback-rest{} +\family default +) Appears only at actual primaries (whether designated or not), when the + writeback from the RAM buffer is active (see section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:The-Transaction-Logger" + +\end_inset + +). + The +\emph on +amount +\emph default + is displayed in human readable form, and may be used for a very rough estimatio +n of recovery time after a primary crash. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +Recovery +\family default + (cf +\family typewriter +%todo-primary{} +\family default +) Appears only at the designated primary before it actually has become primary. + Similar to database recovery, this indicates the recovery phase after a + crash +\begin_inset Foot +status open + +\begin_layout Plain Layout +In some cases, +\family typewriter +primary --force +\family default + may also trigger this message. +\end_layout + +\end_inset + +. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +EmergencyMode +\family default + (cf +\family typewriter +%is-emergency{} +\family default +) A current designated primary exists, and it is known that this host has + entered emergency mode. + See section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Emergency-Mode" + +\end_inset + +. 
+\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +UpToDate +\family default + Displayed when none of the above has been detected. +\end_layout + +\end_deeper +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +diskstate-1and1 +\family default + A variant for internal use by 1&1 Internet AG. + See above note. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +replstate +\family default + Shows the status of the replication in the following order of precedence: +\begin_inset Separator latexpar +\end_inset + + +\end_layout + +\begin_deeper +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +ModuleNotLoaded +\family default + (cf +\family typewriter +%is-module-loaded{} +\family default +) No kernel module is loaded, and as a consequence no +\family typewriter +/proc/sys/mars/ +\family default + does exist. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +UnResponsive +\family default + (cf +\family typewriter +%is-alive{%{host}} +\family default +) The main thread +\family typewriter +mars_light +\family default + did not do any noticeable work for more than +\family typewriter +%{window} +\family default + (default 60) seconds. + Notice that this may happen when deleting +\emph on +extremely +\emph default + large logfiles (up to hundreds of gigabytes or terabytes). + If this happens for a +\emph on +very +\emph default + long time, you should check whether you might need a reboot in order to + fix the hang. + The time window may be changed by +\family typewriter +--window=$seconds +\family default +. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +NotJoined +\family default + (cf +\family typewriter +%get-disk{} +\family default +) No underlying disk device is configured for this resource.
+\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +NotStarted +\family default + (cf +\family typewriter +%todo-attach{} +\family default +) Replication has not been started. +\end_layout + +\begin_layout Itemize +When the current host is designated as a primary, the rest of the precedence + list looks as follows: +\begin_inset Separator latexpar +\end_inset + + +\end_layout + +\begin_deeper +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +EmergencyMode +\family default + (cf. + +\family typewriter +%is-emergency{} +\family default +) See section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Emergency-Mode" + +\end_inset + +. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +Replicating +\family default + (cf. + +\family typewriter +%is-primary{} +\family default +) Primary mode has been entered. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +NotYetPrimary +\family default + (catchall) This means the current host +\emph on +should +\emph default + act as a primary (see +\family typewriter +marsadm primary +\family default + or +\family typewriter +marsadm primary --force +\family default +), but currently doesn't (yet). + This happens during logfile replay, before primary mode is actually entered. + Notice that replay of very big logfiles may take a long time. +\end_layout + +\end_deeper +\begin_layout Itemize +When the current host is +\emph on +not +\emph default + designated as a primary: +\begin_inset Separator latexpar +\end_inset + + +\end_layout + +\begin_deeper +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +PausedSync +\family default + (cf. + +\family typewriter +%sync-rest{} +\family default + and +\family typewriter +%todo-sync{} +\family default +) Some data needs to be synced, but sync is currently switched off.
+ See +\family typewriter +marsadm {pause,resume}-sync +\family default +. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +Syncing +\family default + (cf. + +\family typewriter +%is-sync{} +\family default +) Sync is currently running. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +PausedFetch +\family default + (cf. + +\family typewriter +%todo{fetch} +\family default +) Fetch is currently switched off. + See +\family typewriter +marsadm {pause,resume}-fetch +\family default +. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +PausedReplay +\family default + (cf. + +\family typewriter +%todo{replay} +\family default +) Replay is currently switched off. + See +\family typewriter +marsadm {pause,resume}-replay +\family default +. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +NoPrimaryDesignated +\family default + (cf. + +\family typewriter +%get-primary{} +\family default +) A +\family typewriter +secondary +\family default + command has been given somewhere in the cluster. + Thus no designated primary exists. + All resource members are in state +\family typewriter +Secondary +\family default + or try to approach it. + Sync and other operations are not possible. + This state is therefore not recommended. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +PrimaryUnreachable +\family default + (cf. + +\family typewriter +%is-alive{} +\family default +) A current designated primary has been set, but this host has not been + remotely updated for more than 60 seconds (see also +\family typewriter +--window=$seconds +\family default +). +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +Orphan +\family default + The secondary cannot replay data anymore, because it has been kicked out + for avoidance of emergency mode. 
+ The data is not recent anymore. + Typically, +\family typewriter +marsadm invalidate +\family default + needs to be done. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +Replaying +\family default + (catchall) None of the previous conditions have triggered. +\end_layout + +\end_deeper +\end_deeper +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +replstate-1and1 +\family default + A variant for internal use by 1&1 Internet AG. + See above note. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +flags +\family default + For each of disk, consistency, attach, sync, fetch, and replay, show exactly + one character. + Each character is either a capital one, or the corresponding lowercase + one, or a dash. + The meaning is as follows: +\begin_inset Separator latexpar +\end_inset + + +\end_layout + +\begin_deeper +\begin_layout Labeling +\labelwidthstring 00.00.0000 +disk/device: +\family typewriter +D +\family default + = the device +\family typewriter +/dev/mars/mydata +\family default + is present, +\family typewriter +d +\family default + = only the underlying disk +\family typewriter +/dev/lv-x/mydata +\family default + is present, +\family typewriter +- +\family default + = none present / configured. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 +consistency: this relates to the +\emph on +underlying disk +\emph default +, not to +\family typewriter +/dev/mars/mydata +\family default +! +\family typewriter +C +\family default + = locally consistent, +\family typewriter +c +\family default + = maybe inconsistent (no guarantee), - = cannot determine. + Notice: this does not tell anything about +\emph on +actuality +\emph default +. + Notice: like the other flags, this flag is subject to races and therefore + should be relied on only in +\emph on +detached +\emph default + state! 
See also description of macro +\family typewriter +is-consistent +\family default + below. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 +attach: +\family typewriter +A +\family default + = attached, +\family typewriter +a +\family default + = currently trying to attach/detach but not yet ready (intermediate state), + +\family typewriter +- +\family default + = attach is switched off. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 +sync: +\family typewriter +S +\family default + = sync finished, +\family typewriter +s +\family default + = currently syncing, +\family typewriter +- +\family default + = sync is switched off. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 +fetch: +\family typewriter +F +\family default + = according to knowledge, fetched logfiles are up-to-date, +\family typewriter +f +\family default + = currently fetching (some parts of) a logfile, +\family typewriter +- +\family default + = fetch is switched off. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 +replay: +\family typewriter +R +\family default + = all fetched logfiles are replayed, +\family typewriter +r +\family default + = currently replaying, +\family typewriter +- +\family default + = replay is switched off. +\end_layout + +\end_deeper +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +flags-1and1 +\family default + A variant for internal use by 1&1 Internet AG. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +todo-role +\family default + Shows the +\emph on +designated +\emph default + state: +\family typewriter +None +\family default +, +\family typewriter +Primary +\family default + or +\family typewriter +Secondary +\family default +.
+\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +role +\family default + Shows the +\emph on +actual +\emph default + state: +\family typewriter +None +\family default +, +\family typewriter +NotYetPrimary +\family default +, +\family typewriter +Primary +\family default +, +\family typewriter +RemainsPrimary +\family default +, or +\family typewriter +Secondary +\family default +. + Any differences to the designated state are indicated by a prefix to the + keyword +\family typewriter +Primary +\family default +: +\family typewriter +NotYet +\family default + means that it +\emph on +should +\emph default + become primary, but actually hasn't. + Vice versa, +\family typewriter +Remains +\family default + means that it +\emph on +should +\emph default + leave primary state in order to become secondary, but actually cannot do + that because the +\family typewriter +/dev/mars/mydata +\family default + device is currently in use . +\begin_inset Newline newline +\end_inset + + +\begin_inset Tabular + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +%todo-primary{} == 0 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +%todo-primary{} == 1 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +%is-primary{} == 0 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +None +\family default + / +\family typewriter +Secondary +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +NotYetPrimary +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +%is-primary{} == 1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter 
+RemainsPrimary +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +Primary +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +role-1and1 +\family default + A variant for internal use by 1&1 Internet AG. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +primarynode +\family default + Display +\family typewriter +(none) +\family default + or the hostname of the designated primary. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +primarynode-1and1 +\family default + A variant for internal use by 1&1 Internet AG. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +commstate +\family default + When the last metadata communication to the designated primary is longer + ago than +\family typewriter +${window} +\family default + (see also +\family typewriter +--window= +\emph on +seconds +\family default +\emph default + option), display that age in human readable form. + See also primitive macro +\family typewriter +%alive-age{} +\family default +. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +syncinfo +\family default + Shows an informational progress bar when sync is running. + Intended for humans. + Scripts should not rely on any details from this. + Scripts may use this only as an +\emph on +approximate +\emph default + means for detecting progress (when comparing the +\emph on +full +\emph default + output text to a prior version and finding +\emph on +any +\emph default + difference, they may conclude that some progress has happened, how small + whatsoever). +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +syncinfo-1and1 +\family default + A variant for internal use by 1&1 Internet AG. 
+\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +replinfo +\family default + Shows an informational progress bar when fetch is running. + This should not be used for scripting at all, because it contains realtime + information in human-readable form. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +replinfo-1and1 +\family default + A variant for internal use by 1&1 Internet AG. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +fetch-line +\family default + Additional details, called by +\family typewriter +replinfo +\family default +. + Shows the amount of data to be fetched, as well as the current transfer + rate and a very rough estimation of the future duration. + When primitive macros +\family typewriter +%fetch-age{} +\family default + or +\family typewriter +%fetch-lag{} +\family default + exceed +\family typewriter +${window} +\family default +, their values are also displayed for human informational purposes. + See description of these primitive macros. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +replay-line +\family default + Additional details, called by +\family typewriter +replinfo +\family default +. + Shows the amount of data to be replayed, as well as the current replay + rate and a very rough estimation of the future duration. + When primitive macro +\family typewriter +%replay-age{} +\family default + exceeds +\family typewriter +${window} +\family default +, it is also displayed for human informational purposes. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +comminfo +\family default + When the network communication is in an unusual condition, display it. + Otherwise, don't produce any output. 
+\end_layout + +\begin_layout Subsection +Predefined Primitive Macros +\begin_inset CommandInset label +LatexCommand label +name "subsec:Predefined-Trivial-Macros" + +\end_inset + + +\end_layout + +\begin_layout Subsubsection +Intended for Humans +\end_layout + +\begin_layout Standard +In the following, shell glob notation +\family typewriter +{a,b} +\family default + is used to document similar variants of similar macros in a single place. + When you actually call the macro, you must choose one of the possible variants + (excluding the braces). +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +the-err-msg +\family default + Show reported errors for a resource. + When the resource argument is missing or empty, show global error information. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +all-err-msg +\family default + Like before, but show all information including those which are +\family typewriter +OK +\family default +. + This way, you get a list +\begin_inset Foot +status open + +\begin_layout Plain Layout +The list may be extended in future versions of MARS. +\end_layout + +\end_inset + + of +\emph on +all +\emph default + potential error information present in the system. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{all,the}-wrn-msg +\family default + Show all / reported warnings in the system. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{all,the}-inf-msg +\family default + Show all / reported informational messages in the system. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{all,the}-msg +\family default + Show all / reported messages regardless of their classification.
+\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{all,the}-global-msg +\family default + Show global messages not associated with any resource (the resource argument + of the +\family typewriter +marsadm +\family default + command is ignored in this case). +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{all,the}-global-{inf,wrn,err}-msg +\family default + Ditto, but more specific. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{all,the}-pretty-{global-,}{inf-,wrn-,err-,}msg +\family default + Ditto, but show numerical timestamps in a human readable form. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{all,the}-{global-,}{inf-,wrn-,err-,}count +\family default + Instead of showing the messages, show their count (number of lines). +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +errno-text +\family default + This macro takes 1 argument, which must represent a Linux +\family typewriter +errno +\family default + number, and converts it to human readable form (similar to the C +\family typewriter +strerror() +\family default + function). +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +todo-{attach,sync,fetch,replay,primary} +\family default + Shows a boolean value (0 or 1) indicating the current state of the correspondin +g todo switch (whether on or off). + The meaning of todo switches is illustrated in section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:The-State-of" + +\end_inset + +. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +get-resource-{fat,err,wrn} +\family default + Access to the internal error status files. + This is not an official interface and may thus change at any time without + notice.
+ Use this only for human inspection, not for scripting! +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + These macros, as well as the error status files, are likely to disappear + in future versions of MARS. + They should be used for debugging only. + At least when merging into the upstream Linux kernel, only the +\family typewriter +*-msg +\family default + macros will likely survive. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +get-resource-{fat,err,wrn}-count +\family default + Ditto, but get the number of lines instead of the text. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +replay-code +\family default + Indicate the current state of logfile replay / recovery: +\begin_inset Separator latexpar +\end_inset + + +\end_layout + +\begin_deeper +\begin_layout Labeling +\labelwidthstring 00.00.0000 +(empty) Unknown. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 +0 No replay is currently running. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 +1 Replay is currently running. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 +2 Replay has successfully stopped. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 +<0 See Linux +\family typewriter +errno +\family default + code. + Typically this indicates a damaged logfile, or another filesystem error + at +\family typewriter +/mars +\family default +. +\end_layout + +\end_deeper +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +is-{attach,sync,fetch,replay,primary,module-loaded} +\family default + Shows a boolean value (0 or 1) indicating the +\emph on +actual +\emph default + state, whether the corresponding action has been actually carried out, + or not (yet).
+ Notice that the values indicated by +\family typewriter +is-* +\family default + may differ from the +\family typewriter +todo-* +\family default + values when something is not (yet) working. + More explanations can be found in section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:The-State-of" + +\end_inset + +. + +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +is-split-brain +\family default + Shows whether split brain (see section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Split-Brain-Resolution" + +\end_inset + +) has been detected, or not. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +is-consistent +\family default + Shows whether the +\emph on +underlying disk +\emph default + is in a locally consistent state, i.e. + whether it +\emph on +could +\emph default + be (potentially) detached and then used for read-only test-mounting +\begin_inset Foot +status open + +\begin_layout Plain Layout +Notice that the +\emph on +writeback +\emph default + at the primary side is out-of-order by default, for performance reasons. + Therefore, the underlying disk is only guaranteed to be consistent when + there is no data left to be written back. + Notice that this condition is racy by construction. + When your primary node crashes during writeback and then comes up again, + you must do a +\family typewriter +modprobe mars +\family default + first in order to automatically replay the transaction logfiles, which + will automatically heal such temporary inconsistencies. +\end_layout + +\end_inset + +. 
+ Don't confuse this with the consistency of +\family typewriter +/dev/mars/mydata +\family default +, which is by construction +\emph on +always +\emph default + locally consistent once it has appeared +\begin_inset Foot +status open + +\begin_layout Plain Layout +Exceptions are possible when using +\family typewriter +marsadm fake-sync +\family default +. + Even in split brain situations, +\family typewriter +marsadm primary --force +\family default + tries to prevent any further potential exception as best as it can, by + not letting +\family typewriter +/dev/mars/mydata +\family default + to appear and by insisting on split brain resolution first. + In future implementations, this might change if more pressure is put on + the developer to sacrifice consistency in preference to not waiting for + a full logfile replay. +\end_layout + +\end_inset + +. + By construction of MARS, the disk of secondaries will +\emph on +always +\emph default + remain in a locally consistent state once the initial sync has finished + as well as the initial logfile replay. + Notice that local consistency does not necessarily imply actuality (see + high-level explanation in section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Behaviour-of-MARS" + +\end_inset + +). +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +is-emergency +\family default + Shows whether emergency mode (see section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Emergency-Mode" + +\end_inset + +) has been entered for the named resource, or not. 
+\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +rest-space +\family default + (global, no resource argument necessary) Shows the +\emph on +logically +\emph default + available space in +\family typewriter +/mars/ +\family default +, which may deviate from the physically available space as indicated by + the +\family typewriter +df +\family default + command. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +get-{disk,device} +\family default + Show the name of the underlying disk, or of the +\family typewriter +/dev/mars/mydata +\family default + device (if it is available). +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{disk,device}-present +\family default + Show (as a boolean value) whether the underlying disk, or the +\family typewriter +/dev/mars/mydata +\family default + device, is available. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +device-opened +\family default + Show (as a number) how often +\family typewriter +/dev/mars/mydata +\family default + has been actually opened, e.g. + by +\family typewriter +mount +\family default + or by some processes like +\family typewriter +dd +\family default +, or by iSCSI, etc. +\end_layout + +\begin_layout Subsubsection +Intended for Scripting +\end_layout + +\begin_layout Standard +While complex macros may output a whole bunch of information, the following + primitive macros are outputting exactly one value. + They are intended for script use (cf. + section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:Scripting-HOWTO" + +\end_inset + +). + Of course, curious humans may also try them :) +\end_layout + +\begin_layout Standard +In the following, shell glob notation +\family typewriter +{a,b} +\family default + is used to document similar variants of similar macros in a single place.
+ When you actually call the macro, you must choose one of the possible variants + (excluding the braces). +\end_layout + +\begin_layout Paragraph +Name Querying +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +cluster-members +\family default + Show a newline-separated list of all host names participating in the cluster. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +resource-members +\family default + Show a newline-separated list of all host names participating in the particular + resource +\family typewriter +%{res} +\family default +. + Notice that this may be a subset of +\family typewriter +%cluster-members{} +\family default +. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{my,all}-resources +\family default + Show a newline-separated list of either all resource names existing in + the cluster, or only those where the current host +\family typewriter +%{host} +\family default + is member. + Optionally, you may specify the hostname as a parameter, e.g. + +\family typewriter +%my-resources{ +\emph on +otherhost +\emph default +} +\family default +. 
+\end_layout + +\begin_layout Paragraph +Amounts of Data Inquiry +\end_layout + +\begin_layout Standard +\begin_inset Float figure +placement h +wide false +sideways false +status open + +\begin_layout Plain Layout +\noindent +\align center +\begin_inset Graphics + filename images/fetch-replay-total.fig + width 80col% + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption Standard + +\begin_layout Plain Layout +overview on amounts / cursors +\begin_inset CommandInset label +LatexCommand label +name "fig:overview-on-amounts" + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +The following macros are meaningful for both primary and secondary nodes: +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +deletable-size +\family default + Show the total amount of +\emph on +locally present +\emph default + logfile data which +\emph on +could +\emph default + be deleted by +\family typewriter +marsadm log-delete-all mydata +\family default +. + This differs almost always from both +\family typewriter +replay-pos +\family default + and +\family typewriter +occupied-size +\family default + due to granularity reasons (only whole logfiles can be deleted). + Units are +\emph on +bytes +\emph default +, not kilobytes. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +occupied-size +\family default + Show the total amount of +\emph on +locally present +\emph default + logfile data (sum of all file sizes). 
+ This is often roughly approximate to +\family typewriter +fetch-pos +\family default +, but it may differ vastly (in both directions) when logfiles are not completely + transferred, when some are damaged, during split brain, after a +\family typewriter +join-resource +\family default + / +\family typewriter +invalidate +\family default +, or when the resource is in emergency mode (see section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Emergency-Mode" + +\end_inset + +). +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +disk-size +\family default + Show the size of the underlying local disk in bytes. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +resource-size +\family default + Show the logical size of the resource in bytes. + When this value is lower than +\family typewriter +disk-size +\family default +, you are wasting space. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +device-size +\family default + At a primary node, this may differ from +\family typewriter +resource-size +\family default + only for a very short time during the +\family typewriter +resize +\family default + operation. + At secondaries, there will be no difference. +\end_layout + +\begin_layout Standard +\noindent +The following macros are only meaningful for resources in primary mode: +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +writeback-rest +\family default + Show the amount of data which is already in the transaction logfile, but + has not yet been written back to the underlying disk. + This may be used for estimation of recovery time after a potential primary + crash. + The writeback buffer is explained by the graphics at +\begin_inset CommandInset ref +LatexCommand vref +reference "sec:The-Transaction-Logger" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +. 
+\end_layout + +\begin_layout Standard +\noindent +The following macros are only meaningful for resources in secondary mode. + By information theoretic limits, they can only tell what is +\emph on +locally known +\emph default +. + They +\series bold +cannot +\series default + reflect the +\begin_inset Quotes eld +\end_inset + +true (global) state +\begin_inset Foot +status open + +\begin_layout Plain Layout +Notice that according to Einstein's law, and according to observations by + Lamport, the concept of +\begin_inset Quotes eld +\end_inset + +true state +\begin_inset Quotes erd +\end_inset + + does not exist at all in a distributed system. + Anything you can know in a distributed system is always local knowledge, + which races with other (remote) knowledge, and may be outdated at +\emph on +any +\emph default + time. +\end_layout + +\end_inset + + +\begin_inset Quotes erd +\end_inset + + of a cluster, in particular during network partitions. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{sync,fetch,replay,work}-size +\family default + Show the total amount of data which is / was to be processed by either + sync, fetch, or replay. + +\family typewriter +work-size +\family default + is equivalent to +\family typewriter +fetch-size +\family default +. + +\family typewriter +replay-size +\family default + is equivalent to +\family typewriter +fetch-pos +\family default + (see below). + Units are +\emph on +bytes +\emph default +, not kilobytes. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{sync,fetch,replay,work}-pos +\family default + Show the total amount of data which is already processed (current +\begin_inset Quotes eld +\end_inset + +cursor +\begin_inset Quotes erd +\end_inset + + position). + +\family typewriter +work-pos +\family default + is equivalent to +\family typewriter +replay-pos +\family default +.
+\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +The 0% point is the +\emph on +locally contiguous +\emph default + amount of data since the last +\family typewriter +create-resource +\family default +, +\family typewriter +join-resource +\family default +, or +\family typewriter +invalidate +\family default +, or since the last emergency mode, but possibly shortened by +\family typewriter +log-delete +\family default +s. + Notice that the 0% point may be different on different cluster nodes, because + their resource history may be different or non-contiguous during split + brain, or after a +\family typewriter +join-resource +\family default +, or after +\family typewriter +invalidate +\family default +, or during / after emergency mode. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{sync,fetch,replay,work}-rest +\family default + Shows the difference between +\family typewriter +*-size +\family default + and +\family typewriter +*-pos +\family default + (amount of work to do). + +\family typewriter +work-rest +\family default + is therefore the difference between +\family typewriter +fetch-size +\family default + and +\family typewriter +replay-pos +\family default +, which is the +\emph on +total +\emph default + amount of work to do (regardless whether to be fetched and/or to be replayed). 
+\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{sync,fetch,replay,work}-reached +\family default + Boolean value indicating whether +\family typewriter +*-rest +\family default + dropped down to zero +\begin_inset Foot +status open + +\begin_layout Plain Layout +Recall from chapter +\begin_inset CommandInset ref +LatexCommand ref +reference "chap:Use-Cases-for" + +\end_inset + + that MARS (in its current stage of development) does only guarantee local + consistency, but cannot guarantee actuality in all imaginable situations. + Notice that a general notion of +\begin_inset Quotes eld +\end_inset + +actuality +\begin_inset Quotes erd +\end_inset + + is +\emph on +undefinable +\emph default + in a widely distributed system at all, according to Einstein's laws. +\end_layout + +\begin_layout Plain Layout +Let's look at an example. + In case of a node crash, and after the node is up again, a +\family typewriter +modprobe mars +\family default + has to occur, in order to replay the transaction logs of MARS again. + However, at the recovery phase before, the journalling +\family typewriter +ext4 +\family default + filesystem +\family typewriter +/mars/ +\family default + +\emph on +may +\emph default + have rolled back some internal symlink updates which have occurred immediately + before the crash. + MARS is relying on the fact that journalling filesystems like +\family typewriter +ext4 +\family default + should do their recovery in a consistent way, possibly by sacrificing actuality + a little bit. + Therefore, the above macros cannot guarantee to deliver true information + about what is persisted at the moment. +\end_layout + +\begin_layout Plain Layout +Notice that there are further potential caveats. +\end_layout + +\begin_layout Plain Layout +In case of +\family typewriter +{sync,fetch}-reached +\family default +, MARS uses +\family typewriter +bio +\family default + callbacks resp.
+ +\family typewriter +fdatasync() +\family default + by default, thus the underlying storage layer has +\emph on +told +\emph default + us that it +\emph on +believes +\emph default + it has committed the data in a reboot-safe way. + Whether this is +\emph on +really +\emph default + true does not depend on MARS, but on the lower layers of the storage hierarchy. + There exists hardware where this claim is known to be wrong under certain + circumstances, such as certain hard disk drives in certain modes of operation. + Please check the hardware for any violations of storage semantics under + certain circumstances such as power loss, and check information sources + like magazines about the problem area. + Please notice that such a problem, if it exists at all, is independent + from MARS. + It would also exist if you wouldn't use MARS on the same system. +\end_layout + +\end_inset + +. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{fetch,replay,work}-threshold-reached +\family default + Boolean value indicating whether +\family typewriter +*-rest +\family default + dropped down to +\family typewriter +%{threshold} +\family default +, which is pre-settable by the +\family typewriter +--threshold= +\emph on +size +\family default +\emph default + command line option (default is 10 MiB). + In asynchronous use cases of MARS, this should be preferred over +\family typewriter +*-reached +\family default + for +\emph on +human display +\emph default +, because it produces less flickering by the inevitable replication delay. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{fetch,replay,work}-almost-reached +\family default + Boolean value indicating whether +\family typewriter +*-rest +\family default + +\emph on +almost +\emph default + / +\emph on +approximately +\emph default + dropped down to zero. + The default is that at least 990 permille are reached.
+ In asynchronous use cases of MARS, this can be preferred over +\family typewriter +*-reached +\family default + for +\emph on +human display +\emph default + only, because it produces less flickering by the inevitable replication + delay. + However, don't base any decisions on this! +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{sync,fetch,replay,work}-percent +\family default + The cursor position +\family typewriter +*-pos +\family default + as a percentage of +\family typewriter +*-size +\family default +. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{sync,fetch,replay,work}-permille +\family default + The cursor position +\family typewriter +*-pos +\family default + as permille of +\family typewriter +*-size +\family default +. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{sync,fetch,replay,work}-rate +\family default + Show the current throughput in bytes +\begin_inset Foot +status open + +\begin_layout Plain Layout +Notice that the internal granularity reported by the kernel may be coarser, + such as KiB. + This interface abstracts away from kernel internals and thus presents + everything in byte units. +\end_layout + +\end_inset + + per second. + +\family typewriter +work-rate +\family default + is the +\emph on +maximum +\emph default + of +\family typewriter +fetch-rate +\family default + and +\family typewriter +replay-rate +\family default +. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{sync,fetch,replay,work}-remain +\family default + Show the +\emph on +estimated +\emph default + remaining time for completion of the respective operation. + This is just a very raw guess. + Units are seconds.
+\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +summary-vector +\family default + Show the colon-separated CSV value +\family typewriter +%replay-pos{}:%fetch-pos{}:%fetch-size{} +\family default +. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +replay-basenr +\family default + Get currently first reachable logfile number (see figure +\begin_inset CommandInset ref +LatexCommand vref +reference "fig:overview-on-amounts" + +\end_inset + +). + Only for curious humans or for debugging / monitoring - don't base any + decisions on this. + Use the +\family typewriter +*-{pos,size} +\family default + macros instead. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{replay,fetch,work}-lognr +\family default + Get current logfile number of replay or fetch position, or of the currently + known last reachable number (see figure +\begin_inset CommandInset ref +LatexCommand vref +reference "fig:overview-on-amounts" + +\end_inset + +). + Only for curious humans or for debugging / monitoring - don't base any + decisions on this. + Use the +\family typewriter +*-{pos,size} +\family default + macros instead. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{replay,fetch,work}-logcount +\family default + Get current number of logfiles which are already replayed, or are already + fetched, or are to be applied in total (see figure +\begin_inset CommandInset ref +LatexCommand vref +reference "fig:overview-on-amounts" + +\end_inset + +). + Only for curious humans or for debugging / monitoring - don't base any + decisions on this. + Use the +\family typewriter +*-{rest} +\family default + macros instead.
+\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +alive-timestamp +\family default + Tell the Lamport Unix timestamp (seconds since 1970) of the last metadata + communication to the designated primary (or to any other host given by + the first argument). + Returns +\begin_inset Formula $-1$ +\end_inset + + if no such host exists. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{fetch,replay,work}-timestamp +\family default + Tell the Lamport Unix timestamp (seconds since 1970) when the last progress + has been made. + When no such action exists, +\begin_inset Formula $-1$ +\end_inset + + is returned. + +\family typewriter +%work-timestamp{ +\emph on +hostname +\emph default +} +\family default + is the maximum of +\family typewriter +%fetch-timestamp{ +\emph on +hostname +\emph default +} +\family default + and +\family typewriter +%replay-timestamp{ +\emph on +hostname +\emph default +} +\family default +. + When the parameter +\family typewriter +\emph on +hostname +\family default +\emph default + is empty, the local host will be reported (default). + Example usage: +\family typewriter +marsadm view all --macro= +\begin_inset Quotes erd +\end_inset + +%replay-timestamp{%todo-primary{}} +\begin_inset Quotes erd +\end_inset + + +\family default + shows the timestamp of the last reported +\begin_inset Foot +status open + +\begin_layout Plain Layout +Updates of this information are occurring with lower frequency than actual + writebacks, for performance reasons. + The metadata network update protocol will add further delays. + Therefore, the accuracy is only in the range of minutes. +\end_layout + +\end_inset + + writeback action at the designated primary. 
+\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{alive,fetch,replay,work}-age +\family default + Tell the number of seconds since the last respective action, or +\begin_inset Formula $-1$ +\end_inset + + if none exists. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +{alive,fetch,replay,work}-lag +\family default + Report the time difference (in seconds) between the last +\emph on +known +\emph default + action at the local host and at the designated primary (or between any + other hosts when 2 parameters are given). + Returns +\begin_inset Formula $-1$ +\end_inset + + if no such action exists at any of the two hosts. + Attention! This need not reflect the +\emph on +actual +\emph default + state in case of networking problems. + Don't draw wrong conclusions from a high +\family typewriter +{fetch,replay}-lag +\family default + value: it could also mean that simply no write operation at all has occurred + at the primary side for a long time. + Conversely, a low lag value does not imply that the replication is recent: + it may refer to +\emph on +different +\emph default + write operations at each of the hosts; therefore it only tells that +\emph on +some +\emph default + progress has been made, but says nothing about the amount of the progress. +\end_layout + +\begin_layout Paragraph +Misc Informational Status +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +get-primary +\family default + Return the name of the current designated primary node as locally known. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +actual-primary +\family default + (deprecated) try to determine the name of the node which +\emph on +appears +\emph default + to be the actual primary. 
+ This is only a +\series bold +\emph on +guess +\series default +\emph default +, because it is not generally unique in split brain situations! Don't use + this macro. + Instead, use +\family typewriter +is-primary +\family default + on those nodes you are interested in. + The explanations from section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:The-State-of" + +\end_inset + + also apply to +\family typewriter +get-primary +\family default + versus +\family typewriter +actual-primary +\family default + analogously. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +is-alive +\family default + Boolean value indicating whether all other nodes participating in +\family typewriter +mydata +\family default + are reachable / healthy. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +uuid +\family default + (global) Show the unique identifier created by +\family typewriter +create-cluster +\family default + or by +\family typewriter +create-uuid +\family default +. + Hint: this is immutable, and it is firmly bound to the +\family typewriter +/mars/ +\family default + filesystem. + It can only be destroyed by deleting the whole filesystem (see section + +\begin_inset CommandInset ref +LatexCommand ref +reference "leave-cluster" + +\end_inset + +). +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +tree +\family default + (global) Indicate symlink tree version (see section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:The-Symlink-Tree" + +\end_inset + +). +\end_layout + +\begin_layout Paragraph +Experts Only +\end_layout + +\begin_layout Standard +The following is for hackers who know what they are doing. + The following is not officially supported. 
+\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +wait-{is,todo}-{attach,sync,fetch,replay,primary}-{on,off} +\family default + This may be used to program some useful waiting conditions in advanced + macro scripts. + Use at your own risk! +\end_layout + +\begin_layout Section +Creating your own Macros +\begin_inset CommandInset label +LatexCommand label +name "subsec:Creating-your-own" + +\end_inset + + +\end_layout + +\begin_layout Standard +In order to create your own macros, you could start writing them from scratch + with your favorite ASCII text editor. + However, it is much easier to take an existing macro and to customize it + to your needs. + In addition, you can learn something about macro programming by looking + at the existing macro code. +\end_layout + +\begin_layout Standard +Go to a new empty directory and say +\end_layout + +\begin_layout Itemize + +\family typewriter +marsadm dump-macros +\end_layout + +\begin_layout Standard +in order to get the most interesting complex macros, or say +\end_layout + +\begin_layout Itemize + +\family typewriter +marsadm dump-all-macros +\end_layout + +\begin_layout Standard +in order to additionally get some primitive macros which could be customized + if needed. + This will write lots of files +\family typewriter +*.tpl +\family default + into your current working directory. +\end_layout + +\begin_layout Standard +Any modified or new macro file should be placed either into the current working + directory +\family typewriter +./ +\family default + , or into +\family typewriter +$HOME/.marsadm/ +\family default + , or into +\family typewriter +/etc/marsadm/ +\family default + . + They will be searched in this order, and the first match will win. + When no macro file is found, the built-in version will be used if it exists. + This way, you may override builtin macros. 
+\end_layout + +\begin_layout Standard +Example: if you have a file +\family typewriter +./mymacro.tpl +\family default + you just need to say +\family typewriter +marsadm view-mymacro mydata +\family default + in order to invoke it in the resource context +\family typewriter +mydata +\family default +. +\end_layout + +\begin_layout Subsection +General Macro Syntax +\end_layout + +\begin_layout Standard +Macros are simple ASCII text, enriched with calls to other macros. +\end_layout + +\begin_layout Standard +ASCII text outside of comments is copied to the output verbatim. + Comments are skipped. + Comments may have one of the following well-known forms: +\end_layout + +\begin_layout Itemize + +\family typewriter +# skipped text until / including next newline character +\end_layout + +\begin_layout Itemize + +\family typewriter +// skipped text until / including next newline character +\end_layout + +\begin_layout Itemize + +\family typewriter +/* skipped text including any newline characters */ +\end_layout + +\begin_layout Itemize +denoted as Perl regex: +\family typewriter + +\backslash + +\backslash + +\backslash +n +\backslash +s* +\family default +(single backslash directly followed by a newline character, and eating up + any whitespace characters at the beginning of the next line) Hint: this + may be fruitfully used to structure macros in a more readable form / indentatio +n. +\end_layout + +\begin_layout Standard +Special characters are always initiated by a backslash. 
+ The following pre-defined special character sequences are recognized: +\end_layout + +\begin_layout Itemize + +\family typewriter + +\backslash +n +\family default + newline +\end_layout + +\begin_layout Itemize + +\family typewriter + +\backslash +r +\family default + return (useful for DOS compatibility) +\end_layout + +\begin_layout Itemize + +\family typewriter + +\backslash +t +\family default + tab +\end_layout + +\begin_layout Itemize + +\family typewriter + +\backslash +f +\family default + formfeed +\end_layout + +\begin_layout Itemize + +\family typewriter + +\backslash +b +\family default + backspace +\end_layout + +\begin_layout Itemize + +\family typewriter + +\backslash +a +\family default + alarm (bell) +\end_layout + +\begin_layout Itemize + +\family typewriter + +\backslash +e +\family default + escape (e.g. + for generating ANSI escape sequences) +\end_layout + +\begin_layout Itemize + +\family typewriter + +\backslash + +\family default + followed by anything else: assure that the next character is taken verbatim. + Although possible, please don't use this for escaping letters, because + further escape sequences might be pre-defined in future. + Best practice is to use this only for escaping the backslash itself, or + for escaping the percent sign when you don't want to call a macro (protect + against evaluation), or to escape a brace directly after a macro call (verbatim + brace not to be interpreted as a macro parameter). +\end_layout + +\begin_layout Itemize +All other characters stand for their own. + If you like, you should be able to produce XML, HTML, JSON and other ASCII-base +d output formats this way. 
+\end_layout + +\begin_layout Standard +Macro calls have the following syntax: +\end_layout + +\begin_layout Itemize + +\family typewriter +% +\emph on +macroname +\emph default +{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +}{ +\emph on +argn +\emph default +} +\end_layout + +\begin_layout Itemize +Of course, arguments may be empty, denoted as +\family typewriter +{} +\end_layout + +\begin_layout Itemize +It is possible to supply more arguments than required. + These are simply ignored. +\end_layout + +\begin_layout Itemize +There must be always at least 1 argument, even for parameterless macros. + In such a case, it is good style to leave it empty (even if it is actually + ignored). + Just write +\family typewriter +%parameterlessmacro{} +\family default + in such a case. +\end_layout + +\begin_layout Itemize + +\family typewriter +%{ +\emph on +varname +\emph default +} +\family default + syntax: As a special case, the macro name may be empty, but then the first + argument must denote a previously defined variable (such as assigned via + +\family typewriter +%let{varname}{myvalue} +\family default +, or a pre-defined standard variable like +\family typewriter +%{res} +\family default + for the current resource name, see later paragraph +\begin_inset CommandInset ref +LatexCommand ref +reference "par:Predefined-Variables" + +\end_inset + +). +\end_layout + +\begin_layout Itemize +Of course, parameter calls may be (almost) arbitrarily nested. +\end_layout + +\begin_layout Itemize +Of course, the +\emph on +correctness +\emph default + of nesting of braces must be generally obeyed, as usual in any other macro + processor language. + General rule: for each opening brace, there must be exactly one closing + brace somewhere afterwards. +\end_layout + +\begin_layout Standard +These rules are hopefully simple and intuitive. + There are currently no exceptions. 
+ In particular, there is no special infix operator syntax for arithmetic + expressions, and therefore no operator precedence rules are necessary. + You have to write nested arithmetic expressions always in the above prefix + syntax, like +\family typewriter +%*{7}{%+{2}{3}} +\family default + (similar to non-inverse polish notation). +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +When deeply nesting macros and their braces, you may easily find yourself + in a feeling like in the good old days of Lisp. + Use the above backslash-newline syntax to indent your macros in a readable + and structured way. + Fortunately, modern text editors like (x)emacs or vim have modes for dealing + with the correctness of nested braces. +\end_layout + +\begin_layout Subsection +Calling Builtin / Primitive Macros +\end_layout + +\begin_layout Standard +Primitive macros can be called in two alternate forms: +\end_layout + +\begin_layout Itemize + +\family typewriter +%primitive- +\emph on +macroname +\emph default +{ +\emph on +something +\emph default +} +\end_layout + +\begin_layout Itemize + +\family typewriter +% +\emph on +macroname +\emph default +{ +\emph on +something +\emph default +} +\end_layout + +\begin_layout Standard +When using the +\family typewriter +%primitive-*{} +\family default + form, you +\emph on +explicitly disallow +\emph default + interception of the call by a +\family typewriter +*.tpl +\family default + file. + Otherwise, you may override the standard definition even of primitive macros + by your own template files. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + Notice that +\family typewriter +%call{} +\family default + conventions are used in such a case. 
+ The parameters are passed via +\family typewriter +%{0} +\family default + +\begin_inset Formula $\ldots$ +\end_inset + + +\family typewriter +%{n} +\family default + variables (see description below). +\end_layout + +\begin_layout Paragraph +Standard MARS State Inspection Macros +\end_layout + +\begin_layout Standard +These are already described in section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Predefined-Trivial-Macros" + +\end_inset + +. + When calling one of them, the call will simply expand to the corresponding + value. +\end_layout + +\begin_layout Standard +Example: +\family typewriter +%get-primary{} +\family default + will expand to the hostname of the current designated primary node. +\end_layout + +\begin_layout Paragraph +Further MARS State Inspection Macros +\end_layout + +\begin_layout Paragraph +Variable Access Macros +\end_layout + +\begin_layout Itemize + +\family typewriter +%let{ +\emph on +varname +\emph default +}{ +\emph on +expression +\emph default +} +\family default +Evaluates both +\family typewriter +\emph on +varname +\family default +\emph default + and the +\family typewriter +\emph on +expression +\family default +\emph default +. + The +\family typewriter +\emph on +expression +\family default +\emph default + is then assigned to +\family typewriter +varname +\family default +. +\end_layout + +\begin_layout Itemize + +\family typewriter +%append{ +\emph on +varname +\emph default +}{ +\emph on +expression +\emph default +} +\family default +Evaluates both +\family typewriter +\emph on +varname +\family default +\emph default + and the +\family typewriter +\emph on +expression +\family default +\emph default +. + The +\family typewriter +\emph on +expression +\family default +\emph default + is then appended to +\family typewriter +varname +\family default + (concatenation). 
+\end_layout + +\begin_layout Itemize + +\family typewriter +%{ +\emph on +varname +\emph default +} +\family default +Evaluates +\family typewriter +\emph on +varname +\family default +\emph default +, and outputs the value of the corresponding variable. + When the variable does not exist, the empty string is returned. +\end_layout + +\begin_layout Itemize + +\family typewriter +%{++}{ +\emph on +varname +\emph default +} +\family default +or +\family typewriter +%{ +\emph on +varname +\emph default +}{++} +\family default + Has the obvious well-known side effect e.g. + from C or Java. + You may also use +\family typewriter +-- +\family default + instead of +\family typewriter +++ +\family default +. + This is handy for programming loops (see below). +\end_layout + +\begin_layout Itemize + +\family typewriter +%dump-vars{} +\family default +Writes all currently defined variables (from the currently active scope) + to +\family typewriter +stderr +\family default +. + This is handy for debugging. +\end_layout + +\begin_layout Paragraph +CSV Array Macros +\end_layout + +\begin_layout Itemize + +\family typewriter +%{ +\emph on +varname +\emph default +}{ +\emph on +delimiter +\emph default +}{ +\emph on +index +\emph default +} +\family default +Evaluates all arguments. + The contents of +\family typewriter +\emph on +varname +\family default +\emph default + is interpreted as a comma-separated list, delimited by +\family typewriter +\emph on +delimiter +\family default +\emph default +. + The +\family typewriter +\emph on +index +\family default +\emph default +'th list element is returned. +\end_layout + +\begin_layout Itemize + +\family typewriter +%set{ +\emph on +varname +\emph default +}{ +\emph on +delimiter +\emph default +}{ +\emph on +index +\emph default +}{ +\emph on +expression +\emph default +} +\family default +Evaluates all arguments. 
+ The contents of the old +\family typewriter +\emph on +varname +\family default +\emph default + is interpreted as a comma-separated list, delimited by +\family typewriter +\emph on +delimiter +\family default +\emph default +. + The +\family typewriter +\emph on +index +\family default +\emph default +'th list element is then assigned to, or substituted by, +\family typewriter +\emph on +expression +\family default +\emph default +. +\end_layout + +\begin_layout Paragraph +Arithmetic Expression Macros +\end_layout + +\begin_layout Standard +The following macros can also take more than two arguments, carrying out + the corresponding arithmetic operation in sequence (it depends on the operator + whether this accords to the associative law). +\end_layout + +\begin_layout Itemize + +\family typewriter +%+{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +Evaluates the arguments, interprets them as numbers, and adds them together. +\end_layout + +\begin_layout Itemize + +\family typewriter +%-{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +Subtraction. +\end_layout + +\begin_layout Itemize + +\family typewriter +%*{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +Multiplication. +\end_layout + +\begin_layout Itemize + +\family typewriter +%/{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +Division. +\end_layout + +\begin_layout Itemize + +\family typewriter +%%{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +Modulus. +\end_layout + +\begin_layout Itemize + +\family typewriter +%&{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +Bitwise Binary And. +\end_layout + +\begin_layout Itemize + +\family typewriter +%|{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +Bitwise Binary Or. 
+\end_layout + +\begin_layout Itemize + +\family typewriter +%^{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +Bitwise Binary Exclusive Or. +\end_layout + +\begin_layout Itemize + +\family typewriter +%<<{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +Binary Shift Left. +\end_layout + +\begin_layout Itemize + +\family typewriter +%>>{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +Binary Shift Right. +\end_layout + +\begin_layout Itemize + +\family typewriter +%min{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +Compute the arithmetic minimum of the arguments. +\end_layout + +\begin_layout Itemize + +\family typewriter +%max{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +Compute the arithmetic maximum of the arguments. +\end_layout + +\begin_layout Paragraph +Boolean Condition Macros +\end_layout + +\begin_layout Itemize + +\family typewriter +%=={ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +Numeral Equality. +\end_layout + +\begin_layout Itemize + +\family typewriter +%!={ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +Numeral Inequality. +\end_layout + +\begin_layout Itemize + +\family typewriter +%<{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +Numeral Less Than. +\end_layout + +\begin_layout Itemize + +\family typewriter +%<={ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +Numeral Less or Equal. +\end_layout + +\begin_layout Itemize + +\family typewriter +%>{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +Numeral Greater Than. 
+\end_layout + +\begin_layout Itemize + +\family typewriter +%>={ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +Numeral Greater or Equal. +\end_layout + +\begin_layout Itemize + +\family typewriter +%eq{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default + +\begin_inset space ~ +\end_inset + +String Equality. +\end_layout + +\begin_layout Itemize + +\family typewriter +%ne{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +String Inequality. +\end_layout + +\begin_layout Itemize + +\family typewriter +%lt{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +String Less Than. +\end_layout + +\begin_layout Itemize + +\family typewriter +%le{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +String Less or Equal. +\end_layout + +\begin_layout Itemize + +\family typewriter +%gt{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +String Greater Than. +\end_layout + +\begin_layout Itemize + +\family typewriter +%ge{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +String Greater or Equal. +\end_layout + +\begin_layout Itemize + +\family typewriter +%=~{ +\emph on +string +\emph default +}{ +\emph on +regex +\emph default +}{ +\emph on +opts +\emph default +} +\family default +or +\family typewriter +%match{ +\emph on +string +\emph default +}{ +\emph on +regex +\emph default +}{ +\emph on +opts +\emph default +} +\family default + Checks whether +\family typewriter +\emph on +string +\family default +\emph default + matches the Perl regular expression +\family typewriter +\emph on +regex +\family default +\emph default +. + Modifiers can be given via +\family typewriter +\emph on +opts +\family default +\emph default +. 
+\end_layout + +\begin_layout Paragraph +Shortcut Evaluation Operators +\end_layout + +\begin_layout Standard +The following operators evaluate their arguments only when needed (like + in C). +\end_layout + +\begin_layout Itemize + +\family typewriter +%&&{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +Logical And. +\end_layout + +\begin_layout Itemize + +\family typewriter +%and{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +Alias for +\family typewriter +%&&{} +\family default +. +\end_layout + +\begin_layout Itemize + +\family typewriter +%||{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +Logical Or. +\end_layout + +\begin_layout Itemize + +\family typewriter +%or{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +} +\family default +Alias for +\family typewriter +%||{} +\family default +. +\end_layout + +\begin_layout Paragraph +Unary Operators +\end_layout + +\begin_layout Itemize + +\family typewriter +%!{ +\emph on +arg +\emph default +} +\family default +Logical Not. +\end_layout + +\begin_layout Itemize + +\family typewriter +%not{ +\emph on +arg +\emph default +} +\family default +Alias for +\family typewriter +%!{} +\family default +. +\end_layout + +\begin_layout Itemize + +\family typewriter +%~{ +\emph on +arg +\emph default +} +\family default +Bitwise Negation. +\end_layout + +\begin_layout Paragraph +String Functions +\end_layout + +\begin_layout Itemize + +\family typewriter +%length{ +\emph on +string +\emph default +} +\family default +Return the number of ASCII characters present in +\family typewriter +\emph on +string +\family default +\emph default +. +\end_layout + +\begin_layout Itemize + +\family typewriter +%toupper{ +\emph on +string +\emph default +} +\family default +Return all ASCII characters converted to uppercase. 
+\end_layout + +\begin_layout Itemize + +\family typewriter +%tolower{ +\emph on +string +\emph default +} +\family default +Return all ASCII characters converted to lowercase. +\end_layout + +\begin_layout Itemize + +\family typewriter +%append{ +\emph on +varname +\emph default +}{ +\emph on +string +\emph default +} +\family default +Equivalent to +\family typewriter +%let{ +\emph on +varname +\emph default +}{%{ +\emph on +varname +\emph default +} +\emph on +string +\emph default +} +\family default +. +\end_layout + +\begin_layout Itemize + +\family typewriter +%subst{ +\emph on +string +\emph default +}{ +\emph on +regex +\emph default +}{ +\emph on +subst +\emph default +}{ +\emph on +opts +\emph default +} +\family default +Perl regex substitution. +\end_layout + +\begin_layout Itemize + +\family typewriter +%sprintf{ +\emph on +fmt +\emph default +}{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +}{ +\emph on +argn +\emph default +} +\family default +Perl +\family typewriter +sprintf() +\family default + operator. + Details see Perl manual. +\end_layout + +\begin_layout Itemize + +\family typewriter +%human-number{ +\emph on +unit +\emph default +}{ +\emph on +delim +\emph default +}{ +\emph on +unit-sep +\emph default +}{ +\emph on +number +\emph default +1}{ +\emph on +number +\emph default +2} +\begin_inset Formula $\ldots$ +\end_inset + + +\family default +Convert a number or a list of numbers into human-readable +\family typewriter +B +\family default +, +\family typewriter +KiB +\family default +, +\family typewriter +MiB +\family default +, +\family typewriter +GiB +\family default +, +\family typewriter +TiB +\family default +, as given by +\family typewriter +\emph on +unit +\family default +\emph default +. + When +\family typewriter +\emph on +unit +\family default +\emph default + is empty, a reasonable unit will be guessed automatically from the maximum + of all given numbers. 
+ A single result string is produced, where multiple numbers are separated + by +\family typewriter +\emph on +delim +\family default +\emph default + when necessary. + When +\family typewriter +\emph on +delim +\family default +\emph default + is empty, the slash symbol +\family typewriter +/ +\family default + is used by default (the most obvious use case is result strings like +\family typewriter + +\begin_inset Quotes eld +\end_inset + +17/32 KiB +\begin_inset Quotes erd +\end_inset + + +\family default +). + The final unit text is separated from the previous number(s) by +\family typewriter +\emph on +unit-sep +\family default +\emph default +. + When +\family typewriter +\emph on +unit-sep +\family default +\emph default + is empty, a single blank is used by default. +\end_layout + +\begin_layout Itemize + +\family typewriter +%human-seconds{ +\emph on +number +\emph default +} +\family default +Convert the given number of seconds into +\family typewriter +hh:mm:ss +\family default + format. +\end_layout + +\begin_layout Paragraph +Complex Helper Macros +\end_layout + +\begin_layout Itemize + +\family typewriter +%progress{20} +\family default +Return a string containing a progress bar showing the values from +\family typewriter +%summary-vector{} +\family default +. + The default width is 20 characters plus two braces. +\end_layout + +\begin_layout Itemize + +\family typewriter +%progress{20}{ +\emph on +minvalue +\emph default +}{ +\emph on +midvalue +\emph default +}{ +\emph on +maxvalue +\emph default +} +\family default +Instead of taking the values from +\family typewriter +%summary-vector{} +\family default +, use the supplied values. + +\family typewriter +minvalue +\family default + and +\family typewriter +midvalue +\family default + indicate two different intermediate points, while +\family typewriter +maxvalue +\family default + will determine the 100% point. 
+\end_layout + +\begin_layout Paragraph +Control Flow Macros +\end_layout + +\begin_layout Itemize + +\family typewriter +%if{ +\emph on +expression +\emph default +}{ +\emph on +then-part +\emph default +} +\family default + or +\family typewriter +%if{ +\emph on +expression +\emph default +}{ +\emph on +then-part +\emph default +}{ +\emph on +else-part +\emph default +} +\family default + Like in any other macro or programming language, this evaluates the +\family typewriter +expression +\family default + once, not copying its outcome to the output. + If the result is non-empty and is not a string denoting the number +\family typewriter +0 +\family default +, the +\family typewriter +\emph on +then-part +\family default +\emph default + is evaluated and copied to the output. + Otherwise, the +\family typewriter +else-part +\family default + is evaluated and copied, provided that one exists. +\end_layout + +\begin_layout Itemize + +\family typewriter +%unless{ +\emph on +expression +\emph default +}{ +\emph on +then-part +\emph default +} +\family default + or +\family typewriter +%unless{ +\emph on +expression +\emph default +}{ +\emph on +then-part +\emph default +}{ +\emph on +else-part +\emph default +} +\family default + Like +\family typewriter +%if{} +\family default +, but the expression is logically negated. + Essentially, this is a shorthand for +\family typewriter +%if{%not{expression}}{...} +\family default + or similar. 
+\end_layout + +\begin_layout Itemize + +\family typewriter +%elsif{ +\emph on +expr1 +\emph default +}{ +\emph on +then1 +\emph default +}{ +\emph on +expr2 +\emph default +}{ +\emph on +then2 +\emph default +} +\family default + +\begin_inset Formula $\ldots$ +\end_inset + + or +\family typewriter +%elsif{ +\emph on +expr1 +\emph default +}{ +\emph on +then1 +\emph default +}{ +\emph on +expr2 +\emph default +}{ +\emph on +then2 +\emph default +} +\family default + +\begin_inset Formula $\ldots$ +\end_inset + + +\family typewriter +{ +\emph on +odd-else-part +\emph default +} +\family default + This is for simplification of boring if-else-if chains. + The classical if-syntax (as shown above) has the drawback that inner if-parts + need to be nested into outer else-parts, so rather deep nestings may occur + when you are programming longer chains. + This is an alternate syntax for avoidance of deep nesting. + When giving an odd number of arguments, the last argument is taken as final + else-part. +\end_layout + +\begin_layout Itemize + +\family typewriter +%elsunless +\family default + +\begin_inset Formula $\ldots$ +\end_inset + + Like +\family typewriter +%elsif +\family default +, but +\emph on +all +\emph default + conditions are negated. +\end_layout + +\begin_layout Itemize + +\family typewriter +%while{ +\emph on +expression +\emph default +}{ +\emph on +body +\emph default +} +\family default +Evaluates the +\family typewriter +\emph on +expression +\family default +\emph default + in a while loop, like in any other macro or programming language. + The +\family typewriter +\emph on +body +\family default +\emph default + is evaluated exactly as many times as the +\family typewriter +\emph on +expression +\family default +\emph default + holds. 
+ Notice that endless loops can only be avoided by calling a non-pure macro + inspecting external state information, or by creating (and checking) another + side effect somewhere, like assigning to a variable somewhere. +\end_layout + +\begin_layout Itemize + +\family typewriter +%until{ +\emph on +expression +\emph default +}{ +\emph on +body +\emph default +} +\family default +Like +\family typewriter + %while{ +\emph on +expression +\emph default +}{ +\emph on +body +\emph default +} +\family default +, but negate the expression. +\end_layout + +\begin_layout Itemize + +\family typewriter +%for{ +\emph on +exp +\emph default +r1}{ +\emph on +exp +\emph default +r2}{ +\emph on +exp +\emph default +r3}{ +\emph on +body +\emph default +} +\family default + As you will expect from the corresponding C, Perl, Java, or (add your favorite + language) construct. + Only the syntactic sugar is a little bit different. +\end_layout + +\begin_layout Itemize + +\family typewriter +%foreach{ +\emph on +varname +\emph default +}{ +\emph on +CSV-delimited-string +\emph default +}{ +\emph on +delimiter +\emph default +}{ +\emph on +body +\emph default +} +\family default + As you can expect from similar +\family typewriter +foreach +\family default + constructs in other languages like Perl. + Currently, the macro processor has no arrays, but can use comma-separated + strings as a substitute. +\end_layout + +\begin_layout Itemize + +\family typewriter +%eval{ +\emph on +count +\emph default +}{ +\emph on +body +\emph default +} +\family default + Evaluates the +\family typewriter +\emph on +body +\family default +\emph default + exactly as many times as indicated by the numeric argument +\family typewriter +\emph on +count +\family default +\emph default +. + This may be used to re-evaluate the output of other macros once again. 
+\end_layout + +\begin_layout Itemize + +\family typewriter +%protect{ +\emph on +body +\emph default +} +\family default + Equivalent to +\family typewriter +%eval{0}{ +\emph on +body +\emph default +} +\family default +, which means that the body is not evaluated at all, but copied to the output + verbatim +\begin_inset Foot +status open + +\begin_layout Plain Layout +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +TeX +\end_layout + +\end_inset + + +\begin_inset space ~ +\end_inset + +or +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +LaTeX +\end_layout + +\end_inset + + +\begin_inset space ~ +\end_inset + +fans usually know what this is good for ;) +\end_layout + +\end_inset + +. +\end_layout + +\begin_layout Itemize + +\family typewriter +%eval-down{ +\emph on +body +\emph default +} +\family default + Evaluates the +\family typewriter +\emph on +body +\family default +\emph default + in a loop until the result does not change any more +\begin_inset Foot +status open + +\begin_layout Plain Layout +Mathematicians knowing Banach's fixedpoint theorem will know what this is + good for ;) +\end_layout + +\end_inset + +. +\end_layout + +\begin_layout Itemize + +\family typewriter +%tmp{ +\emph on +body +\emph default +} +\family default + Evaluates the +\family typewriter +\emph on +body +\family default +\emph default + once in a temporary scope which is thrown away afterwards. +\end_layout + +\begin_layout Itemize + +\family typewriter +%call{ +\emph on +macroname +\emph default +}{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +}{ +\emph on +argn +\emph default +} +\family default + Like in many other macro languages, this evaluates the named macro in + a new scope. + This means that any side effects produced by the called macro, such as + variable assignments, will be reverted after the call, and therefore not + influence the old scope. 
+ However notice that the arguments +\family typewriter +\emph on +arg1 +\family default +\emph default + to +\family typewriter +\emph on +argn +\family default +\emph default + are evaluated in the +\emph on +old +\emph default + scope before the call actually happens (possibly producing side effects + if they contain some), and their result is respectively assigned to +\family typewriter +%{1} +\family default + until +\family typewriter +%{ +\emph on +n +\emph default +} +\family default + in the new scope, analogously to the Shell or to Perl. + In addition, the new +\family typewriter +%{0} +\family default + gets the +\family typewriter +\emph on +macroname +\family default +\emph default +. + Notice that the argument evaluation happens non-lazily in the old scope + and therefore differs from other macro processors like +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +TeX +\end_layout + +\end_inset + +. +\end_layout + +\begin_layout Itemize + +\family typewriter +%include{ +\emph on +macroname +\emph default +}{ +\emph on +arg1 +\emph default +}{ +\emph on +arg2 +\emph default +}{ +\emph on +argn +\emph default +} +\family default + Like +\family typewriter +%call{} +\family default +, but evaluates the named macro in the +\emph on +current +\emph default + scope (similar to the +\family typewriter +source +\family default + command of the Bourne shell). + This means that any side effects produced by the called macro, such as + variable assignments, will +\emph on +not +\emph default + be reverted after the call. + Even the +\family typewriter +%{0} +\family default + until +\family typewriter +%{ +\emph on +n +\emph default +} +\family default + variables will continue to exist (and may lead to confusion if you aren't + aware of that). +\end_layout + +\begin_layout Itemize + +\family typewriter +%callstack{} +\family default + Useful for debugging: show the current chain of macro invocations.
+\end_layout + +\begin_layout Paragraph +Time Handling Macros +\end_layout + +\begin_layout Itemize + +\family typewriter +%time{} +\family default + Return the current Lamport timestamp (see section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:The-Lamport-Clock" + +\end_inset + +), in units of seconds since the Unix epoch. +\end_layout + +\begin_layout Itemize + +\family typewriter +%real-time{} +\family default + Return the current system clock timestamp, in units of seconds since the + Unix epoch. +\end_layout + +\begin_layout Itemize + +\family typewriter +%sleep{ +\emph on +seconds +\emph default +} +\family default + Pause the given number of seconds. +\end_layout + +\begin_layout Itemize + +\family typewriter +%timeout{ +\emph on +seconds +\emph default +} +\family default + Like +\family typewriter +%sleep{ +\emph on +seconds +\emph default +} +\family default +, but abort the +\family typewriter +marsadm +\family default + command after the total waiting time has exceeded the timeout given by + the +\family typewriter +--timeout= +\family default + parameter. +\end_layout + +\begin_layout Paragraph +Misc Macros +\end_layout + +\begin_layout Itemize + +\family typewriter +%warn{ +\emph on +text +\emph default +} +\family default + Show a WARNING: +\end_layout + +\begin_layout Itemize + +\family typewriter +%die{ +\emph on +text +\emph default +} +\family default + Abort execution with an error message. +\end_layout + +\begin_layout Paragraph +Experts Only - Risky +\end_layout + +\begin_layout Standard +The following macros are unstable and may change at any time without notice. +\end_layout + +\begin_layout Itemize + +\family typewriter +%get-msg{ +\emph on +name +\emph default +} +\family default + Low-level access to system messages. + You should not use this, since this is not extensible (you must know the + name in advance). 
+\end_layout + +\begin_layout Itemize + +\family typewriter +%readlink{ +\emph on +path +\emph default +} +\family default + Low-level access to symlinks. + Don't misuse this for circumvention of the abstraction macros from the + symlink tree! +\end_layout + +\begin_layout Itemize + +\family typewriter +%setlink{ +\emph on +value +\emph default +}{ +\emph on +path +\emph default +} +\family default + Low-level creation of symlinks. + Don't misuse this for circumvention of the abstraction macros for the symlink + tree! +\end_layout + +\begin_layout Itemize + +\family typewriter +%fetch-info{} +\family default +etc. + Low-level access to internal symlink formats. + Don't use this in scripts! Only for curious humans. +\end_layout + +\begin_layout Itemize + +\family typewriter +%is-almost-consistent{} +\family default + Whatever you guess what this could mean, don't use it, at least never in + place of +\family typewriter +%is-consistent{} +\family default + - it is risky to base decisions on this. + Mostly for historical reasons. +\end_layout + +\begin_layout Itemize + +\family typewriter +%does{ +\emph on +name +\emph default +} +\family default +Equivalent to +\family typewriter +%is- +\emph on +name +\emph default +{} +\family default + (just more handy for computing the macro name). + Use with care! +\end_layout + +\begin_layout Subsection +Predefined Variables +\begin_inset CommandInset label +LatexCommand label +name "par:Predefined-Variables" + +\end_inset + + +\end_layout + +\begin_layout Itemize + +\family typewriter +%{cmd} +\family default +The command argument of the invoked +\family typewriter +marsadm +\family default + command. +\end_layout + +\begin_layout Itemize + +\family typewriter +%{res} +\family default +The resource name given to the +\family typewriter +marsadm +\family default + command as a command line parameter (or, possibly expanded from +\family typewriter +all +\family default +). 
+\end_layout + +\begin_layout Itemize + +\family typewriter +%{resdir} +\family default +The corresponding resource directory. + The current version of MARS uses +\family typewriter +/mars/resource-%{res}/ +\family default +, but this may change in future. + Normally, you should not need this, since anything should be already abstracted + for you. + In case you +\emph on +really +\emph default + need low-level access to something, please prefer this variable over +\family typewriter +%{mars}/resource-%{res} +\family default + because it is a bit more abstracted. +\end_layout + +\begin_layout Itemize + +\family typewriter +%{mars} +\family default +Currently the fixed string +\family typewriter +/mars +\family default +. + This may change in future, probably with the advent of MARS Full. +\end_layout + +\begin_layout Itemize + +\family typewriter +%{host} +\family default +The hostname of the local node. +\end_layout + +\begin_layout Itemize + +\family typewriter +%{ip} +\family default +The IP address of the local node. +\end_layout + +\begin_layout Itemize + +\family typewriter +%{timeout} +\family default +The value given by the +\family typewriter +--timeout= +\family default + option, or the corresponding default value. +\end_layout + +\begin_layout Itemize + +\family typewriter +%{threshold} +\family default +The value given by the +\family typewriter +--threshold= +\family default + option, or the corresponding default value. +\end_layout + +\begin_layout Itemize + +\family typewriter +%{window} +\family default +The value given by the +\family typewriter +--window= +\family default + option, or the corresponding default value (60s). +\end_layout + +\begin_layout Itemize + +\family typewriter +%{force} +\family default +The number of times the +\family typewriter +--force +\family default + option has been given.
+\end_layout + +\begin_layout Itemize + +\family typewriter +%{dry-run} +\family default +The number of times the +\family typewriter +--dry-run +\family default + option has been given. +\end_layout + +\begin_layout Itemize + +\family typewriter +%{verbose} +\family default +The number of times the +\family typewriter +--verbose +\family default + option has been given. +\end_layout + +\begin_layout Itemize + +\family typewriter +%{callstack} +\family default +Same as the +\family typewriter +%callstack{} +\family default + macro. + The latter gives you an opportunity for overriding, while the former is + firmly built in. +\end_layout + +\begin_layout Section +Scripting HOWTO +\begin_inset CommandInset label +LatexCommand label +name "sec:Scripting-HOWTO" + +\end_inset + + +\end_layout + +\begin_layout Standard +Both the +\series bold +asynchronous communication model +\series default + of MARS (cf section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:The-Lamport-Clock" + +\end_inset + +) including the Lamport clock, and the +\series bold +state model +\series default + (cf section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:The-State-of" + +\end_inset + +) are something you +\emph on +definitely +\emph default + should have in mind when you want to do some scripting. + Here is some further concrete advice: +\end_layout + +\begin_layout Itemize +Don't access anything on +\family typewriter +/mars/ +\family default + directly, except for debugging purposes. + Use +\family typewriter +marsadm +\family default +. +\end_layout + +\begin_layout Itemize +Avoid running scripts in parallel, other than for inspection / monitoring + purposes. + When you give two +\family typewriter +marsadm +\family default + commands in parallel (whether on the same host, or on different hosts belonging + to the same cluster), it is very likely to produce a mess. + +\family typewriter +marsadm +\family default + has no internal locking.
+ There is no cluster-wide locking at all. + Unfortunately, some systems like Pacemaker are violating this in many cases + (depending on their configuration). + Best is if you have a dedicated / more or less centralized +\series bold +control machine +\series default + which controls masses of your georedundant working servers. + This reduces the risk of running interfering actions in parallel. + Of course, you need backup machines for your control machines, and in different + locations. + Not obeying this advice can easily lead to problems such as complex races + which are very difficult to solve in long-distance distributed systems, + even in general (not limited to MARS). +\end_layout + +\begin_layout Itemize + +\family typewriter +marsadm wait-cluster +\family default + is your friend. + Whenever your (near-)central script has to switch between different hosts + +\family typewriter +A +\family default + and +\family typewriter +B +\family default + (of the same cluster), use it in the following way: +\begin_inset Newline newline +\end_inset + + +\family typewriter +ssh A +\begin_inset Quotes eld +\end_inset + +marsadm action1 +\begin_inset Quotes erd +\end_inset + +; ssh B +\begin_inset Quotes eld +\end_inset + +marsadm wait-cluster; marsadm action2 +\begin_inset Quotes erd +\end_inset + + +\begin_inset Newline newline +\end_inset + + +\family default + +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + Don't ignore this advice! Interference is almost +\emph on +sure +\emph default +! As a rule of thumb, precede almost any action command with some appropriate + waiting command! +\end_layout + +\begin_layout Itemize +Further friends are any +\family typewriter +marsadm wait-* +\family default + commands, such as +\family typewriter +wait-umount +\family default +. +\end_layout + +\begin_layout Itemize +In some places, busy-wait loops might be needed, e.g. 
+ for waiting until a specific resource is +\family typewriter +UpToDate +\family default + or matches some other condition. + Examples of waiting conditions can be found under +\family typewriter +github.com/schoebel/test-suite +\family default + in subdirectory +\family typewriter +mars/modules/ +\family default +, specifically +\family typewriter +02_predicates.sh +\family default + or similar. +\end_layout + +\begin_layout Itemize +In case of network problems, some command may hang (forever), if you don't + set the +\family typewriter +--timeout= +\family default + option. + Don't forget to check the return state of any failed / timed-out commands, + and to take appropriate measures! +\end_layout + +\begin_layout Itemize +Test your scripts in failure scenarios! +\end_layout + +\begin_layout Chapter +The Sysadmin Interface ( +\family typewriter +marsadm +\family default + and +\family typewriter +/proc/sys/mars/ +\family default +) +\family typewriter + +\begin_inset CommandInset label +LatexCommand label +name "chap:The-Sysadmin-Interface" + +\end_inset + + +\end_layout + +\begin_layout Standard +In general, the term +\begin_inset Quotes eld +\end_inset + +after a while +\begin_inset Quotes erd +\end_inset + + means that other cluster nodes will take notice of your actions according + to the +\begin_inset Quotes eld +\end_inset + +eventually consistent +\begin_inset Quotes erd +\end_inset + + propagation protocol described in sections +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:The-Lamport-Clock" + +\end_inset + + and +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:The-Symlink-Tree" + +\end_inset + +. + Please be aware that this +\begin_inset Quotes eld +\end_inset + +while +\begin_inset Quotes erd +\end_inset + + may last very long in case of network outages or bad firewall rules.
+\end_layout + +\begin_layout Standard +In the following tables, column +\begin_inset Quotes eld +\end_inset + +Cmp +\begin_inset Quotes erd +\end_inset + + means compatibility with DRBD. + Please note that 100% exact compatibility is not possible, because of the + asynchronous communication paradigm. +\end_layout + +\begin_layout Standard +The following table documents common options which work with (almost) any + command: +\end_layout + +\begin_layout Standard + +\size scriptsize +\begin_inset Tabular + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Option +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Cmp +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Description +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +--dry-run +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Run the command without actually creating symlinks or touching files or + 
executing rsync. + This option +\emph on +should +\emph default + be used first at any dangerous command, in order to check what would happen. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + +Don't use in scripts! Only use by hand! +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +This option does not change the waiting logic. + Many commands are waiting until the desired effect has taken place. + However, with +\family typewriter +--dry-run +\family default + the desired effect will never happen, so the command may wait forever (or + abort with a timeout). +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +In addition, this option can lead to additional aborts of the commands due + to unmet conditions, which cannot be met because the symlinks are not actually + created / altered. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Thus this option can give only a +\series bold +rough estimate +\series default + of what would happen later! 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +--force +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +almost +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Some preconditions are skipped, i.e. + the command will / should work although some (more or less) vital preconditions + are violated. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Instead of giving +\family typewriter +--force +\family default +, you may alternatively prefix your command with +\family typewriter +force- +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + THIS OPTION IS DANGEROUS! +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Use it only when you are absolutely sure that you know what you are doing! 
+\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Use it only as a last resort if the same command without +\family typewriter +--force +\family default + has failed +\emph on +for no good reason +\emph default +! +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +--ignore-sync +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +almost +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Use this for a +\emph on +planned +\emph default + handover instead of +\family typewriter +--force +\family default +. + Only one precondition is relaxed: some sync may be running somewhere. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + +Careful when using this on extremely huge LVs where the sync may take several + days, or weeks. + It is your sysadmin decision what you want to prefer: restarting the sync, + or planned handover.
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +--verbose +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Some (few) commands will become more speaky. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +--timeout=$seconds +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Some commands require response from either the local kernel module, or from + other cluster nodes. + In order to prevent infinite waiting in case of network outages or other + problems, the command will fail after the given timeout has been reached. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +When $seconds is -1, the command will wait forever. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +When $seconds is 0, the command will not wait in case any precondition is + not met, and abort without performing an action. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +The default timeout is 5s.
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +--window=$seconds +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +The time window for checking the aliveness of other nodes in the network. + When no symlink updates have occurred during the last window, the node + is considered dead. + Default is 60s. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +--threshold=$size +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +The macros containing the substring +\family typewriter +-threshold- +\family default + or +\family typewriter +-almost- +\family default + are using this as a default value for approximation whether something has + been approximately reached. + Default is 10MiB. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +The $size argument may be a number optionally followed by one of the lowercase + characters k m g t p for indicating kilo mega giga tera or peta bytes as + multiples of 1000. + When using the corresponding uppercase character, multiples of 1024 are + formed instead.
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +--host=$host +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +The command acts as if the command were executed on another host $host. + This option should not be used regularly, because the local information + in the symlink tree may be outdated or even wrong. + Additionally, some local information like remote sizes of physical devices + (e.g. + remote disks) is not present in the symlink tree at all, or is wrong (reflectin +g only the +\emph on +local +\emph default + state). +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + THIS OPTION IS DANGEROUS! +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Use it only for final destruction of dead cluster nodes, see section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Final-Destroy-of" + +\end_inset + +. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +--ip=$ip +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +By default, +\family typewriter +marsadm +\family default + always uses the IP for +\family typewriter +$host +\family default + as stored in the symlink tree (directory +\family typewriter +/mars/ips/ +\family default +). + When such an IP entry does not (yet) exist (e.g. + +\family typewriter +create-cluster +\family default + or +\family typewriter +join-cluster +\family default +), all local network interfaces are automatically scanned for IPv4 addresses, + and the first one is taken. + This may lead to wrong decisions if you have multiple network interfaces. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +In order to override the automatic IP detection and to explicitly tell the + IP address of your storage network, use this option.
+\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize +Usually you will need this only at +\family typewriter +{create,join}-cluster +\family default +. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +--verbose +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Some (few) commands will become more speaky. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Section +Cluster Operations +\begin_inset CommandInset label +LatexCommand label +name "sec:Cluster-Operations" + +\end_inset + + +\end_layout + +\begin_layout Standard + +\size scriptsize +\begin_inset Tabular + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Command / Params +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Cmp +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Description +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +create-cluster +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: the +\family typewriter +/mars/ +\family default + filesystem must be mounted and it must be empty ( +\family typewriter +mkfs.ext4 +\family default +, see instructions in section +\begin_inset CommandInset ref +LatexCommand ref +reference 
"subsec:Setup-your-Cluster" + +\end_inset + +). + The kernel module must +\emph on +not +\emph default + be loaded. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: the initial symlink tree is created in +\family typewriter +/mars/ +\family default +. + Additionally, the +\family typewriter +/mars/uuid +\family default + symlink is created for later distribution in the cluster. + It uniquely indentifies the cluster in the world. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +This must be called exactly once at the initial primary. + +\end_layout + +\begin_layout Plain Layout +Hint: use the +\family typewriter +--ip= +\family default + option if you have multiple interfaces. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +join-cluster +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$host +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + 
+\begin_layout Plain Layout + +\size scriptsize +Precondition: the +\family typewriter +/mars/ +\family default + filesystem must be mounted and it must be empty ( +\family typewriter +mkfs.ext4 +\family default +, see instructions in section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Setup-your-Cluster" + +\end_inset + +). + The kernel module must +\emph on +not +\emph default + be loaded. + The cluster must have been already created at another node +\family typewriter +$host +\family default +. + A working ssh connection to $host as root must exist (without password). + +\family typewriter +rsync +\family default + must be installed at all cluster nodes. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: the initial symlink tree +\family typewriter +/mars/ +\family default + is replicated from the remote host +\family typewriter +$host +\family default +, and the local host has been added as another cluster member. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +This must be called exactly once at every initial secondary node. +\end_layout + +\begin_layout Plain Layout +Hint: use the +\family typewriter +--ip= +\family default + option if you have multiple interfaces. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +leave-cluster +\begin_inset CommandInset label +LatexCommand label +name "leave-cluster" + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: the +\family typewriter +/mars/ +\family default + filesystem must be mounted and it must contain a valid MARS symlink tree + produced by the other +\family typewriter +marsadm +\family default + commands. + The local node must no longer be a member of any resource (see +\family typewriter +marsadm leave-resource +\family default +). + The kernel module should be loaded and the network should be operating + in order to also propagate the effect to the other nodes. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: the local node is removed from the replicated symlink tree + +\family typewriter +/mars/ +\family default + such that other nodes will cease to communicate with it after a while. 
+ The converse is not true: the local node may continue +\begin_inset Foot +status open + +\begin_layout Plain Layout + +\size scriptsize +Reason: +\family typewriter +leave-cluster +\family default + removes only its +\emph on +own +\emph default + IP address from +\family typewriter +/mars/ips/ +\family default +, but does not destroy the usual symmetry of the symlink tree by leaving + the other IPs intact. + Therefore, the local node will continue fetching updates from all nodes + present in +\family typewriter +/mars/ips/ +\family default +. + As an effect, the local node will +\emph on +passively +\emph default + mirror the symlinks of other cluster members, but not vice versa. + There is no communication from the local node to the other ones, turning + the local node into a +\series bold +witness +\series default + according to some terminology from Distributed Systems. + This is a feature, not a bug. + It could be used for post-mortem analysis, or for monitoring purposes. + However, +\emph on +deletions +\emph default + of symlinks are not guaranteed to take place, so your witness may +\emph on +accumulate +\emph default + thousands of old symlinks over a long time. + If you want to eventually stop all communication to the local node, just + run +\family typewriter +rmmod +\family default +. +\end_layout + +\end_inset + + passively fetching the symlink tree. + In order to really stop all communication, the kernel module should be + unloaded afterwards. + The local +\family typewriter +/mars/ +\family default + filesystem may be manually destroyed after that (at least if you need to + reuse it). +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +In case of an eventual node loss (e.g. + fire, water, ...) this command should be used on another node $helper in order + to finally remove $damaged from the cluster via the command +\family typewriter +marsadm leave-cluster --host=$damaged --force +\family default +. 
+\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize +In case you cannot use +\family typewriter +leave-resource +\family default + for any reason, you may do the following: just destroy the +\family typewriter +/mars/ +\family default + filesystem on the host +\family typewriter +$deadhost +\family default + you want to remove (e.g. + by +\family typewriter +mkfs +\family default +), or take other measures to +\emph on +ensure +\emph default + that it cannot be accidentally re-used in any way (e.g. + physical destruction of the underlying RAID, +\family typewriter +lvremove +\family default +, etc). + On all other hosts, do +\family typewriter +rmmod mars +\family default +, then delete the symlink +\family typewriter +/mars/ips/ip-$deadhost +\family default + everywhere by hand, and finally +\family typewriter +modprobe mars +\family default + again. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + +\size scriptsize +Notice that the last +\family typewriter +leave-resource +\family default + operation does not delete the cluster as such. + It just creates an +\emph on +empty +\emph default + cluster which no longer has any members. + In particular, the cluster ID +\family typewriter +/mars/uuid +\family default + is +\emph on +not +\emph default + removed, deliberately +\begin_inset Foot +status open + +\begin_layout Plain Layout + +\size scriptsize +This is a feature, not a bug. + The +\family typewriter +uuid +\family default + is created once, but never altered anywhere. + The only way to get rid of it is +\emph on +external +\emph default + deletion (not by +\family typewriter +marsadm +\family default +) +\emph on +together(!) +\emph default + with all other contents of +\family typewriter +/mars/ +\family default +. 
+ This prevents you from accidentally merging half-dead remains which could + have survived a disaster for any reason, such as snapshotting filesystems + / VMs or whatever. +\end_layout + +\end_inset + +. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize +Before you can re-use +\emph on +any +\emph default + left-over +\family typewriter +/mars/ +\family default + filesystem for creating / joining a new / different cluster, you +\emph on +must +\emph default + obey the instructions in section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Setup-your-Cluster" + +\end_inset + + and use +\family typewriter +mkfs.ext4 +\family default + accordingly. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +merge-cluster +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$host +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" 
+shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: the set of resources at the local cluster (transitively) and + at the cluster of +\family typewriter +$host +\family default + (transitively) must be disjoint. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Create the union of both clusters, consisting of the union of all participating + machines (transitively). + Resource memberships are unaffected. + This is useful for creating a +\begin_inset Quotes eld +\end_inset + +virtual LVM cluster +\begin_inset Quotes erd +\end_inset + + where resources can be migrated later via +\family typewriter +join-resource +\family default + / +\family typewriter +leave-resource +\family default + operations. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + +\size scriptsize +Attention! The mars branch +\family typewriter +0.1.y +\family default + does not scale well in the number of cluster members, because it evolved from + a lab prototype with +\begin_inset Formula $O(n^{2})$ +\end_inset + + behaviour at metadata exchange. + Never exceed the maximum cluster members as described in appendix +\begin_inset CommandInset ref +LatexCommand vref +reference "chap:Technical-Data-MARS" + +\end_inset + +. + For safety, you should better stay at 1/2 of the numbers mentioned there. + Use +\family typewriter +split-cluster +\family default + for going back to smaller clusters again after your background data migration + has completed. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize +Future versions of MARS, starting with branch +\family typewriter +0.1b.y +\family default + will be constructed for very big clusters in the range of thousands of + nodes. 
+ Development has not yet stabilized there, and operational experiences are + missing at the moment. + Be careful until official announcements are appearing in the ChangeLog, + reporting of operational experiences from the 1&1 big cluster at metadata + level. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +merge-cluster-check +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$host +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Check in advance whether the set of resources at the local cluster and at + the other cluster +\family typewriter +$host +\family default + are disjoint. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +split-cluster +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +This is almost the inverse operation of +\family typewriter +merge-cluster +\family default +: it determines the minimum sub-cluster groups participating in some common + resources. + Then it splits the cluster memberships such that unnecessary connections + between non-related nodes are interrupted. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Use this for avoidance of too big clusters. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +wait-cluster +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +See section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Waiting" + +\end_inset + +. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +create-uuid +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +Deprecated. + Only for compatibility with old version light0.1beta05 or earlier. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Precondition: the +\family typewriter +/mars/ +\family default + filesystem must be mounted. + A +\family typewriter +uuid +\family default + (such as automatically created by recent versions of +\family typewriter +marsadm create-cluster +\family default +) must not already exist; i.e. + you have a very old and outdated symlink tree. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: the +\family typewriter +/mars/uuid +\family default + symlink is created for later distribution in the cluster. + It uniquely identifies the cluster in the world. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +This must be called at most once at the current primary. 
+ +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Section +Resource Operations +\begin_inset CommandInset label +LatexCommand label +name "sec:Resource-Operations" + +\end_inset + + +\end_layout + +\begin_layout Standard +Common precondition for all resource operations is that the +\family typewriter +/mars/ +\family default + filesystem is mounted, that it contains a valid MARS symlink tree produced + by other +\family typewriter +marsadm +\family default + commands (including a unique +\family typewriter +uuid +\family default +), that your current node is a valid member of the cluster, and that the + kernel module is loaded. + When communication is impossible due to network outages or bad firewall + rules, most commands will succeed, but other cluster nodes may take a long + time to notice your changes. +\end_layout + +\begin_layout Standard +Instead of executing +\family typewriter +marsadm +\family default + commands several times for each resource argument, you may give the special + resource argument +\family typewriter +all +\family default +. + This works even when combined with +\family typewriter +--force +\family default +, but be cautious when giving dangerous command combinations like +\family typewriter +marsadm delete-resource --force all +\family default +. +\end_layout + +\begin_layout Standard +In newer versions of +\family typewriter +marsadm +\family default +, you may give a comma-separated list of resource names in place of +\family typewriter +all +\family default +. + This way, you have more fine-grained control over the set of resource names + you want to use. +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + +Beware when combining this with +\family typewriter +--host=somebody +\family default +. 
+ In some very rare cases, like final destruction of a whole datacenter after + an earthquake, you might need a combination like +\family typewriter +marsadm --host=defective delete-resource --force all +\family default +. + Don't use such combinations if you don't need them +\emph on +really +\emph default +! You can easily shoot yourself in your head if you are not carefully operating + such commands! +\end_layout + +\begin_layout Subsection +Resource Creation / Deletion / Modification +\begin_inset CommandInset label +LatexCommand label +name "subsec:Resource-Creation" + +\end_inset + + +\end_layout + +\begin_layout Standard + +\size scriptsize +\begin_inset Tabular + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Command / Params +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Cmp +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Description +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +create-resource +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$disk_dev +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + 
+\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +[$mars_name] +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +[$size] +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: the resource argument +\family typewriter +$res +\family default + must not denote an already existing resource name in the cluster. + The argument +\family typewriter +$disk_dev +\family default + must denote an absolute path to a usable local block device, its size must + be greater than zero. + When the optional +\family typewriter +$mars_name +\family default + is given, that name must not already exist on the local node; when not + given, +\family typewriter +$mars_name +\family default + defaults to +\family typewriter +$res +\family default +. 
+ When the optional +\family typewriter +$size +\family default + argument is given, it must be a number, optionally followed by a lowercase + suffix +\family typewriter +k +\family default +, +\family typewriter +m +\family default +, +\family typewriter +g +\family default +, +\family typewriter +t +\family default +, or +\family typewriter +p +\family default + (denoting size factors as multiples of 1000), or an uppercase suffix +\family typewriter +K +\family default +, +\family typewriter +M +\family default +, +\family typewriter +G +\family default +, +\family typewriter +T +\family default + or +\family typewriter +P +\family default + (denoting size factors as multiples of 1024). + The given size must not exceed the actual size of +\family typewriter +$disk_dev +\family default +. + It will specify the future resource size as shown by +\family typewriter +marsadm view-resource-size $res +\family default +. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: the resource +\family typewriter +$res +\family default + is created, the initial role of the current node is primary. + The corresponding symlink tree information is asynchronously distributed + in the cluster (in the background). + The device +\family typewriter +/dev/mars/$mars_name +\family default + should appear after a while. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Notice: when +\family typewriter +$size +\family default + is strictly smaller than the size of +\family typewriter +$disk_dev +\family default +, you will unnecessarily waste some space. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +This must be called exactly once for any new resource. 
+ +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +join-resource +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$disk_dev +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +[$mars_name] +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: the resource argument +\family typewriter +$res +\family default + must denote an already existing resource in the cluster (i.e. + its symlink tree information must have been received). + The resource must have a designated primary, and it must not be in emergency + mode. 
+ There must not exist a split brain in the cluster. + The local node must not already be a member of that resource. + The argument +\family typewriter +$disk_dev +\family default + must denote an absolute path to a usable (but currently unused) local block + device, its size must be greater than or equal to the logical size of the resource. + When the optional +\family typewriter +$mars_name +\family default + is given, that name must not already exist on the local node; when not + given, +\family typewriter +$mars_name +\family default + defaults to +\family typewriter +$res +\family default +. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: the current node becomes a member of resource +\family typewriter +$res +\family default +, the initial role is secondary. + The initial full sync should start after a while. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Notice: when the size of $disk_dev is strictly greater than the size of + the resource, you will unnecessarily waste some space. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + +\size scriptsize +After a while, state +\family typewriter +Orphan +\family default + should be left. + Don't forget to regularly monitor for longer occurrences of +\family typewriter +Orphan +\family default +! 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +leave-resource +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: the local node must be a member of the resource +\family typewriter +$res +\family default +; its current role must be secondary. + Sync, fetch and replay must be paused (see commands +\family typewriter +pause-{sync,fetch,replay} +\family default + or their abbreviation +\family typewriter +down +\family default +). + The disk must be detached (see commands +\family typewriter +detach +\family default + or +\family typewriter +down +\family default +). + The kernel module should be loaded and the network should be operating + in order to also propagate the effect to the other nodes. 
+\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: the local node is no longer a member of +\family typewriter +$res +\family default +. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Notice: as a side effect for other nodes, their +\family typewriter +log-delete +\family default + may now become possible, since the current node no longer counts as + a candidate for logfile application. + In addition, a split brain situation may be (partly) resolved by this. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize + Please notice that this command +\emph on +may +\emph default + lead to (but does not guarantee) split-brain resolution. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + +\size scriptsize +The contents of the disk are not changed by this command. + Before issuing this command, check whether the disk appears to be locally + consistent (see +\family typewriter +view-is-consistent +\family default +)! After giving this command, any internal information indicating the consistenc +y state will be gone, and you will no longer be able to guess consistency + properties. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize + When you are +\emph on +sure +\emph default + that the disk was consistent before (or is now by manually checking it), + you may re-create a new resource out of it via +\family typewriter +create-resource +\family default +. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +In case of an eventual node loss (e.g. + fire, water, ...) 
this command may be used on another node $helper in order + to finally remove all the resources $damaged from the cluster via the command + +\family typewriter +marsadm leave-resource $res --host=$damaged --force +\family default +. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +delete-resource +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: the resource must be empty (i.e. + all members must have left via +\family typewriter +leave-resource +\family default +). + This precondition is overridable by +\family typewriter +--force +\family default +, increasing the danger to maximum! 
It is even possible to combine +\family typewriter +--force +\family default + with an invalid resource argument and an invalid +\family typewriter +--host=somebodyelse +\family default + argument in order to desperately try to destroy remains of incomplete or + physically damaged hardware. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: all cluster members will eventually be forcefully removed from + +\family typewriter +$res +\family default +. + In case of network interruptions, the forced removal may take place far + in the future. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + THIS COMMAND IS +\emph on +VERY +\emph default + DANGEROUS! +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Use this only in desperate situations, and only manually. + Don't call this from scripts. + You are forcefully using a sledgehammer, even without +\family typewriter +--force +\family default +! The danger is that the +\emph on +true +\emph default + state of other cluster nodes need not be known in case of network problems. + Even if it were known, it could be compromised by +\series bold +byzantine failures +\series default +. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +It is strongly advised to try this command with +\family typewriter +--dry-run +\family default + first. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +When combined with +\family typewriter +--force +\family default +, this command will definitely +\series bold +murder +\series default + other cluster nodes, possibly after a long while, and even when they are + operating in primary mode / having split brains / etc. 
+ However, there is no guarantee that other cluster nodes will be +\emph on +really +\emph default + dead – it is (theoretically) possible that they remain only +\emph on +half +\emph default + +\emph on +dead +\emph default +. + For example, a half dead node may continue to write data to +\family typewriter +/mars/ +\family default + and thus lead to overflow eventually. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + +This command implies a forceful detach, possibly destroying consistency. + +\size scriptsize +It is similar in spirit to a +\series bold +STONITH +\series default +. + In particular, when a cluster node was operating in primary mode ( +\family typewriter +/dev/mars/mydata +\family default + being continuously in use), the forceful detach cannot be carried out until + the device is completely unused. + In the meantime, the current transaction logfile will be appended to, but + the file +\emph on +might +\emph default + be already unlinked (orphan file filling up the disk). + After the forceful detach, the underlying disk need not be consistent (although + MARS does its best). + Since this command deletes any symlinks which normally would indicate the + consistency state, no guarantees about consistency can be given after this + +\emph on +in general +\emph default +! Always check consistency by hand! +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +When possible / as soon as possible, check the local state on the other + nodes in order to +\emph on +really +\emph default + shut down the resource everywhere (e.g. + to +\emph on +really +\emph default + unuse the +\family typewriter +/dev/mars/mydata +\family default + device, etc). 
+\end_layout + +\begin_layout Plain Layout + +\size scriptsize +After this command, you +\emph on +should +\emph default + rebuild the resource under a different name, in order to avoid any clashes + caused by unexpected resurrection of +\begin_inset Quotes eld +\end_inset + +dead +\begin_inset Quotes erd +\end_inset + + or +\begin_inset Quotes eld +\end_inset + +half-dead +\begin_inset Quotes erd +\end_inset + + nodes (beware of snapshot / restores on virtual machines!!). + MARS does its best to avoid problems even in case the new resource name + should equal the old one, but there can be +\emph on +no guarantee +\emph default + in all possible failure scenarios / usage scenarios. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize +When possible, prefer +\family typewriter +leave-resource +\family default + over this! +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +wait-resource +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +{is-,}{attach, +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash 
+strut +\backslash +hfill +\end_layout + +\end_inset + + primary, +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + + device}{-off,} +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +See section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Waiting" + +\end_inset + +. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Subsection +Operation of the Resource +\begin_inset CommandInset label +LatexCommand label +name "subsec:Operation-of-the" + +\end_inset + + +\end_layout + +\begin_layout Standard +Common preconditions are the preconditions from section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:Resource-Operations" + +\end_inset + +, plus the respective resource +\family typewriter +$res +\family default + must exist, and the local node must be a member of it. + With the single exception of +\family typewriter +attach +\family default + itself, all other operations must be started in +\family typewriter +attached +\family default + state. 
+\end_layout + +\begin_layout Standard +When +\family typewriter +$res +\family default + has the special reserved value +\family typewriter +all +\family default +, the following operations will work on all resources where the current + node is a member (analogously to DRBD). +\end_layout + +\begin_layout Standard +With newer versions of +\family typewriter +marsadm +\family default +, you can also give a list of comma-separated resource names in place of + +\family typewriter +all +\family default +. +\end_layout + +\begin_layout Standard +\noindent + +\size scriptsize +\begin_inset Tabular + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Command / Params +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Cmp +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Description +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +attach +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +yes +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" 
+height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: the local disk belonging to $res is not in use by anyone else. + Its contents have not been altered in the meantime since the last +\family typewriter +detach +\family default +. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize +Mounting +\emph on +read-only +\emph default + is allowed during the detached phase. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + +\size scriptsize +However, be careful! If you +\emph on +accidentally +\emph default + forget to give the right readonly-mount flags, if you use +\family typewriter +fsck +\family default + in repair mode in between, or alter the disk content in any other way (beware + of LVM snapshots / restores etc), you will almost certainly produce an + +\series bold +unnoticed inconsistency +\series default + (not reported by +\family typewriter +view-is-consistent +\family default +)! MARS has +\emph on +no chance +\emph default + to notice suchalike! +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: MARS uses the local disk and is able to work with it (e.g. + replay logfiles on it). +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Note: the local disk is opened in exclusive read-write mode. + This should protect against most common misuse, such as opening the disk + in parallel to MARS. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + +\size scriptsize +However, this does not necessarily protect against non-exclusive openers. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +detach +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +yes +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: the local +\family typewriter +/dev/mars/mydata +\family default + device (when present) is no longer opened by anybody. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: the local disk belonging to $res is no longer in use. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize +In contrast to DRBD, you need not explicitly pause syncing, fetching, or + replaying +\emph on +to +\emph default + (as opposed to +\emph on +from +\emph default +) the local disk. + These processes are automatically paused. 
+ As another contrast to DRBD, the respective processes will usually +\emph on +automatically +\emph default + resume after re-attach, as far as possible in the respective new situation. + This will usually work even over +\family typewriter +rmmod +\family default + or reboot cycles, since the internal symlink tree will automatically persist + all todo switches for you (c.f. + section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:The-State-of" + +\end_inset + +). +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + +\size scriptsize +Notice: only +\emph on +local +\emph default + transfer operations +\emph on +to +\emph default + the local disk are paused by a detach. + When another node is remotely running a sync +\emph on +from +\emph default + your local disk, it will likely remain in use for remote reading. + The reason is that the server part of MARS is operating purely passively, + in order to serve all remote requests as best as possible (similar to the + original Unix philosophy). + In order to really stop all accesses, do a +\family typewriter +pause-sync +\family default + on all other resource members where a sync is currently running. + You may also try +\family typewriter +pause-sync-global +\family default +. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + +\size scriptsize +WARNING! After this, and after having paused any remote data access, you + might use the underlying disk for your own purposes, such as test-mounting + it in +\emph on +readonly +\emph default + mode. + +\series bold +Don't modify +\series default + its contents in any way! 
Not even by an +\family typewriter +fsck +\family default + +\begin_inset Foot +status open + +\begin_layout Plain Layout + +\size scriptsize +Some (but not all) +\family typewriter +fsck +\family default + tools for some filesystems have options to start only a test repair / verify + mode / dry run, without doing actual modifications to the data. + Of course, these modes +\emph on +can +\emph default + be used. + But be really sure! Double-check for the right options! +\end_layout + +\end_inset + +! Otherwise, you will have inconsistencies +\emph on +guaranteed +\emph default +. + MARS has no way for knowing of any modifications to your disk when bypassing + +\family typewriter +/dev/mars/* +\family default +. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize +In case you accidentally modified the underlying disk at the +\emph on +primary +\emph default + side, you may choose to resolve the inconsistencies by +\family typewriter +marsadm invalidate $res +\family default + on +\emph on +each +\emph default + secondary. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +pause-sync +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Equivalent to +\family typewriter +pause-sync-local +\family default +. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +pause-sync-local +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: none additionally. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: any sync operation targeting the local disk (when not yet + completed) is paused after a while (cf section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:The-State-of" + +\end_inset + +). + When successfully completed, this operation will remember the switch state + forever and automatically become relevant if a sync is needed again (e.g. + +\family typewriter +invalidate +\family default + or +\family typewriter +resize +\family default +). 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +pause-sync-global +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Like +\family typewriter +*-local +\family default +, but operates on all members of the resource. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +resume-sync +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Equivalent to +\family typewriter +resume-sync-local +\family default +. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +resume-sync-local +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: additionally, a primary must be designated, and it must not + be in emergency mode. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: any sync operation targeting the local disk (when not yet + completed) is resumed after a while. + When completed, this operation will remember the switch state forever and + become relevant if a sync is needed again (e.g. + +\family typewriter +invalidate +\family default + or +\family typewriter +resize +\family default +). 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +resume-sync-global +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Like +\family typewriter +*-local +\family default +, but operates on all members of the resource. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +pause-fetch +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Equivalent to +\family typewriter +pause-fetch-local +\family default +. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +pause-fetch-local +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: none additionally. + The resource +\emph on +should +\emph default + be in secondary role. + Otherwise the switch has +\emph on +no +\emph default + +\emph on +immediate +\emph default + effect, but will come (possibly unexpectedly) into effect whenever secondary + role is entered later for whatever reason. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: any transfer of (parts of) transaction logfiles which are + present at another primary host to the local +\family typewriter +/mars/ +\family default + storage are paused at their current stage. 
+\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize +This switch works independently from +\family typewriter +{pause,resume}-replay +\family default +. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +pause-fetch-global +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Like +\family typewriter +*-local +\family default +, but operates on all members of the resource. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +resume-fetch +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Equivalent to +\family typewriter +resume-fetch-local +\family default +. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +resume-fetch-local +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: none additionally. + The resource +\emph on +should +\emph default + be in secondary role. + Otherwise the switch has +\emph on +no +\emph default + +\emph on +immediate +\emph default + effect, but will come (possibly unexpectedly) into effect whenever secondary + role is entered later for whatever reason. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: any (parts of) transaction logfiles which are present at + another primary host should be transferred to the local +\family typewriter +/mars/ +\family default + storage as far as not yet locally present. 
+\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize +This works independently from +\family typewriter +{pause,resume}-replay +\family default +. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +resume-fetch-global +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Like +\family typewriter +*-local +\family default +, but operates on all members of the resource. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +pause-replay +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Equivalent to +\family typewriter +pause-replay-local +\family default +. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +pause-replay-local +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: none additionally. + The resource +\emph on +should +\emph default + be in secondary role. + Otherwise the switch has +\emph on +no +\emph default + +\emph on +immediate +\emph default + effect, but will come (possibly unexpectedly) into effect whenever secondary + role is entered later for whatever reason. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: any local replay operations of transaction logfiles to the + local disk are paused at their current stage. 
+\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize +This works independently from +\family typewriter +{pause,resume}-fetch +\family default + resp. + +\family typewriter +{dis,}connect +\family default +. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +pause-replay-global +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Like +\family typewriter +*-local +\family default +, but operates on all members of the resource. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +resume-replay +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Equivalent to +\family typewriter +resume-replay-local +\family default +. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +resume-replay-local +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status collapsed + +\begin_layout Plain Layout + +\size scriptsize +Precondition: must be in secondary role. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: any (parts of) locally existing transaction logfiles (whether + replicated from other hosts or produced locally) are started for replay + to the local disk, as far as they have not yet been applied. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +resume-replay-global +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Like +\family typewriter +*-local +\family default +, but operates on all members of the resource. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +connect +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Equivalent to +\family typewriter +connect-local +\family default + and to +\family typewriter +resume-fetch-local +\family default +. 
+\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize +Note: although this sounds similar to DRBD's +\family typewriter +drbdadm connect +\family default +, there are subtle differences. + DRBD has exactly one connection per resource, which is associated with + +\emph on +pairs +\emph default + of nodes. + In contrast, MARS may create multiple connections per resource at runtime, + and these are associated with the +\emph on +target +\emph default + host (not with +\emph on +pairs +\emph default + of hosts). + As a consequence, the fetch may +\emph on +potentially +\emph default + occur from any other source host which happens to be reachable (although + the current implementation prefers the current designated primary, but + this may change in future). + In addition, +\family typewriter +marsadm disconnect +\family default + does not stop +\emph on +all +\emph default + communication. + It only stops fetching logfiles. + The symlink update running in background is +\emph on +not +\emph default + stopped, in order to always propagate as much metadata as possible in the + cluster. + In case of a later incident, chances are higher for a better knowledge + of the +\emph on +real +\emph default + state of the cluster. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +connect-local +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Equivalent to +\family typewriter +resume-fetch-local +\family default +. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +connect-global +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Equivalent to +\family typewriter +resume-fetch-global +\family default +. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +disconnect +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Equivalent to +\family typewriter +disconnect-local +\family default + and to +\family typewriter +pause-fetch-local +\family default +. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize +See above note at +\family typewriter +connect +\family default +. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +disconnect-local +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Equivalent to +\family typewriter +pause-fetch-local +\family default +. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +disconnect-global +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +partly +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Equivalent to +\family typewriter +pause-fetch-global +\family default +. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +up +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +yes +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Equivalent to +\family typewriter +attach +\family default + followed by +\family typewriter +resume-fetch +\family default + followed by +\family typewriter +resume-replay +\family default + followed by +\family typewriter +resume-sync +\family default +. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +down +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +yes +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Equivalent to +\family typewriter +pause-sync +\family default + followed by +\family typewriter +pause-fetch +\family default + followed by +\family typewriter +pause-replay +\family default + followed by +\family typewriter +detach +\family default +. 
+\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize +Hint: consider preferring plain +\family typewriter +detach +\family default + over this, because +\family typewriter +detach +\family default + will remember the last state of all switches, while +\family typewriter +down +\family default + will +\emph on +not +\emph default +. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +primary +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +almost +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout 
+ +\size scriptsize +Precondition: sync must have finished at any resource member. + All relevant transaction logfiles must be either already locally present, + or be fetchable (see +\family typewriter +resume-fetch +\family default + and +\family typewriter +resume-replay +\family default +). + When some logfile data is locally missing, there must be enough space on + +\family typewriter +/mars/ +\family default + to fetch it. + Any replay must not have been interrupted by a replay error (see macro + %replay-code{} or diskstate +\family typewriter +DefectiveLog +\family default +). + The current designated primary must be reachable over network. + When there is no designated primary (i.e. + +\family typewriter +marsadm secondary +\family default + had been executed before, which is explicitly +\emph on +not recommended +\emph default +), +\emph on +all +\emph default + other members of the resource must be reachable (since we have no memory + who was the old primary before), and then they must also match the same + preconditions. + When another host is currently primary (whether designated or not), it + must match the preconditions of +\family typewriter +marsadm secondary +\family default + (that means, its local +\family typewriter +/dev/mars/mydata +\family default + device must not be in use any more). + A split brain must not already exist. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: +\family typewriter +/dev/mars/$dev_name +\family default + appears locally and is usable; the current host is in primary role. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Switches the +\series bold +designated primary +\series default +. 
+ There are three variants: +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +1) +\series bold +Handover +\series default + when +\emph on +not +\emph default + giving +\family typewriter +--force +\family default +: when another host is currently primary, it is first asked to leave its + primary role, and the command waits until it has actually become secondary. + After that, the local host is asked to become primary. + Before actually becoming primary, all relevant logfiles are transferred + over the network and replayed, in order to avoid accidental creation of + split brain as far as possible +\begin_inset Foot +status open + +\begin_layout Plain Layout + +\size scriptsize +Note that split brain avoidance is +\series bold +best effort +\series default + and cannot be guaranteed in general. + For example, it may be impossible to avoid split brain in case of long-lasting + network outages. +\end_layout + +\end_inset + +. + Only after that, +\family typewriter +/dev/mars/$dev_name +\family default + will appear. + When network transfers of the symlink tree are very slow (or currently + impossible), this command may take a very long time. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +In case a split brain is already detected in the initial situation, the + local host will refuse to switch the designated primary without +\family typewriter +--force +\family default +. 
+\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize + In case of +\begin_inset Formula $k>2$ +\end_inset + + replicas: if you want to hand over between hosts +\family typewriter +A +\family default + and +\family typewriter +B +\family default + while a sync is currently running at host +\family typewriter +C +\family default +, you have the following options: +\end_layout + +\begin_layout Enumerate + +\size scriptsize +wait until the sync has finished (see macro +\family typewriter +sync-rest +\family default +, or +\family typewriter +marsadm view +\family default + in general). +\end_layout + +\begin_layout Enumerate + +\size scriptsize +do a +\family typewriter +leave-resource +\family default + on host +\family typewriter +C +\family default +, and later +\family typewriter +join-resource +\family default + after the handover completed successfully. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +2) +\series bold +Handover ignoring running syncs, +\series default + by adding the option +\family typewriter +--ignore-sync +\family default +. + Any running syncs will restart from scratch, in order to ensure consistency. + Use this only when the planned handover is more important than the sync + time. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +3) +\series bold +Forced switching +\series default +: by giving +\family typewriter +--force +\family default + while +\family typewriter +pause-fetch +\family default + is active (but not +\family typewriter +pause-replay +\family default +), most preconditions are ignored, and MARS does its best to actually become + primary even if some logfiles are missing or incomplete or even defective. 
+\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + +\family typewriter +\size scriptsize +primary --force +\family default + is a potentially harmful variant, because it will provoke a split brain + in most cases, and therefore in turn will lead to +\series bold +data loss +\series default + because one of your split brain versions must be discarded later in order + to resolve the split brain (see section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Split-Brain-Resolution" + +\end_inset + +). +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + +\series bold +\size scriptsize +Never +\series default + call +\family typewriter +primary --force +\family default + when +\family typewriter +primary +\family default + without +\family typewriter +--force +\family default + is sufficient! If +\family typewriter +primary +\family default + without +\family typewriter +--force +\family default + complains that the device is in use at the former primary side, take it + seriously! Don't override with +\family typewriter +--force +\family default +, but rather umount +\begin_inset Foot +status open + +\begin_layout Plain Layout + +\size scriptsize +A common misconception is when people think that they can keep their filesystem + mounted without provoking a split brain, because they have their application + stopped and thus don't write any data into the filesystem. + This is a wrong idea, because filesystems may write some metadata, like + booking information, even after hours or days of inactivity. + Therefore MARS insists that the device is no longer in use before any handover + can take place. +\end_layout + +\end_inset + + the device at the other side! 
+\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize + Only use +\family typewriter +primary --force +\family default + when something is +\emph on +already broken +\emph default +, such as a network outage, or a node crash, etc. + During ordinary operations (network OK, nodes OK), you should never need + +\family typewriter +primary --force +\family default +! +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize + If you umount +\family typewriter +/dev/mars/mydata +\family default + on the old primary +\family typewriter +A +\family default +, and then wait until +\family typewriter +marsadm view +\family default + (or another suitable macro) on the target host +\family typewriter +B +\family default + shows that everything is +\family typewriter +UpToDate +\family default +, you can prevent a split brain by yourself even when giving +\family typewriter +primary --force +\family default + afterwards. + However, checking / ensuring this is +\emph on +your +\emph default + responsibility! +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + +\family typewriter +\size scriptsize + primary --force +\family default + switches the +\emph on +designated +\emph default + primary. + In some extremely rare cases, when +\emph on +multiple +\emph default + faults have accumulated in a +\emph on +weird +\emph default + situation, it +\emph on +might +\emph default + be impossible to become the / an actual primary. + Typically you may be +\emph on +already +\emph default + in a split brain situation. 
+ This has not been observed for a long operations time on recent versions + of MARS, but in general becoming primary via +\family typewriter +--force +\family default + cannot be guaranteed always, although MARS does its best. + In split brain situations, or if you ever encounter such a problem, you + +\emph on +must +\emph default + resolve the split brain immediately after giving this command (see section + +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Split-Brain-Resolution" + +\end_inset + +). +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize + Hint in case of +\begin_inset Formula $k>2$ +\end_inset + + replicas: +\family typewriter +marsadm invalidate +\family default + cannot always resolve a split brain at other secondaries (which are neither + the old nor the new designated primary). + Therefore, prefer the +\family typewriter +leave-resource +\family default + method described in section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Split-Brain-Resolution" + +\end_inset + +, starting with a +\family typewriter +leave-resource +\family default + phase at the old primary, and proceeding to +\begin_inset Quotes eld +\end_inset + +unrelated +\begin_inset Quotes erd +\end_inset + + secondaries step by step, until the split brain is gone. + Don't +\family typewriter +join-resource +\family default + again before the split brain is gone! This way, all these replicas will + remain consistent for now, but of course outdated (or potentially even + a +\begin_inset Quotes eld +\end_inset + +wrong +\begin_inset Quotes erd +\end_inset + + split-brain version, but +\emph on +potentially usable +\emph default + in case you get under pressure in some way). 
+ In the hopefully unlikely case that you should later discover that you + accidentally forced the +\emph on +wrong +\emph default + replica via +\family typewriter +primary --force +\family default +, you will have a chance to recover by either forcing the +\begin_inset Quotes eld +\end_inset + +correct +\begin_inset Quotes erd +\end_inset + + host to primary (if it did not already leave the resource), or by creating + a completely fresh resource out of the +\begin_inset Quotes eld +\end_inset + +correct +\begin_inset Quotes erd +\end_inset + + local disk. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize + Generally: in case of +\family typewriter +primary --force +\family default +, the preconditions are different. + The fetch +\emph on +must +\emph default + be switched off (see +\family typewriter +pause-fetch +\family default +), in order to get stable logfile positions. + See section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Forced-Switching" + +\end_inset + +. + For your safety, +\family typewriter +--force +\family default + does not work in newer marsadm (after mars0.1stable52) when your replica + is a current sync target. + For more explanations, see section +\begin_inset CommandInset ref +LatexCommand vref +reference "subsec:Forced-Switching" + +\end_inset + +. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +secondary +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +almost +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: the local +\family typewriter +/dev/mars/$dev_name +\family default + is no longer in use (e.g. + umounted). +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: There exists no designated primary any more. + During split brain and when the network is OK (again), all actual primaries + (including the local host) will leave primary ASAP (i.e. + when their +\family typewriter +/dev/mars/mydata +\family default + is no longer in use). + Any secondary will start following (old) logfiles (even from backlogs) + by replaying transaction logs if it is +\emph on +uniquely +\emph default + possible (which is often violated during split brain). 
+ On any secondary, +\family typewriter +/dev/mars/$dev_name +\family default + will have disappeared. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize + Notice: in contrast to DRBD, you +\series bold +don't need +\series default + this command during normal operation, including handover. + Any resource member which is +\emph on +not +\emph default + designated as primary will +\emph on +automatically +\emph default + go into secondary role. + For example, if you have +\begin_inset Formula $k=4$ +\end_inset + + replicas, only +\emph on +one of them +\emph default + can be designated as a primary. + When the network is OK, all 3 other nodes will know this fact, and they + will +\emph on +automatically +\emph default + go into secondary mode, following the transaction logs from the (new) primary. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + +\size scriptsize +Hint: avoid this command. + It turns off +\emph on +any +\emph default + primary, +\series bold +globally +\series default + +\begin_inset Foot +status open + +\begin_layout Plain Layout + +\size scriptsize +A serious +\series bold +misconception +\series default + among some people is the belief that they can switch +\begin_inset Quotes eld +\end_inset + +a certain node to secondary +\begin_inset Quotes erd +\end_inset + +. + It is not possible to switch individual nodes to secondary without affecting + other nodes! The concept of +\begin_inset Quotes eld +\end_inset + +designated primary +\begin_inset Quotes erd +\end_inset + + is +\series bold +global +\series default + throughout a resource! +\end_layout + +\end_inset + +. + You cannot start a sync after that (e.g. 
+ +\family typewriter +invalidate +\family default + or +\family typewriter +join-resource +\family default + or +\family typewriter +resume-sync +\family default +), because it is +\emph on +not unique +\emph default + from where the data shall be fetched. + In split brain situations (when the network is OK again), this may have + further drawbacks. + It is much better / easier to +\series bold +\emph on +directly +\emph default + switch the designated primary +\series default + from one node to another via the +\family typewriter +primary +\family default + command. + See also section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Forced-Switching" + +\end_inset + +. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\size scriptsize + There is only one valid use case where you +\emph on +really +\emph default + need this command: before finally destroying a resource via the +\emph on +last +\emph default + +\family typewriter +leave-resource +\family default + (or the dangerous +\family typewriter +delete-resource +\family default +), you need to run this command first. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +wait-umount +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +See section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Waiting" + +\end_inset + +. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +log-purge-all +\begin_inset CommandInset label +LatexCommand label +name "log-purge-all$res" + +\end_inset + + +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: none additionally. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: all locally known logfiles and version links are removed, + whenever they are not / no longer reachable by any split brain version. 
+\end_layout + +\begin_layout Plain Layout +Rationale: remove hindering split-brain / +\family typewriter +leave-resource +\family default + leftovers. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Use this only when split brain does not go away by means of +\family typewriter +leave-resource +\family default + (which +\emph on +could +\emph default + happen in very weird scenarios such as MARS running on virtual machines + doing a restore of their snapshots, or otherwise unexpected resurrection + of dead or half-dead nodes). +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + THIS IS POTENTIALLY DANGEROUS! +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +This command +\emph on +might +\emph default + destroy some valuable logfiles / other information in case the local informatio +n is outdated or otherwise incorrect. + MARS does its best for checking anything, but there is no guarantee. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Hint: use +\family typewriter +--dry-run +\family default + beforehand for checking! 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +resize +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +[$size] +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +almost +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: The local host must be primary. + All disks in the cluster participating in +\family typewriter +$res +\family default + must be physically larger than the logical resource size (e.g, by use of + +\family typewriter +lvm +\family default +; can be checked by macros +\family typewriter +%disk-size{} +\family default + and +\family typewriter +%resource-size{} +\family default +). 
+ When the optional +\family typewriter +$size +\family default + argument is present, it must be smaller than the minimum of all physical + sizes, but larger than the current logical size of the resource. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: the logical size of +\family typewriter +/dev/mars/$dev_name +\family default + will reflect the new size after a while. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Subsection +Logfile Operations +\end_layout + +\begin_layout Standard + +\size scriptsize +\begin_inset Tabular + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Command / Params +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Cmp +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Description +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +cron +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + 
+\begin_layout Plain Layout + +\size scriptsize +Do all necessary housekeeping tasks. + See +\family typewriter +log-rotate +\family default + and +\family typewriter +log-delete-all +\family default + for details. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +This should be regularly called by an external cron job or similar. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +log-rotate +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: the local node +\family typewriter +$host +\family default + must be primary at +\family typewriter +$res +\family default +. 
+\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: after a while, a new transaction logfile +\family typewriter +/mars/resource-$res/log-$new_nr-$host +\family default + will be used instead of +\family typewriter +/mars/resource-$res/log-$old_nr-$host +\family default + where +\family typewriter +$new_nr +\family default + = +\family typewriter +$old_nr +\family default + + 1. + Without +\family typewriter +--force +\family default +, this will only carry out actions at the primary side since it makes no + sense on secondaries. + With +\family typewriter +--force +\family default +, secondaries are +\emph on +trying +\emph default + to +\emph on +remotely +\emph default + trigger a log-rotate, but without any guarantee (likely even a split-brain + may result instead, so use this only if you are +\emph on +really +\emph default + desperate). +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +log-delete +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height 
"1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: the local node must be a member of +\family typewriter +$res +\family default +. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: when there exists some old transaction logfiles +\family typewriter +/mars/resource-$res/log-*-$some_host +\family default + which are no longer referenced by any of the symlinks +\family typewriter +/mars/resource-$res/replay-* +\family default + , those logfiles are marked for deletion in the whole cluster. + When no such logfiles exist, nothing will happen. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +log-delete-one +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status 
open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: the local node must be a member of +\family typewriter +$res +\family default +. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: when there exists an old transaction logfile +\family typewriter +/mars/resource-$res/log-$old_nr-$some_host +\family default + where +\family typewriter +$old_nr +\family default + is the minimum existing number and that logfile is no longer referenced + by any of the symlinks +\family typewriter +/mars/resource-$res/replay-* +\family default + , that logfile is marked for deletion in the whole cluster. + When no such logfile exists, nothing will happen. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +log-delete-all +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size 
scriptsize +Alias for +\family typewriter +log-delete +\family default +. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Subsection +Consistency Operations +\end_layout + +\begin_layout Standard + +\size scriptsize +\begin_inset Tabular + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Command / Params +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Cmp +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Description +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +invalidate +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: the local node must be in secondary role at +\family typewriter +$res +\family 
default +. + A +\emph on +designated +\emph default + primary must exist. + When having +\begin_inset Formula $k>2$ +\end_inset + + replicas, no split brain must exist (otherwise, or when +\family typewriter +invalidate +\family default + does not work in case of +\begin_inset Formula $k=2$ +\end_inset + +, use the +\family typewriter +leave-resource +\family default + ; +\family typewriter +join-resource +\family default + method described in section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Split-Brain-Resolution" + +\end_inset + +). +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: the local disk is marked as inconsistent, and a fast fullsync + from the designated primary will start after a while. + Notice that +\family typewriter +marsadm {pause,resume}-sync +\family default + will influence whether the sync really starts. + When the fullsync has finished successfully, the local node will be consistent + again. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + +\size scriptsize +After a while, state +\family typewriter +Orphan +\family default + should be left. + Don't forget to regularly monitor for longer occurrences of +\family typewriter +Orphan +\family default +! 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +fake-sync +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: the local node must be in secondary role at +\family typewriter +$res +\family default +. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: when a fullsync is running, it will stop after a while, and + the local node will be +\emph on +marked +\emph default + as consistent as if it were consistent again. +\end_layout + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + +\size scriptsize +ONLY USE THIS IF YOU REALLY KNOW WHAT YOU ARE DOING! 
+\begin_inset Newline newline +\end_inset + +See the WARNING in section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:Creating-and-Maintaining" + +\end_inset + + +\begin_inset Newline newline +\end_inset + +Use this only +\emph on +before +\emph default + creating a fresh filesystem inside +\family typewriter +/dev/mars/$res +\family default +. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +set-replay +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + +\size scriptsize +ONLY FOR ADVANCED HACKERS WHO KNOW WHAT THEY ARE DOING! +\begin_inset Newline newline +\end_inset + +This command is deliberately not documented. + You need the competence level RTFS ( +\begin_inset Quotes eld +\end_inset + +read the fucking sources +\begin_inset Quotes erd +\end_inset + +). 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Section +Further Operations +\end_layout + +\begin_layout Subsection +Inspection Commands +\end_layout + +\begin_layout Standard + +\size scriptsize +\begin_inset Tabular + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Command / Params +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Cmp +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Description +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +view- +\emph on +macroname +\begin_inset Newline newline +\end_inset + + +\emph default + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Display the output of a macro evaluation. 
+ See section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:Inspecting-the-State" + +\end_inset + + for a thorough description. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +view +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Equivalent to +\family typewriter +view-default +\family default +. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +role +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Deprecated. + Use +\family typewriter +view-role +\family default + instead.
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +state +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Deprecated. + Use +\family typewriter +view-state +\family default + instead.
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +cstate +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Deprecated. + Use +\family typewriter +view-cstate +\family default + instead.
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +dstate +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Deprecated. + Use +\family typewriter +view-dstate +\family default + instead.
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +status +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Deprecated. + Use +\family typewriter +view-status +\family default + instead.
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +show-state +\end_layout + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Deprecated. + Don't use it. + Use +\family typewriter +view-state +\family default + instead, or other macros.
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +show-info +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Deprecated. + Don't use it. + Use +\family typewriter +view-info +\family default + instead, or other macros.
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +show +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Deprecated. + Don't use it. + Use or implement some macros instead.
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +show-errors +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Deprecated. + Use +\family typewriter +view-the-err-msg +\family default + or +\family typewriter +view-resource-err +\family default + or similar macros.
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +cat +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$file +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Write the file content to stdout, but with all occurrences of numeric + timestamps converted to a human-readable format. + This is most useful for inspection of status and log files, e.g.
+\family typewriter +marsadm cat /mars/5.total.log +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Subsection +Setting Parameters +\begin_inset CommandInset label +LatexCommand label +name "subsec:Setting-Parameters" + +\end_inset + + +\end_layout + +\begin_layout Subsubsection +Per-Resource Parameters +\end_layout + +\begin_layout Standard + +\size scriptsize +\begin_inset Tabular + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Command / Params +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Cmp +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Description +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +set-emergency-limit $res +\emph on +n +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +The argument +\emph on +n +\emph default + must be a percentage between 0 and 100 %.
+ When the remaining store space in +\family typewriter +/mars/ +\family default + undershoots the given percentage, the resource will go +\emph on +earlier +\emph default + into emergency mode than by the global computation described in section + +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:Defending-Overflow" + +\end_inset + +. + 0 means unlimited. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +get-emergency-limit $res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Inquiry of the preceding value. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Subsubsection +Global Parameters +\end_layout + +\begin_layout Standard + +\size scriptsize +\begin_inset Tabular + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Command / Params +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Cmp +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Description +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +set-sync-limit-value +\emph on +n +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Limit the concurrency of sync operations to some maximum number. 
+ 0 means unlimited. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +get-sync-limit-value +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Inquiry of the preceding value. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +set-connect-pref-list host1,host2,hostn +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Set the order of preferences for connections when there are more than 2 + hosts participating in a cluster. + The argument must be a comma-separated list of node names.
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +get-connect-pref-list +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Inquiry of the preceding value. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Subsection +Waiting +\begin_inset CommandInset label +LatexCommand label +name "subsec:Waiting" + +\end_inset + + +\end_layout + +\begin_layout Standard + +\size scriptsize +\begin_inset Tabular + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Command / Params +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Cmp +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Description +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +wait-cluster +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: the 
+\family typewriter +/mars/ +\family default + filesystem must be mounted and it must contain a valid MARS symlink tree + produced by the other +\family typewriter +marsadm +\family default + commands. + The kernel module must be loaded. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: none. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Wait until +\emph on +all +\emph default + nodes in the cluster have sent a message, or until timeout. + The default timeout is 30 s (exceptionally) and +\size default + +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + +Be +\size scriptsize + may be changed by +\family typewriter + --timeout=$seconds +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +wait-resource +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +{is-,}{attach, +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + + primary, +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash 
+hfill +\end_layout + +\end_inset + + device}{-off,} +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: the local node must be a member of the resource +\family typewriter +$res +\family default +. + +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: none. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Wait until the local node reaches a specified condition on +\family typewriter +$res +\family default +, or until timeout. + The default timeout of 60 s may be changed by +\family typewriter + --timeout=$seconds +\family default +. + The last argument denotes the condition. + The condition is inverted if suffixed by +\family typewriter +-off +\family default +. + When preceded by +\family typewriter +is- +\family default + (which is the most useful case), it is checked whether the condition is + actually reached. + When the +\family typewriter +is- +\family default + prefix is left off, the check is whether another +\family typewriter +marsadm +\family default + command has been already given which +\emph on +tries +\emph default + to achieve the intended result (typically, you may use this after the + +\family typewriter +is- +\family default + variant has failed). 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +wait-connect +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +almost +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +This is an alias for +\family typewriter +wait-cluster +\family default + waiting until only those nodes are reachable which belong to +\family typewriter +$res +\family default + (instead of waiting for the +\emph on +full +\emph default + cluster). 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +wait-umount +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$res +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Precondition: none additionally. +\end_layout + +\begin_layout Plain Layout + +\size scriptsize +Postcondition: the local +\family typewriter +/dev/mars/$dev_name +\family default + is no longer in use (e.g. + umounted). +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Subsection +Low-Level Expert Commands +\end_layout + +\begin_layout Standard +These commands are for experts and advanced sysadmins only. + The interface is not stable, i.e. + the meaning may change at any time. + Use at your own risk! 
+\end_layout + +\begin_layout Standard + +\size scriptsize +\begin_inset Tabular + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Command / Params +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Cmp +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Description +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +set-link +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +RTFS. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +get-link +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +RTFS. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +delete-file +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +RTFS. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Standard +The following commands are for manual setup / repair of cluster membership. + Only to be used by experts who know what they are doing! In general, cluster-wi +de operations on IP addresses may need to be repeated at all hosts in the + cluster iff the communication is not (yet) possible and/or not (yet) actually + working (e.g. + firewalling problems etc). 
+\end_layout + +\begin_layout Standard + +\size scriptsize +\begin_inset Tabular + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Command / Params +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Cmp +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Description +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "30col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +lowlevel-ls-host-ips +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "50col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +List all configured cluster members together with their currently configured + IP addresses, as known +\emph on +locally +\emph default +. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "30col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +lowlevel-set-host-ip +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$hostname +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$ip +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "50col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Change the assignment of IP addresses +\emph on +locally +\emph default +. + May be used when hosts are moved to different network locations, or when + different network interfaces are to be used for replication (e.g. + dedicated replication IPs). + Notice that the names of hosts must not change at all, only their IP addresses + may be changed. + Check active connections with +\family typewriter +netstat +\family default + & friends. 
+ Updates may need some time to proceed (socket timeouts etc). +\begin_inset Newline newline +\end_inset + +Hint: for safety, call this on +\emph on +all +\emph default + members of a cluster to ensure consistency. + Otherwise it may happen that some cluster members do not know the +\emph on +new +\emph default + IP address where to fetch the +\emph on +new +\emph default + information from. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "30col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +lowlevel-delete-host +\begin_inset Newline newline +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +strut +\backslash +hfill +\end_layout + +\end_inset + +$hostname +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "50col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Remove a host from the cluster membership +\emph on +locally +\emph default +, together with its IP address assignment. + This does not remove any further information. + In particular, resource memberships are untouched. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Subsection +Senseless Commands (from DRBD) +\end_layout + +\begin_layout Standard + +\size scriptsize +\begin_inset Tabular + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Command / Params +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Cmp +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Description +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +syncer +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +new-current-uuid +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout 
Plain Layout + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +create-md +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +dump-md +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +dump +\end_layout + +\end_inset + + 
+\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +get-gi +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +show-gi +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize 
"4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +outdate +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +adjust +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +yes +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +Implemented as NOP (not necessary with MARS). 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +hidden-commands +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Subsection +Forbidden Commands (from DRBD) +\end_layout + +\begin_layout Standard +These commands are not implemented because they would be dangerous in MARS + context: +\end_layout + +\begin_layout Standard + +\size scriptsize +\begin_inset Tabular + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Command / Params +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Cmp +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +Description +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +invalidate-remote +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + 
+\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +This would be too dangerous in case you have multiple secondaries. + A similar effect can be achieved with the +\family typewriter +--host= +\family default + option. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "20col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\family typewriter +\size scriptsize +verify +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +no +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size scriptsize +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "60col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout + +\size scriptsize +This would cause unintended side effects due to races between logfile transfer + / application and block-wise comparison of the underlying disks. 
+ However, +\family typewriter +marsadm join-resource +\family default + or +\family typewriter +invalidate +\family default + will do the same as DRBD verify followed by DRBD resync, i.e. + this will automatically correct any found errors. + Note that the fast-fullsync algorithm of MARS will minimize network traffic. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Section +The +\family typewriter +/proc/sys/mars/ +\family default + and other Expert Tweaks +\begin_inset CommandInset label +LatexCommand label +name "sec:The-/proc/sys/mars/-Expert" + +\end_inset + + +\end_layout + +\begin_layout Standard +In many cases, you will not need to deal with tweaks in +\family typewriter +/proc/sys/mars/ +\family default + because everything should already default to reasonable predefined values. + This interface allows access to some internal kernel variables of the +\family typewriter +mars.ko +\family default + kernel module at +\emph on +runtime +\emph default +. + This means, the values will be reset to default at +\family typewriter +rmmod mars +\family default + or at reboot. + If you need some persistence, implement it by yourself, e.g. + at startup scripts. +\end_layout + +\begin_layout Standard + +\family typewriter +/proc/sys/mars/ +\family default + is +\emph on +not +\emph default + a stable interface. + It is not only specific for MARS, but may also change between releases + without notice. +\end_layout + +\begin_layout Standard +This section describes only those tweaks intended for sysadmins, not those + for developers / very deep internals. 
+\end_layout + +\begin_layout Subsection +Tuning Network Performance +\begin_inset CommandInset label +LatexCommand label +name "subsec:Tuning-Network-Performance" + +\end_inset + + +\end_layout + +\begin_layout Standard +Starting with MARS Light series 0.2, a new feature called +\begin_inset Quotes eld +\end_inset + +socket bundling +\begin_inset Quotes erd +\end_inset + + is available. +\end_layout + +\begin_layout Standard +It is mostly intended for lines showing high packet loss. + By using multiple TCP sockets in parallel for emulating a single logical + connection, throughput can be significantly increased. +\end_layout + +\begin_layout Standard +Example for setting the socket parallelism to 4: +\end_layout + +\begin_layout Itemize + +\family typewriter +echo 4 > /proc/sys/mars/parallel_connections +\end_layout + +\begin_layout Standard +The following graphics shows the throughput of a non-fast +\begin_inset Foot +status open + +\begin_layout Plain Layout +The fast fullsync algorithm would not saturate the +\family typewriter +eth0 +\family default + link with traffic from a single resource. +\end_layout + +\end_inset + + fullsync of a +\emph on +single +\emph default + 100GiB resource over a loaded long-distance line between Europe/Germany + and USA/Midwest. + In order to compensate highly varying load at the line, all the experiments + were repeated more than 10 times and averaged. + Each bar shows the throughput for a particular socket parallelism. +\begin_inset Separator latexpar +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +\align center +\begin_inset Graphics + filename images/socket-bundling-long-summary.png + width 70col% + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +Notice that the uplinks of the two servers are only 1 GBit/s respectively. + When the uplink is saturated, about 100 MByte/s is the maximum possible + peak throughput in theory. 
+ You can easily recognize that the peak throughput is almost reached with + a parallelism degree of 2, but using even more sockets appears to be slightly + counter-productive. + One of the reasons is that more sockets will increase contention on the + line, thus increasing packet loss. + Another potential reason is that higher parallelism at sockets will lead + to higher parallelism in disk reads, in turn leading to more permutations + of disk read positions (more +\emph on +random +\emph default + reads instead of purely sequential reads), which is counter-productive + for disk readahead strategies. +\end_layout + +\begin_layout Standard +The next graphics shows the same, but over a medium distance of about 50km. + This line is even more heavily loaded with respect to the number of TCP + connections running in parallel (probably some 10,000 or even 100,000 if + not more), and there is some kind of +\begin_inset Quotes eld +\end_inset + +traffic shaping +\begin_inset Quotes erd +\end_inset + + at some intermediate network gear which will +\begin_inset Quotes eld +\end_inset + +punish +\begin_inset Quotes erd +\end_inset + + those traffic sources disproportionally increasing overall packet loss. + This can explain the even higher counter-productive effect of using too + many sockets and thus injecting additional packet loss: +\begin_inset Separator latexpar +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +\align center +\begin_inset Graphics + filename images/socket-bundling-short-summary.png + width 70col% + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +In general, the optimum value for +\family typewriter +/proc/sys/mars/parallel_connections +\family default + may depend on many runtime factors such as other load running over some + (parts of) physical equipment. 
+ You will need to determine optimum values yourself. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + Notice that socket bundling is conceptually the +\begin_inset Quotes eld +\end_inset + +opposite +\begin_inset Quotes erd +\end_inset + + of traffic shaping. + You are trying to get +\emph on +more +\emph default + bandwidth, at the cost of +\emph on +other +\emph default + traffic competing for the same network resources. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + If you are operating masses of servers, don't set the MARS socket parallelism + +\series bold +too high +\series default + everywhere. + You might +\begin_inset Quotes eld +\end_inset + +steal +\begin_inset Quotes erd +\end_inset + + too much bandwidth from other applications when starting masses of syncs + in parallel, e.g. + after an incident. + Best practice is to start with a default value of 1, and to increase it + only +\emph on +on demand +\emph default +, and/or preferably +\emph on +only +\emph default + at those servers where high load really occurs or where some urgent actions + need a +\emph on +temporary +\emph default + boost. 
+\end_layout + +\begin_layout Subsection +Syslogging +\end_layout + +\begin_layout Standard +All internal messages produced by the kernel module belong to one of the + following classes: +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 +0 debug messages +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 +1 info messages +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 +2 warnings +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 +3 error messages +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 +4 fatal error messages +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 +5 any message (summary of 0 to 4) +\end_layout + +\begin_layout Subsubsection +Logging to Files +\end_layout + +\begin_layout Standard +This feature will likely disappear when MARS goes to kernel upstream. + It was mostly intended for debugging during early beta phases and is no + longer needed for stable operation. + Developers may use it for spotting potential problems. +\end_layout + +\begin_layout Standard +The classes may be used to produce status files +\family typewriter +$class.*.status +\family default + in the +\family typewriter +/mars/ +\family default + and/or in the +\family typewriter +/mars/resource- +\emph on +mydata +\emph default +/ +\family default + directory / directories. +\end_layout + +\begin_layout Standard +When you create a file +\family typewriter +$class.*.log +\family default + in parallel to any +\family typewriter +$class.*.status +\family default +, the +\family typewriter +*.log +\family default + file will be appended forever with the same messages as in +\family typewriter +*.status +\family default +. + The difference is that *.status is regenerated anew from an empty starting + point, while *.log can (potentially) increase indefinitely unless you remove + it, or rename it to something else. 
+\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + +Beware, any permanently present +\family typewriter +*.log +\family default + file can easily fill up your +\family typewriter +/mars/ +\family default + partition until the problems described in section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:Defending-Overflow" + +\end_inset + + will appear. + Use +\family typewriter +*.log +\family default + only for a +\series bold +limited time +\series default +, and +\series bold +only for debugging! +\end_layout + +\begin_layout Subsubsection +Logging to Syslog +\end_layout + +\begin_layout Standard +The classes also play a role in the following +\family typewriter +/proc/sys/mars/ +\family default + tweaks: +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +syslog_min_class +\family default + (rw) The +\emph on +minimum +\emph default + class number for +\emph on +permanent +\emph default + syslogging. + By default, this is set to -1 in order to switch off permanent logging completely. + Permanent logging can easily flood your syslog with such huge amounts of + messages (in particular when class=0), that your system as a whole may + become unusable (because vital kernel threads may be blocked too long or + too often by the userspace syslog daemon). + Instead, please use the flood-protected syslogging described below! +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +syslog_max_class +\family default + (rw) The +\emph on +maximum +\emph default + class number for +\emph on +permanent +\emph default + syslogging. + Please use the flood-protected version instead. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +syslog_flood_class +\family default + (rw) The minimum class of flood-protected syslogging. + The maximum class is always 4. 
+\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +syslog_flood_limit +\family default + (rw) The maximum number of messages after which the flood protection will + start. + This is a hard limit for the number of messages written to the syslog. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +syslog_flood_recovery_s +\family default + (rw) The number of seconds after which the internal flood counter is reset + (after flood protection state has been reached). + When no new messages appear after this time, the flood protection will + start over at count 0. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +The rationale behind flood protected syslogging: sysadmins are usually only + interested in the point in time where some problems / incidents / etc have + +\emph on +started +\emph default +. + They are usually not interested in capturing +\emph on +each +\emph default + and +\emph on +every +\emph default + single error message (in particular when they are flooding the system logs). +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +If you +\emph on +really +\emph default + need complete error information, use the +\family typewriter +*.log +\family default + files described above, compress them and save them to somewhere else +\emph on +regularly +\emph default + by a cron job. + This bears much less overhead than filtering via the syslog daemon, or + even remote syslogging in real time which will almost surely screw up your + system in case of network problems co-inciding with flood messages, such + as caused in turn by those problems. + Don't rely on real-time concepts, just do it the old-fashioned batch job + way. 
+\end_layout + +\begin_layout Subsubsection +Tuning Verbosity of Logging +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +show_debug_messages +\family default + Boolean switch, 0 or 1. + Mostly useful only for developers. + This can easily flood your logs if you are not careful. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +show_log_messages +\family default + Boolean switch, 0 or 1. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +show_connections +\family default + Boolean switch, 0 or 1. + Show detailed internal statistics on sockets. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +show_statistics_local +\begin_inset space ~ +\end_inset + +/ +\begin_inset space ~ +\end_inset + +show_statistics_global +\family default + Only useful for kernel developers. + Shows some internal information on internal brick instances, memory usage, + etc. +\end_layout + +\begin_layout Subsection +Tuning the Sync +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +sync_flip_interval_sec +\family default + (rw) The sync process must not run in parallel to logfile replay, in order + to easily guarantee consistency of your disk. + If logfile replay would be paused for the full duration of very large or + long-lasting syncs (which could take some days over very slow networks), + your +\family typewriter +/mars/ +\family default + filesystem could overflow because no replay would be possible in the meantime. + Therefore, MARS regularly flips between actually syncing and actually replaying, + if both are enabled. + You can set the time interval for flipping here. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +sync_limit +\family default + (rw) When > 0, this limits the maximum number of sync processes actually + running parallel. 
+ This is useful if you have a large number of resources, and you don't want + to overload the network with sync processes. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +sync_nr +\family default + (ro) Passive indicator for the number of sync processes currently running. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\family typewriter +sync_want +\family default + (ro) Passive indicator for the number of sync processes which +\emph on +demand +\emph default + running. +\end_layout + +\begin_layout Subsection +Lowlevel TCP Tuning (Networking Experts Only) +\begin_inset CommandInset label +LatexCommand label +name "subsec:TCP-Tuning" + +\end_inset + + +\end_layout + +\begin_layout Standard +When +\family typewriter +CONFIG_MARS_SEPARATE_PORTS +\family default + and +\family typewriter +CONFIG_MARS_IPv4_TOS +\family default + are enabled, MARS uses the following types of traffic: +\end_layout + +\begin_layout Description + +\family typewriter +MARS_TRAFFIC_META +\family default + (by default on port 7777 with +\family typewriter +IPTOS_LOWDELAY +\family default +) This can be tuned in directory +\family typewriter +/proc/sys/mars/tcp_tuning_0_meta_traffic/ +\family default +. +\end_layout + +\begin_layout Description + +\family typewriter +MARS_TRAFFIC_REPLICATION +\family default + (by default on port 7778 with +\family typewriter +IPTOS_RELIABILITY +\family default +) This can be tuned in directory +\family typewriter +/proc/sys/mars/tcp_tuning_1_replication_traffic/ +\family default +. +\end_layout + +\begin_layout Description + +\family typewriter +MARS_TRAFFIC_SYNC +\family default + (by default on port 7779 with +\family typewriter +IPTOS_MINCOST +\family default +) This can be tuned in directory +\family typewriter +/proc/sys/mars/tcp_tuning_2_sync_traffic/ +\family default +. 
+ Attention: since the advent of +\family typewriter +DSCP +\family default +, this bit (hex +\family typewriter +0x2 +\family default + in host byte order) is suppressed by the kernel, and yields +\family typewriter +DS0 +\family default +. +\end_layout + +\begin_layout Standard +In each of these directories, the following tunables are available (only + for networking experts who know what they are doing): +\end_layout + +\begin_layout Description + +\family typewriter +ip_tos +\family default + As explained above. + Notice: hex constants from +\family typewriter +/usr/include/linux/ip.h +\family default + must be converted to decimal before forwarding to the +\family typewriter +/proc +\family default + interface. +\end_layout + +\begin_layout Description + +\family typewriter +tcp_window_size +\family default + Current default is 8 * 1024 * 1024. +\end_layout + +\begin_layout Description + +\family typewriter +tcp_nodelay +\family default + Current default is 0. +\end_layout + +\begin_layout Description + +\family typewriter +tcp_timeout +\family default + Current default is 2. +\end_layout + +\begin_layout Description + +\family typewriter +tcp_keepcnt +\family default + Current default is 3. +\end_layout + +\begin_layout Description + +\family typewriter +tcp_keepintvl +\family default + Current default is 3. +\end_layout + +\begin_layout Description + +\family typewriter +tcp_keepidle +\family default + Current default is 4. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Further tuning parameters are in the standard Linux kernel. 
+ Notice that +\family typewriter +IP_TOS +\family default + is internally converted to +\family typewriter +DSCP +\family default +, which in turn can be further manipulated by +\family typewriter +netfilter +\family default + / +\family typewriter +iptables +\family default + and/or by +\family typewriter +qdisc +\family default + ( +\family typewriter +tc +\family default +) and/or by further (external) networking components. + The ancient TOS settings are meant as a default +\emph on +starting point +\emph default + for further customization to your needs. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + Typically, +\emph on +public +\emph default + internet transports are flattening / ignoring or otherwise manipulating +\begin_inset Foot +status open + +\begin_layout Plain Layout +DSCP markings can be only made reliable on private networks (possibly requiring + some effort). + Public Internet service and transit providers do not necessarily treat + the TOS values or DSCP markings with any form of priority and may also + remove or change them without any notice. + Some internet service or transit providers also do use specific DSCP markings + to mark packets for being dropped, which may result in hard to find transmissio +n errors. +\end_layout + +\begin_layout Plain Layout +If you want to use MARS on a public internet connection, you should use +\series bold +encrypted +\series default + +\series bold +VPN +\series default + with different DSCP markings, and coordinate them with your network services + provider. +\end_layout + +\end_inset + + the TOS / DSCP fields. + There it will not work. + Anyway, you should never route unencrypted MARS traffic over public transports, + for obvious security reasons. 
+ Notice: MARS replication is meant for company- +\emph on +internal +\emph default + networks like +\emph on +internal +\emph default + +\series bold +replication networks +\series default + (or storage networks) where some networking department has control of. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + Playing with the above settings can easily tear down your whole (replication) + network if you don't know exactly what you are doing. + Please test any changes in the lab first. + Mass rollout should be done in incremental phases, each in power of 10 + units. + There might be unexpected effects like packet storms, or packet loss, etc. + Some of these effects may only show up when a certain number of hosts is + exceeded, or when certain load conditions are hammering the overall Distributed + System. + Some very old routers / switches are known to break down unexpectedly when + overloaded in certain ways. + Be careful in a production environment! +\end_layout + +\begin_layout Chapter +Tips and Tricks +\end_layout + +\begin_layout Section +IO Performance Tuning +\begin_inset CommandInset label +LatexCommand label +name "sec:IO-Performance-Tuning" + +\end_inset + + +\end_layout + +\begin_layout Standard +There +\emph on +exist +\emph default + some use cases where MARS +\emph on +can +\emph default + deliver better IO performance than a raw block device. + However, this cannot be expected +\emph on +in general +\emph default +. + In some +\emph on +other +\emph default + cases the performance may be +\emph on +lower +\emph default + than with a +\emph on +single +\emph default + local raw device. 
+\end_layout + +\begin_layout Standard +For demonstration, we use the +\family typewriter +blkreplay +\family default + tool from +\begin_inset Flex URL +status open + +\begin_layout Plain Layout + +http://blkreplay.org +\end_layout + +\end_inset + + and a load which has been captured from a +\series bold +real datacenter +\series default + (1&1 Ionos ShaHoLin = Shared Hosting Linux). + The load already contains a parallelism degree of 20 LXC containers running + in parallel at the same iron. + This corresponds to about 60,000 web spaces running on 20 Apache instances, + already in parallel. + In difference to artificial benchmarks (like pure random IO or pure sequential + IO), this benchmark is much more close to real server operations, while + artificial benchmarks are not meaningful for practice in general, because + they can deviate from real server operations by +\emph on +factors +\emph default + or even by +\series bold +orders of magnitude +\series default +. +\end_layout + +\begin_layout Standard +In order to determine the limits of the test candidates, the timing of the + original workload was converted to a linear ramp-up, simulating an +\series bold +overloaded +\series default + system. + Otherwise benchmarking would not be possible. +\end_layout + +\begin_layout Standard +The following +\family typewriter +blkreplay +\family default + benchmarks were executed on an otherwise unloaded Dell R630 with 40 CPU + threads on 2 sockets, 192 GB RAM, a Dell R730 hardware RAID controller + with 2 GB BBU cache, and 10 spindles Dell 1.8 TB 2.5 inch SAS disks configured + as RAID-6. + All data, including the +\family typewriter +/mars +\family default + directory, was located on the hardware RAID via LVM2. + +\family typewriter +/dev/vginfong/lv-0 +\family default + was assigned a size of 8 TiB. + For testing, vanilla kernel 4.9.x with the MARS pre-patch and +\family typewriter +mars0.1astable72 +\family default + was used. 
+\end_layout + +\begin_layout Standard +The +\family typewriter +blkreplay +\family default + parameters were as follows: +\begin_inset listings +inline false +status open + +\begin_layout Plain Layout + +output_label="MARS" +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\begin_layout Plain Layout + +# input description +\end_layout + +\begin_layout Plain Layout + +input_file_list="http://blkreplay.org/loads/natural/1and1/shared-hosting/2016/Sha +HoLin_from_bare_metal/x20/shaholin-x20-ramped/shaholin-x20.adjacent.ramped-100.load. +gz" +\end_layout + +\begin_layout Plain Layout + +replay_duration=110 +\end_layout + +\begin_layout Plain Layout + +speedup=10 +\end_layout + +\begin_layout Plain Layout + +threads=512 +\end_layout + +\begin_layout Plain Layout + +cmode=with-conflicts +\end_layout + +\begin_layout Plain Layout + +scheduler="noop" +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\begin_layout Plain Layout + +# hardware setup +\end_layout + +\begin_layout Plain Layout + +replay_host_list="icpu5133" +\end_layout + +\begin_layout Plain Layout + +replay_device_list="/dev/vginfong/lv-0" +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\begin_layout Plain Layout + +# output description +\end_layout + +\begin_layout Plain Layout + +enable_graph=1 +\end_layout + +\begin_layout Plain Layout + +graph_options="--no-static --dynamic" +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +We start with the +\series bold +raw +\series default + device +\family typewriter +/dev/vginfong/lv-0 +\family default + which had a size of 8 TiB. 
+ The throughput is about 1418 IOPS, and the latency diagram shows that the + system is overloaded, but can cope with that overload: +\end_layout + +\begin_layout Standard +\noindent +\align center +\begin_inset Graphics + filename images/blkreplay/MARS.MARS.raw.iosched-noop.nr_request-128.icpu5133.vginfong.lv-0.g01.latency.realtime.png + width 100col% + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +As you can see in the filename, the NOOP kernel IO scheduler was used, and + the kernel parameter +\family typewriter +nr_requests +\family default + was left at its default value of 128. + When you read the specs of the Dell R730 hardware RAID controller, you + will notice that it can handle a much higher IO request parallelism of + almost 1024 requests in parallel. +\end_layout + +\begin_layout Standard +So the first natural tuning attempt is +\family typewriter +nr_requests=1020 +\family default +, in order to release the +\begin_inset Quotes eld +\end_inset + +kernel IO handbrake +\begin_inset Quotes erd +\end_inset + +. + This results in an improved throughput of 1562 IOPS, and even the +\emph on +maximum +\emph default + latencies are improved, but the +\emph on +average +\emph default + latencies are becoming a little bit worse: +\end_layout + +\begin_layout Standard +\noindent +\align center +\begin_inset Graphics + filename images/blkreplay/MARS.MARS.raw.iosched-noop.nr_request-1020.icpu5133.vginfong.lv-0.g01.latency.realtime.png + width 100col% + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +It is well known since decades that there is a principal tradeoff between + throughput and latencies in IO systems. + Thus it is not a surprising result. +\end_layout + +\begin_layout Standard +On servers, overload situations should be rare, and during overload throughput + is typically much more important than latencies, as long as latencies are + not exceedingly high. 
+ Thus we can recommend +\family typewriter +nr_requests=1000 +\family default + for production. +\end_layout + +\begin_layout Standard +However, some sysadmins might be tempted to question why the NOOP scheduler + has been used. + On the internet, there are a ton of claims that CFQ is much better. +\end_layout + +\begin_layout Standard +Well, testing with CFQ instead of NOOP is no problem for +\family typewriter +blkreplay +\family default +. + However, the result is very surprising. + While the IOPS are 1539, which is only a slight decrease which could result + from measurement tolerances, the latencies are now turning almost into + a disaster: +\end_layout + +\begin_layout Standard +\noindent +\align center +\begin_inset Graphics + filename images/blkreplay/MARS.MARS.raw.iosched-cfq.nr_request-1020.icpu5133.vginfong.lv-0.g01.latency.realtime.png + width 100col% + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +In production, you should never encounter IO latencies of almost 15 seconds. + So what is going wrong here? +\end_layout + +\begin_layout Standard +Here is an explanation. + A hardware RAID controller +\emph on +already +\emph default + has an +\emph on +internal +\emph default + IO scheduler. + This IO scheduler is hidden in a black box, such that many sysadmins don't + know of its existence. + If you add another IO scheduler at kernel level, you will have +\series bold +two different +\series default + IO schedulers running in parallel, and sometimes taking +\series bold +contradictory decisions +\series default +. +\end_layout + +\begin_layout Standard +These contradictory scheduling decisions may lead to problems in certain + cases and scenarios. +\end_layout + +\begin_layout Standard +While kernel-level IO schedulers like CFQ certainly have their merits at + improving your workstation's IO behaviour, they are counter-productive + at servers with hardware RAID controllers. 
+\end_layout + +\begin_layout Standard +So the advice is clear: +\series bold +switch them off +\series default + +\emph on +in such a case +\emph default +. +\end_layout + +\begin_layout Standard +Even if you have a software RAID, check with +\family typewriter +blkreplay +\family default + that any IO schedulers are +\emph on +really +\emph default + improving things. + When possible, use your real workload, captured with +\family typewriter +blktrace +\family default +. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + Never use a benchmark which only delivers IOPS! As demonstrated, inappropriate + IOPS tuning (or choice of inappropriate components) can worsen latencies + so much that production can be endangered! +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + Always look at +\emph on +both +\emph default + IOPS +\emph on +and +\emph default + latencies! +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + +\emph on +Average +\emph default + latencies, even when enriched with +\emph on +standard deviation +\emph default +, are not enough. + Classical statistics does not clearly describe operational problems like + +\series bold +hangs +\series default + and +\series bold +exceptionally high latency requests +\series default +, which may occur only rarely, but can then lead to +\series bold +serious incidents +\series default +. + Use a tool which can clearly display +\emph on +any +\emph default + faulty behaviour, such as +\family typewriter +blkreplay +\family default +'s +\series bold +latency diagrams +\series default +! 
+\end_layout + +\begin_layout Standard +Now we come to benchmarking +\family typewriter +/dev/mars/lv-0 +\family default + placed on top of +\family typewriter +/dev/vginfong/lv-0 +\family default +. + Notice that MARS needs to write all write requests twice: once into the + transaction logfile, and a second time by writeback into +\family typewriter +/dev/vginfong/lv-0 +\family default +. +\end_layout + +\begin_layout Standard +So you might expect that performance of +\family typewriter +/dev/mars/lv-0 +\family default + could be worse than at the underlying raw device. +\end_layout + +\begin_layout Standard +Nevertheless, the +\series bold +throughput +\series default + is now measured 4338 IOPS, which means that performance has +\series bold +more than doubled +\series default +. + You can also see it by the duration of the benchmark at the x axis. + Even the latencies have improved in many cases: +\end_layout + +\begin_layout Standard +\noindent +\align center +\begin_inset Graphics + filename images/blkreplay/MARS.MARS.mars.iosched-noop.nr_request-1020.icpu5133.mars.lv-0.g01.latency.realtime.png + width 100col% + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +How is it possible to be faster than a RAW device? How can this be explained? +\end_layout + +\begin_layout Standard +Look at the graphics and at the explanations from section +\begin_inset CommandInset ref +LatexCommand vref +reference "sec:The-Transaction-Logger" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +. + The key to local IO performance is the +\series bold +re-ordering of writeback +\series default + according to ascending sector numbers. + This can reduce mechanical seek times of hard disks considerably, and even + by factors, such that it can over-compensate the doubled writes to the + transaction logfile, and even when both are residing at the same RAID set. 
+\end_layout + +\begin_layout Standard +Notice: this effect is not only dependent on total RAM size and on the + maximum size of the MARS temporary memory buffer (tuning parameter +\family typewriter +/proc/sys/mars/mars_mem_percent +\family default + which defaults to a limit of 20%). + It is also highly dependent on the actual seek behaviour of the +\series bold +workload +\series default +. +\end_layout + +\begin_layout Standard +For example, if you use +\family typewriter +dd +\family default + for sequentially overwriting /dev/mars/lv-0 with a parallelism degree of + 1, the writeback optimization of MARS cannot be exploited. + However, +\family typewriter +dd +\family default + is no appropriate benchmarking tool, and has almost nothing to do with + real workloads occurring in datacenters, which typically are neither sequential, + nor do they have a parallelism degree of only 1. + Please don't try to lead any discussions about this: simply use +\family typewriter +blktrace +\family default + to capture your real server workload, and compare it to a run of dd. + Only if you encounter the same behaviour as +\family typewriter +dd +\family default +, only then you can really claim that your workload is like +\family typewriter +dd +\family default +. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + Any assumptions about workloads are very dangerous: they can deviate from + practice not only by factors, but sometimes even by +\emph on +orders of magnitude +\emph default +. +\end_layout + +\begin_layout Standard +Notice: the writeback optimization of MARS can typically only improve performanc +e of HDDs, but not of SSDs. 
+\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + By placing +\family typewriter +/mars +\family default + onto its own physical device with appropriate speed, you can compensate + the doubled writes to some degree. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + Depending on the workload and on RAID parameters, +\family typewriter +/mars +\family default + may be better placed onto SSDs, or better be placed on HDDs. + There is no general rule. + Just use +\family typewriter +blktrace +\family default + on your real workload, and check several configuration alternatives (also + different RAID levels etc) with +\family typewriter +blkreplay +\family default +. +\end_layout + +\begin_layout Section + +\family typewriter +systemd +\family default + Templates +\begin_inset CommandInset label +LatexCommand label +name "sec:systemd-Templates" + +\end_inset + + +\end_layout + +\begin_layout Standard +Starting with +\family typewriter +mars0.1stable57 +\family default + (resp. + +\family typewriter +mars0.1abeta9 +\family default +), you may use +\family typewriter +systemd +\family default + as a cluster manager at the Mechanics Layer as explained in section +\begin_inset CommandInset ref +LatexCommand vref +reference "subsec:Granularity-and-Layering" + +\end_inset + +. + MARS will replicate some +\family typewriter +systemd +\family default +-relevant state information across the (big) cluster, so there is some limited + remote operation support. + In particular, automated handover via +\family typewriter +marsadm primary $resource +\family default + is supported. + More features will be added to future releases. +\end_layout + +\begin_layout Subsection +Why +\family typewriter +systemd +\family default +? 
+\end_layout + +\begin_layout Standard +All major Linux distributions are now +\family typewriter +systemd +\family default + based. + It is the new quasi standard. + Although there have been some discussions in the community about its merits + and shortcomings, it appears to be accepted now in large parts of the Linux + world. +\end_layout + +\begin_layout Standard +Systemd has a few advantages: +\end_layout + +\begin_layout Enumerate +It is running as +\family typewriter +init +\family default + process under the reserved +\family typewriter +pid=1 +\family default +. + If it would ever die, then your system would die. + There is no need for adding a new MARS clustermanager daemon or similar, + which could fail independently from other parts of the system. +\end_layout + +\begin_layout Enumerate +Although +\family typewriter +systemd +\family default + has been criticised as being +\begin_inset Quotes eld +\end_inset + +monolithic +\begin_inset Quotes erd +\end_inset + + (referring to its internal software architecture), its +\emph on +usage +\emph default + by sysadmins is easily decomposable into many plugins called +\series bold +units +\series default +. +\end_layout + +\begin_layout Enumerate +Local LXC containers, local VMs, +\family typewriter +iSCSI +\family default + exports, +\family typewriter +nfs +\family default + exports and many other parts of the system are often already controlled + by +\family typewriter +systemd +\family default +. + Together with +\family typewriter +udev +\family default + and other parts, it already controls devices, LVM, mountpoints, etc. + Since MARS is only a particular +\emph on +component +\emph default + in a bigger complicated stack, it is an advantage to use the same (more + or less standardized and well-integrated) tools for managing the whole + stack. +\end_layout + +\begin_layout Standard +Systemd has also a few disadvantages: +\end_layout + +\begin_layout Enumerate +It is not accepted everywhere. 
+ Therefore the +\family typewriter +systemd +\family default + template extensions of +\family typewriter +marsadm +\family default + are not mandatory for MARS operations. + You can implement your own alternatives when necessary. +\end_layout + +\begin_layout Enumerate +It can be messy to deal with. + In particular, it can sometimes +\emph on +believe +\emph default + that the system +\emph on +were +\emph default + in a particular state, although in reality it isn't. + Compensation is hairy. +\end_layout + +\begin_layout Enumerate +Usability / reporting: it is less usable for getting an overview over a + bigger local system, and is practically unusable (out-of-the-box) for managing + a bigger cluster at cluster level. + Monitoring needs to be done separately. +\end_layout + +\begin_layout Subsection +Working Principle of the +\family typewriter +systemd +\family default + Template Engine +\begin_inset CommandInset label +LatexCommand label +name "subsec:Working-Principle-of" + +\end_inset + + +\end_layout + +\begin_layout Standard +Systemd already has some very basic templating capabilities. + It is possible to create unit names containing the +\family typewriter +@ +\family default + symbol, which can then be expanded under certain circumstances, e.g. + to tty names etc. + However, automatic expansion is only done when somebody knows the instance + name already +\emph on +in advance +\emph default +. + The author has not found any way for creating instance names out of +\begin_inset Quotes eld +\end_inset + +thin air +\begin_inset Quotes erd +\end_inset + +, such as from dynamically created MARS resource names. + Essentially, an +\emph on +inference machine +\emph default + for systemd templates does not yet exist.
+\end_layout + +\begin_layout Standard +This lacking functionality is completed with the following macro processing + capabilities of +\family typewriter +marsadm +\family default +: +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +sloppy +\end_layout + +\end_inset + + Some ordinary or templated +\family typewriter +systemd +\family default + unit files (see +\family typewriter +man systemd.unit +\family default +) can be installed into one of the following directories: +\family typewriter +./systemd-templates +\family default +, +\family typewriter +$HOME/.marsadm/systemd-templates/ +\family default +, +\family typewriter +/etc/marsadm/systemd-templates/ +\family default +, +\family typewriter +/usr/lib/marsadm/systemd-templates/ +\family default +, +\family typewriter +/usr/local/lib/marsadm/systemd-templates/ +\family default +. + Further places can be defined by overriding the $ +\family typewriter +MARS_PATH +\family default + environment variable. +\end_layout + +\begin_layout Standard +From these directories, ordinary systemd unit files will be just copied + into +\family typewriter +/run/systemd/system/ +\family default + (configurable via +\family typewriter +$SYSTEMD_TARGET_DIR +\family default +) and then picked up by +\family typewriter +systemd +\family default + as ordinary unit files. +\end_layout + +\begin_layout Standard +Template unit files are nothing but unit files containing +\family typewriter +@{ +\emph on +varname +\emph default +} +\family default + parts or other macro definitions in their filename, and possibly also in + their bodies, at arbitrary places. + These +\family typewriter +@{...} +\family default + parts are substituted by a +\family typewriter +marsadm +\family default + macro processing engine.
+\end_layout + +\begin_layout Standard +The following macro capabilities are currently defined: +\end_layout + +\begin_layout Description + +\family typewriter +@{ +\emph on +varname +\emph default +} +\family default + Expands to the value of the variable. + This can be used both in template filenames and in content of template + files. + Predefined are the following variables: +\end_layout + +\begin_deeper +\begin_layout Description + +\family typewriter +@{res} +\family default + The MARS resource name. +\end_layout + +\begin_layout Description + +\family typewriter +@{resdir} +\family default + The MARS resource directory +\family typewriter +/mars/resource-$res/ +\family default +. +\end_layout + +\begin_layout Description + +\family typewriter +@{host} +\family default + The local host name as determined by +\family typewriter +marsadm +\family default +, or as overridden by the +\family typewriter +--host= +\family default + parameter. +\end_layout + +\begin_layout Description + +\family typewriter +@{cmd} +\family default + The +\family typewriter +marsadm +\family default + command as given on the command line (only reasonable for debugging or + for error messages). +\end_layout + +\begin_layout Description + +\family typewriter +@{ +\emph on +varname +\emph default +} +\family default + Further variables as defined by the macro processor, see section +\begin_inset CommandInset ref +LatexCommand vref +reference "par:Predefined-Variables" + +\end_inset + +, and as definable by +\family typewriter +%let{ +\emph on +varname +\emph default +}{...} +\family default + statements, see also sections +\begin_inset CommandInset ref +LatexCommand vref +reference "subsec:Predefined-Complex-and" + +\end_inset + + and +\begin_inset CommandInset ref +LatexCommand vref +reference "subsec:Predefined-Trivial-Macros" + +\end_inset + +.
+\end_layout + +\end_deeper +\begin_layout Description + +\family typewriter +@eval{ +\emph on +text +\emph default +} +\family default + Calls the MARS macro processor as explained in chapter +\begin_inset CommandInset ref +LatexCommand vref +reference "chap:The-Macro-Processor" + +\end_inset + +, and substitutes its output. + Notice that systemd template variables occurring in the macro processor + +\family typewriter +\emph on +text +\family default +\emph default + must be accessed via the macro processor syntax +\family typewriter +%{varname} +\family default +, because the macro processor uses +\family typewriter +% +\family default + as an escape symbol, while the systemd template engine uses +\family typewriter +@ +\family default + instead. + This is necessary for distinction of both layers. + Notice that variables defined via the macro processor syntax +\family typewriter +%let{varname}{value} +\family default + can be afterwards accessed by the template engine via +\family typewriter +@{varname} +\family default + syntax, once the macro engine has finished working on +\family typewriter +\emph on +text +\family default +\emph default +. +\end_layout + +\begin_layout Description + +\family typewriter +^ +\emph on +{varname +\emph default +} +\family default + +\begin_inset space ~ +\end_inset + +or +\begin_inset space ~ +\end_inset + +, +\family typewriter +^ +\emph on +{varname +\emph default +}{ +\emph on +regex +\emph default +} +\family default + This can be used in template filenames only. + The +\family typewriter +\emph on +regex +\family default +\emph default + denotes a delimiter for scanning the filename until the delimiter is reached. 
+ The matching part of the filename is assigned to +\family typewriter +\emph on +varname +\family default +\emph default +, and can be used at any following +\family typewriter + @{ +\emph on +varname +\emph default +} +\family default + substitutions, both in the rest of the filename, and in the content of + the file. + When +\family typewriter +\emph on +regex +\family default +\emph default + is omitted or empty, it defaults to +\family typewriter +- +\family default + (a single minus symbol) which is suitable for matching paths of mountpoints + as written in systemd syntax. +\end_layout + +\begin_layout Description + +\family typewriter +@esc{ +\emph on +text +\emph default +} +\family default + Calls the +\family typewriter +systemd-escape +\family default + tool for conversion of pathnames following the +\family typewriter +systemd +\family default + naming conventions (see +\family typewriter +man systemd-escape +\family default +). + For example, a dash is converted to +\family typewriter + +\backslash +x2d +\family default +. +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + Omitting this can lead to problems when your resource names are containing + special characters like dashes or other special symbols (in the sense of + +\family typewriter +systemd +\family default +). + Bugs of this kind are hard to find and to debug. + Either forbid special characters in your installation, or don't forget + to test everything with some crude resource names! +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + Example snippet from a +\family typewriter +.path +\family default + unit. 
+ Please notice where escaping is needed and where it must not be used (also + notice that a dash is sometimes a legal part of the +\family typewriter +.mount +\family default + unit name, but except from the resource name part): +\begin_inset listings +inline false +status open + +\begin_layout Plain Layout + +[Path] +\end_layout + +\begin_layout Plain Layout + +PathExists=/dev/mars/@{res} +\end_layout + +\begin_layout Plain Layout + +Unit=vol-@escvar{res}.mount +\end_layout + +\end_inset + + +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + Another source of crude bugs is the backslash character in the +\family typewriter +systemd-escape +\family default + substitution, such as from +\family typewriter + +\backslash +x2d +\family default +. + When passed to a shell, such as in certain +\family typewriter +ExecStart= +\family default + statements, the backslash will be removed. + Therefore, don't forget to either replace any single backslash with two + backslashes, or to put the whole pathname in single quotes, or similar. + Always check the result of your substitutions! It depends on the +\emph on +target +\emph default + (such as +\family typewriter +bash +\family default +, as opposed to +\family typewriter +systemd +\family default +) whether further escaping of the escapes is needed, or whether it +\emph on +must not +\emph default + be applied. +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Become a master of the escaping hell by inserting debug code into your scripts + (reporting to +\family typewriter +/dev/stderr +\family default + or to log files) and do thorough testing like a devil. 
+\end_layout + +\begin_layout Description + +\family typewriter +@escvar{ +\emph on +varname +\emph default +} +\family default + Equivalent to +\family typewriter +@esc{@{ +\emph on +varname +\emph default +}} +\family default +. +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +When creating a new resource via +\family typewriter +marsadm create-resource +\family default +, or when adding a new replica via +\family typewriter +marsadm join-resource +\family default + or similar, the template system will automatically create new instances + for the new resource or its replicas. + Conversely, +\family typewriter +marsadm leave-resource +\family default + and its friends like +\family typewriter +delete-resource +\family default + etc will automatically remove the corresponding template instances from + +\family typewriter +/run/systemd/system/ +\family default +. +\end_layout + +\begin_layout Subsection +Example +\family typewriter +systemd +\family default + Templates +\begin_inset CommandInset label +LatexCommand label +name "subsec:Example-systemd-Templates" + +\end_inset + + +\end_layout + +\begin_layout Standard +These can be found in the MARS repo in the +\family typewriter +systemd/ +\family default + subdirectory. + At the moment, the following are available (subject to further extension + and improvements without notice): +\end_layout + +\begin_layout Description + +\family typewriter +mars.path +\family default + This ensures that the mountpoint +\family typewriter +/mars/ +\family default + is already mounted before +\family typewriter +mars.service +\family default + is started. +\end_layout + +\begin_layout Description + +\family typewriter +mars.service +\family default + This starts and stops the MARS kernel module, provided that +\family typewriter +/mars +\family default + is (somehow) mounted. 
+ The latter can be ensured by classical +\family typewriter +/etc/fstab +\family default + methods, or by +\family typewriter +.mount +\family default + units like your own hand-crafted +\family typewriter +mars.mount +\family default + unit. +\end_layout + +\begin_layout Description + +\family typewriter +mars-trigger.path +\family default + This is used for remote triggering of the marsadm template engine from + another MARS cluster member, e.g. + when initiating a handover. + Local triggering is also possible via +\family typewriter +touch /mars/userspace/systemd-trigger +\family default +. + When triggered, the command +\family typewriter +marsadm systemd-trigger +\family default + is executed. + In turn, this will re-compute all +\family typewriter +systemd +\family default + templates and start those units where the local host is in primary role. +\end_layout + +\begin_layout Description + +\family typewriter +dev-mars-@{res}.path +\family default + This is used for generic triggering of any +\family typewriter +systemd +\family default + unit as set by +\family typewriter +marsadm set-systemd-unit $res $unit +\family default + (see below in section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Handover-using-systemd" + +\end_inset + +). +\end_layout + +\begin_layout Description + +\family typewriter +^{unit}-@{res}.mount +\family default + This is one of the possible sub-ordinate targets which depend on +\family typewriter +dev-mars-@{res}.path +\family default +. + For fully automatic activation of this target, use something like +\family typewriter +marsadm set-systemd-unit mydata vol-mydata.mount +\family default + or similar. + This will automatically mount +\family typewriter +/dev/mars/mydata +\family default + to the mountpoint +\family typewriter +/vol/mydata +\family default +. 
+ Notice that the template notation +\family typewriter +^{unit} +\family default + can be used for mounting to an arbitrary mountpoint, such as +\family typewriter + /another/mountdir/mydata +\family default +, by using the corresponding systemd template syntax in +\family typewriter +marsadm set-systemd-unit mydata another-mountdir-mydata.mount +\family default +. +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + In general, it is good practice to have a +\emph on +consistent +\emph default + name scheme. + Always use the same name for the underlying LV (called disk in MARS terminology +), equal to the MARS resource name, equal to the last part of the mountpoint, + equal to the IQN of an iSCSI export, equal to the NFS share name, equal + to the LXC container name, equal to the KVM/qemu virtual machine name, + and so on. + Messing around with non-systematic naming conventions can easily result + in a hell. +\end_layout + +\begin_layout Subsection +Handover involving +\family typewriter +systemd +\begin_inset CommandInset label +LatexCommand label +name "subsec:Handover-using-systemd" + +\end_inset + + +\end_layout + +\begin_layout Standard +First, you need to install your systemd templates into one of the template + directories mentioned in section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Working-Principle-of" + +\end_inset + +. + In case you have never used the template engine before, you can create + the first instantiation via +\family typewriter +marsadm systemd-trigger +\family default +. + Afterwards, inspect +\family typewriter +/run/systemd/system/ +\family default + for newly created template instances and check them. 
+\end_layout + +\begin_layout Standard +For each resource +\family typewriter +$res +\family default +, you should set (potentially different) systemd targets via +\family typewriter +marsadm set-systemd-unit $res +\begin_inset Quotes eld +\end_inset + +$start_unit +\begin_inset Quotes erd +\end_inset + + +\begin_inset Quotes eld +\end_inset + +$stop_unit +\family default + +\begin_inset Quotes erd +\end_inset + +. + Notice that +\family typewriter +$start_unit +\family default + and +\family typewriter +$stop_unit +\family default + are typically denoting different targets (with few exceptions) for the + following reason: +\end_layout + +\begin_layout Description +Example: assume your stack consists of +\family typewriter +vol-@{res}.mount +\family default + and +\family typewriter +nfs-export-@{res}.service +\family default +. + Before the filesystem can be exported via +\family typewriter +nfs +\family default +, it +\emph on +first +\emph default + needs to be mounted. + At startup, +\family typewriter +systemd +\family default + can do this easily for you: just add a +\family typewriter +Requires= +\family default + dependency between both targets, or similar. + However, the situation can become tricky upon shutdown. + Theoretically, +\family typewriter +systemctl stop nfs-export-@{res}.service +\family default + +\emph on +could +\emph default + work in some cases, but in general it is not reliable. + Reason: there might be other +\emph on +sister +\emph default + units which +\emph on +also +\emph default + depend on the mount. + In some cases, you need not necessarily notice those sisters, because systemd + can add further (internal) targets +\emph on +automatically +\emph default +. + The problem is easily solvable by +\family typewriter +systemctl stop vol-@{res}.mount +\family default +, which will automatically tear down all dependencies in reverse order.
+\end_layout + +\begin_layout Standard +For maximum safety, +\family typewriter +$start_unit +\family default + should always point at the +\emph on +tip +\emph default + of your stack, while +\family typewriter +$stop_unit +\family default + should point at the +\emph on +bottom +\emph default + (but one level higher than +\family typewriter +/dev/mars/$res +\family default +). +\end_layout + +\begin_layout Standard +Removing any systemd targets is also possible via +\family typewriter +marsadm set-systemd-unit $res +\begin_inset Quotes eld +\end_inset + + +\begin_inset Quotes erd +\end_inset + + +\family default + . + +\end_layout + +\begin_layout Standard +When everything is set up properly, the following should work: +\end_layout + +\begin_layout Enumerate +Issue +\family typewriter +marsadm primary $res +\family default + on another node which is currently in secondary role. +\end_layout + +\begin_layout Enumerate +As a consequence, +\family typewriter +systemctl stop +\begin_inset Quotes eld +\end_inset + +$stop_unit +\begin_inset Quotes erd +\end_inset + + +\family default + should be automatically executed at the old primary side. + +\end_layout + +\begin_layout Enumerate +After a while, the MARS kernel module will notice that +\family typewriter +/dev/mars/$res +\family default + is no longer opened. + You can check this manually via +\family typewriter + marsadm view-device-opened $res +\family default + which will tell you a boolean result. +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + In case the device is not closed, ordinary handover cannot proceed, because + somebody could (at least potentially) write some data into it, even after + the handover, which would lead to a split brain. + Therefore MARS +\emph on +must +\emph default + insist that the device is closed before ordinary handover will proceed. 
+ In case it is not closed, you can (a) use +\family typewriter +primary --force +\family default + which will likely provoke a split brain, or (b) check your +\family typewriter +systemd +\family default + configuration or other sources of error why the device is not closed. + Possible reasons could be hanging processes or hanging sessions which might + need a +\family typewriter +kill +\family default + or a +\family typewriter +kill -9 +\family default + or similar. + Notice that +\family typewriter +lsof +\family default + does not catch +\emph on +all +\emph default + possible sources like (recursive or bind-) mounts. +\end_layout + +\begin_layout Enumerate +Once +\family typewriter +/dev/mars/$res +\family default + has disappeared, the ordinary MARS handover from the old primary to the + new site should proceed as usual. +\end_layout + +\begin_layout Enumerate +After +\family typewriter +/dev/mars/$res +\family default + has appeared at the new site, +\family typewriter +systemctl start +\begin_inset Quotes eld +\end_inset + +$start_unit +\begin_inset Quotes erd +\end_inset + + +\family default + should be automatically executed. + +\end_layout + +\begin_layout Standard +The rest depends on your +\family typewriter +systemd +\family default + and its configuration. + For example, you can configure systemd targets for activation of VMs, or + for +\family typewriter +LXC +\family default + containers, or for +\family typewriter +iSCSI +\family default + exports, or for +\family typewriter +nfs +\family default + exports, or for +\family typewriter +glusterfs +\family default + exports, or for whatever you need. + For true geo-redundancy, you will likely have to include some +\family typewriter +quagga +\family default + or +\family typewriter +bird +\family default + or other BGP configurations into your stack. 
+\end_layout + +\begin_layout Section +Creating Backups via Pseudo Snapshots +\end_layout + +\begin_layout Standard +When all your secondaries are all homogeneously located in a standby datacenter, + they will be almost idle all the time. + This is a waste of computing resources. +\end_layout + +\begin_layout Standard +Since MARS is no substitute for a full-fledged backup system, and since + backups may put high system load onto your active side, you may want to + utilize your passive hardware resources in a better way. +\end_layout + +\begin_layout Standard +MARS supports this thanks to its ability to switch the +\family typewriter +pause-replay +\family default + +\emph on +independently +\emph default + from +\family typewriter +pause-fetch +\family default +. +\end_layout + +\begin_layout Standard +The basic idea is simple: just use +\family typewriter +pause-replay +\family default + at your secondary site, but leave the replication of transaction logfiles + intact by deliberately +\emph on +not +\emph default + saying +\family typewriter +pause-fetch +\family default +. + This way, your secondary replica (block device) will stay frozen for a + limited time, without losing your redundancy: since the transaction logs + will continue to replicate in the meantime, you can start +\family typewriter +resume-replay +\family default + at any time, in particular when a primary-side incident should happen unexpecte +dly. + The former secondary will just catch up by replaying the outstanding parts + of the transaction logs in order to become recent. +\end_layout + +\begin_layout Standard +However, some +\emph on +details +\emph default + have to be obeyed. + In particular, the current version of MARS needs an additional +\family typewriter +detach +\family default + operation, in order to release exclusive access to the underlying disk + +\family typewriter +/dev/lv/$res +\family default +.
+ Future versions of MARS are planned to support this more directly, without + need for an intermediate +\family typewriter +detach +\family default + operation. +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + +Beware: +\family typewriter +mount -o ro /dev/vg/$res +\family default + can lead to +\series bold +unnoticed write operations +\series default + if you are not careful! Some journalling filesystems like +\family typewriter +xfs +\family default + or +\family typewriter +ext4 +\family default + may replay their journals onto the disk, leading to +\emph on +binary +\emph default + differences and thus +\series bold +destroying your consistency +\series default + later when you re-enable +\family typewriter +resume-replay +\family default +! +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Therefore, you may use small LVM snapshots (only in such cases). + Typically, +\family typewriter +xfs +\family default + journal replay will require only a few megabytes. + Therefore you typically don't need much temporary space for this. 
+ Here is a more detailed description of steps: +\end_layout + +\begin_layout Enumerate + +\family typewriter +marsadm pause-replay $res +\end_layout + +\begin_layout Enumerate + +\family typewriter +marsadm detach $res +\end_layout + +\begin_layout Enumerate + +\family typewriter +lvcreate --size 100m --snapshot --name ro-$res /dev/vg/$res +\end_layout + +\begin_layout Enumerate + +\family typewriter +mount -o ro /dev/vg/ro-$res /mnt/tmp +\end_layout + +\begin_layout Enumerate +Now draw your backup from +\family typewriter +/mnt/tmp/ +\end_layout + +\begin_layout Enumerate + +\family typewriter +umount /mnt/tmp +\end_layout + +\begin_layout Enumerate + +\family typewriter +lvremove -f /dev/vg/ro-$res +\end_layout + +\begin_layout Enumerate + +\family typewriter +marsadm up $res +\end_layout + +\begin_layout Standard +Hint: during the backup, the transaction logs will accumulate on +\family typewriter +/mars/ +\family default +. + In order to avoid overflow of +\family typewriter +/mars/ +\family default + (c.f. + section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:Defending-Overflow" + +\end_inset + +), don't unnecessarily prolong the backup duration. 
+\end_layout + +\begin_layout Chapter +LV Football / VM Football / Container Football +\begin_inset CommandInset label +LatexCommand label +name "chap:LV-Football" + +\end_inset + + +\end_layout + +\begin_layout Standard +The Football scripts can be obtained in two different ways: +\end_layout + +\begin_layout Enumerate + +\family typewriter +git clone --recurse-submodules https://github.com/schoebel/mars +\begin_inset Newline newline +\end_inset + + +\family default +then +\family typewriter +cd mars/football/ +\end_layout + +\begin_layout Enumerate + +\family typewriter +git clone https://github.com/schoebel/football +\end_layout + +\begin_layout Standard +The +\family typewriter +--recurse-submodules +\family default + method is the preferred way for non-developers because the main repo contains + a link to the right version of Football. +\end_layout + +\begin_layout Standard +When switching branches, you should use +\family typewriter +git submodule update +\family default + for synchronizing the Football submodule with the MARS main checkout. +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Recommended MARS branch for playing Football is +\family typewriter +mars0.1a.y +\family default +. + Although the old stable branch +\family typewriter +mars0.1.y +\family default + has been updated for the most important +\family typewriter +marsadm +\family default + features +\family typewriter +merge-cluster +\family default + and +\family typewriter +split-cluster +\family default +, it does not scale well for Football and can cause operational problems + when merging too many hosts together, showing some +\begin_inset Formula $O(n^{2})$ +\end_inset + + metadata update behaviour where +\begin_inset Formula $n$ +\end_inset + + is the number of machines in a MARS cluster.
+ The future branch +\family typewriter +mars0.1b.y +\family default + will contain more scalability improvements; in particular the +\family typewriter +split-cluster +\family default + operation should no longer be needed at all because it is planned to scale + with +\begin_inset Formula $O(k)$ +\end_inset + + where +\begin_inset Formula $k$ +\end_inset + + is the number of resources at a +\emph on +single +\emph default + host. + This should allow creation of a +\emph on +virtual(!) +\emph default + +\family typewriter +BigCluster +\family default + pool at +\emph on +metadata +\emph default + level (where metadata transfer rates are typically measured in KiB/s), + consisting of thousands of machines, while at the same time creating a + +\family typewriter +LocalSharding +\family default + or +\family typewriter +FlexibleSharding +\family default + model at the realtime IO paths (where some petabytes are pumped through + thick pipelines). + Please check the other branches regularly at the github repo whether some + newer branches will be marked +\begin_inset Quotes eld +\end_inset + +stable +\begin_inset Quotes erd +\end_inset + +, or at least +\begin_inset Quotes eld +\end_inset + +beta +\begin_inset Quotes erd +\end_inset + +. + At the moment (spring 2018), +\family typewriter +mars0.1a.y +\family default + is marked +\begin_inset Quotes eld +\end_inset + +beta +\begin_inset Quotes erd +\end_inset + + although it is in production at several thousands of machines for several + months. 
+\end_layout + +\begin_layout Standard +Low-level documentation is available by calling any of the scripts with + +\family typewriter +--help +\family default + parameter (see also appendix +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:football-–help" + +\end_inset + + ff): +\end_layout + +\begin_layout Itemize + +\family typewriter +./football.sh --help +\end_layout + +\begin_layout Itemize + +\family typewriter +./screener.sh --help +\end_layout + +\begin_layout Standard +By adding +\family typewriter +--verbose +\family default +, you can get a list of parameters for configuring and tweaking. +\end_layout + +\begin_layout Section +Football Overview +\begin_inset CommandInset label +LatexCommand label +name "sec:Football-Overview" + +\end_inset + + +\end_layout + +\begin_layout Standard +Topmost architectural level (not yet implemented): +\end_layout + +\begin_layout Standard +\noindent +\align center +\begin_inset Graphics + filename images/pool-optimizer.fig + width 100col% + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +The planned heart of the Football system is the generic pool optimizer, + which aims to provide a similar functionality than Kubernetes, but working + on a sharding architecture. + Instead of controlling +\emph on +stateless +\emph default + Docker containers, its designated goal is to control masses of LVs on thousands + of machines, creating a +\begin_inset Quotes eld +\end_inset + +Virtually Distributed LVM pool +\begin_inset Quotes erd +\end_inset + + (petabytes of total storage), and doing similar things than Software Defined + Storage (SDS) on the virtual pool. +\end_layout + +\begin_layout Standard +In addition to load balancing of storage space (and its special cases like + hardware lifecycle), there will be designated plugins for dealing with + CPU and RAM dimensions. + Further dimensions and a variety of goal functions could be added via future + plugins. 
+ The optimizer itself aims to be as generic as possible, while functionality + and interfaces can be added via plugins and/or drivers. + Future versions might even support DRBD in addition to MARS. + The first version may use a simple greedy algorithm for solving the underlying + +\begin_inset Formula ${\cal NP}$ +\end_inset + +-complete problem, but could be augmented with more sophisticated problem + solvers in future. +\end_layout + +\begin_layout Standard +The automatic operations generated by pool-optimizer will be customizable + by dozens of parameters, and also extendable by action plugins. + At the moment, the following +\family typewriter +football.sh +\family default + actions are planned: +\end_layout + +\begin_layout Description + +\family typewriter +migrate +\family default + This will move an LV (together with its VM / LXC container / etc) to a + different machine in the machine pool. + This is the classical Football +\begin_inset Quotes eld +\end_inset + +kick +\begin_inset Quotes erd +\end_inset + + operation. +\end_layout + +\begin_layout Description + +\family typewriter +shrink +\family default + This decreases the occupied LV space of a filesystem (currently only +\family typewriter +xfs +\family default + implemented, but easily extendable) via creation of a smaller temporary + LV at the hypervisor, then transferring all data during operations via + local +\family typewriter +rsync +\family default +, then shutting down the VM for a short period, doing a final incremental + +\family typewriter +rsync +\family default +, renaming the copied temporary LV to its original name, restarting the + VM on the new version (which contains the same data as before but wastes + less space), and finally re-establishing the MARS replicas (but of course + with smaller LV size). 
+\end_layout + +\begin_layout Description + +\family typewriter +extend +\family default + This is much easier than shrinking: it first increases the underlying LV + size dynamically on all replicas, then +\family typewriter +marsadm resize +\family default +, and finally calls +\family typewriter +xfs_growfs +\family default + while the filesystem remains mounted and while the VM / container is running. +\end_layout + +\begin_layout Description + +\family typewriter +migrate+shrink +\family default + Similar to +\family typewriter +migrate +\family default + immediately followed by +\family typewriter +shrink +\family default +, but produces less network traffic and runs faster. +\end_layout + +\begin_layout Description + +\family typewriter +migrate+shrink+back +\family default + Use this when there is not enough local temporary space for shrinking. + The LV is first migrated to a temporary host, then shrunk, and finally + migrated back to its original position. +\end_layout + +\begin_layout Standard +By running the overall system in an endless loop, a control loop for permanent + optimization can be established. + Typical periods are every few days, or once a week. + In addition, manual triggering is also possible. +\end_layout + +\begin_layout Standard +The result of an (incremental) pool-optimizer run is a CSV file, which may + be automatically forwarded to the execution engine +\family typewriter +football.sh +\family default + for +\emph on +manual +\emph default + execution, or to +\family typewriter +screener.sh +\family default + for mass execution on a common control machine. + Alternatively, intermediate steps like manual checking, filtering etc may + be inserted into the processing pipeline. +\end_layout + +\begin_layout Standard +The execution engine +\family typewriter +football.sh +\family default + resp.
+ its 1&1-internal variant +\family typewriter +tetris.sh +\family default + is already in production at 1&1, and already reached more than 300 migrations + per week. + Architecture of the execution engine: +\end_layout + +\begin_layout Standard +\noindent +\align center +\begin_inset Graphics + filename images/football.fig + width 90col% + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +The so-called Screener is simply a generic program allowing mass execution + of arbitrary scripts in background +\family typewriter +screen +\family default + sessions. + This allows masses (several hundreds, possibly thousands) of long-lasting + processes (hours or days) to run +\emph on +unattended +\emph default + in background, while allowing a (larger) group of sysadmins to attach / + detach to +\family typewriter +screen +\family default + sessions at any time for corrective by-hand actions, e.g. + in case of failures or other problems, or for supervision, etc. +\end_layout + +\begin_layout Standard +When Screener is combined with the Football execution engine +\family typewriter +football.sh +\family default +, more specialized functionality is available (via a variety of plugins): +\end_layout + +\begin_layout Itemize +Optional waiting for sysadmin confirmation before some customer downtime + is initiated. +\end_layout + +\begin_layout Itemize +Automatic generation of +\family typewriter +motd +\family default + status reporting to other sysadmins. +\end_layout + +\begin_layout Itemize +Automatic sending of email alerts or status reports, e.g. + on errors or critical errors, etc. + By sending email to SMS gateways, real-time alerting can be configured + (e.g. + over the weekend). +\end_layout + +\begin_layout Itemize +Generic interfacing to external scripts with configurable parameters, e.g. + for triggering monitoring systems, feeding external databases, etc. 
+\end_layout + +\begin_layout Standard +Screener can detect and will automatically manage the following states (in + this example, all state lists are empty): +\end_layout + +\begin_layout Standard +\begin_inset listings +inline false +status open + +\begin_layout Plain Layout + +$common_user> ./screener.sh list +\end_layout + +\begin_layout Plain Layout + +List of waiting: +\end_layout + +\begin_layout Plain Layout + +List of delayed: +\end_layout + +\begin_layout Plain Layout + +List of condition: +\end_layout + +\begin_layout Plain Layout + +List of running: +\end_layout + +\begin_layout Plain Layout + +List of critical: +\end_layout + +\begin_layout Plain Layout + +List of serious: +\end_layout + +\begin_layout Plain Layout + +List of interrupted: +\end_layout + +\begin_layout Plain Layout + +List of illegal: +\end_layout + +\begin_layout Plain Layout + +List of failed: +\end_layout + +\begin_layout Plain Layout + +List of timeouted: +\end_layout + +\begin_layout Plain Layout + +List of done: +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +Screener can discriminate the +\emph on +severity +\emph default + of errors as follows: +\end_layout + +\begin_layout Description + +\family typewriter +failed +\family default + An error occurred +\emph on +outside +\emph default + of critical sections, e.g. + during preparation of LV space etc. + During ordinary operations, VMs / containers are usually running continuously, + and there is no customer impact to be expected. + Typically, +\family typewriter +./screener.sh restart $resource +\family default + should fix the problem if it is only a temporary problem. + However, for maximum safety, manual inspection via +\family typewriter +./screener.sh attach $resource +\family default + or inspection of the logfile via +\family typewriter +./screener.sh show $resource +\family default + is recommended before trying an automatic restart.
+\end_layout + +\begin_layout Description + +\family typewriter +serious +\family default + An error occurred while a VM / container was temporarily stopped, which + +\series bold +would +\series default + normally lead to customer downtime, but Football was able to +\emph on +compensate +\emph default + for the problem +\emph on +for now +\emph default + by +\emph on +automatically +\emph default + restarting the VM. + Thus no long-lasting customer impact has likely occurred. + However, manual inspection and repair by sysadmins is likely necessary. +\end_layout + +\begin_layout Description + +\family typewriter +critical +\family default + An +\emph on +uncompensated +\emph default + error occurred during customer downtime. + The VM / container is likely down. + This will need manual sysadmin actions ASAP, such as hardware replacement, + networking fixes, etc. +\end_layout + +\begin_layout Description + +\family typewriter +timeouted +\family default + This means that the script is assumed to hang because it did not produce + any output for more than +\family typewriter +$session_timeout +\family default + seconds (default 3600 * 3 = 3 hours). +\end_layout + +\begin_layout Description + +\family typewriter +illegal +\family default + This means that a precondition is not met. + For example, there is not enough space at the target LVM. +\end_layout + +\begin_layout Description + +\family typewriter +interrupted +\family default + Somebody has pressed +\family typewriter +Ctrl-c +\family default + in a +\family typewriter +screen +\family default + session, or has otherwise sent a signal to the running script. + As a result, a signal +\family typewriter +trap +\family default + has been executed. +\end_layout + +\begin_layout Standard +\noindent +Ordinary Screener states during execution: +\end_layout + +\begin_layout Description + +\family typewriter +running +\family default + This means that a (background) process is currently running.
+ You can attach to the screen session either manually via +\family typewriter +screen -x $pid.$resource +\family default +, or more comfortably via +\family typewriter +./screener.sh attach $resource +\family default +. + Then you can use +\family typewriter +screen +\family default + as documented in +\family typewriter +man screen +\family default +. + The most important operation is detaching via keystrokes +\family typewriter +Ctrl-a d +\family default +. + +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + +Notice: don't press +\family typewriter +Ctrl-c +\family default + unless you know what you are doing. + In most cases, this will terminate the running process, and in consequence + lead to +\family typewriter +\series bold +interrupted +\family default +\series default + or +\family typewriter +\series bold +failed +\family default +\series default + or even +\family typewriter +\series bold +critical +\family default +\series default + state (depending on the moment of keypress). + Depending on parameter +\family typewriter +drop_shell +\family default +, the Screener session will also terminate, or you will get an interactive + shell for manual repair. +\end_layout + +\begin_layout Description + +\family typewriter +waiting +\family default + When the plugins +\family typewriter +football-waiting +\family default + and +\family typewriter +screener-waiting +\family default + are configured properly (which is +\emph on +not +\emph default + the default), the script execution will pause immediately before a customer + downtime action would be started. + Now any sysadmin from the larger group has a chance to +\family typewriter +./screener.sh attach $resource +\family default + and to press RETURN to continue the waiting script and to personally watch + the course of the critical section.
+ There are some more comfortable variants like +\family typewriter +./screener.sh continue $resource +\family default + for background continuation of a single session, or +\family typewriter +./screener.sh continue 100 +\family default + which can be used for continuing masses of waiting sessions. + There are further variants which are automatically attaching to sessions, + see Appendix +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:screener–help" + +\end_inset + +. +\end_layout + +\begin_layout Description + +\family typewriter +delayed +\family default + This state is only entered before +\family typewriter +lvremove $resource +\family default + is executed (which will destroy your old internal backup copy), and when + configured appropriately. + Typically, you also need to configure the +\family typewriter +$wait_before_cleanup +\family default + variable in order to avoid endless waiting. + Notice that old LV data becomes outdated after a while, so please don't + unnecessarily prolong the running time of your scripts by choosing too + long +\family typewriter +$wait_before_cleanup +\family default + values. +\end_layout + +\begin_layout Description + +\family typewriter +condition +\family default + Special case of delay: some condition is currently not met, such as the + +\family typewriter +$business_hours +\family default + feature, where you can configure when customer downtimes are allowed, and + when not. +\end_layout + +\begin_layout Description + +\family typewriter +done +\family default + This means that the script reported successful execution by exit status + +\family typewriter +0 +\family default +. + The background screen session terminated automatically. + You can inspect the logfile manually via +\family typewriter +./screener.sh show $resource +\family default +, or by looking into the directory +\family typewriter +$screener_logdir/done/ +\family default +.
+\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Logfiles of other states can also be inspected (or monitored by standard + tools like +\family typewriter +grep +\family default +) by looking into sister directories, such as +\family typewriter +$screener_logdir/running/ +\family default +. +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + +When running Screener for several months or years, old logfiles will accumulate + in these directories over time. + Call +\family typewriter +./screener.sh purge +\family default + or +\family typewriter +./screener.sh cron +\family default + regularly via a cron job, or archive your old logfiles from time to time + via another method. +\end_layout + +\begin_layout Section +HOWTO instantiate / customize Football +\begin_inset CommandInset label +LatexCommand label +name "sec:HOWTO-instantiate-Football" + +\end_inset + + +\end_layout + +\begin_layout Standard +In order to install and operate Football, the recommended +\emph on +deployment +\emph default + strategy is bottom-up, layer by layer. +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Top-down strategies should be used +\emph on +only +\emph default +, and +\emph on +only +\emph default +, for planning. + An Egyptian pyramid can never be built, even if you had some billions of + workers, by starting at the tip and by creating the foundations as the + very last step. + Such an attempt would end in disaster. +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + + +\series bold +Testing +\series default + of each layer +\series bold +separately +\series default + is very important.
+ Before proceeding to the next higher layer, first ensure that any lower + layer is working +\emph on +correctly +\emph default +. + Otherwise debugging can become tricky. +\end_layout + +\begin_layout Subsection +Block Device Layer +\end_layout + +\begin_layout Standard +Step-by-step instructions can be found in chapter +\begin_inset CommandInset ref +LatexCommand vref +reference "chap:Quick-Start-Guide" + +\end_inset + +. +\end_layout + +\begin_layout Standard +Please ensure that your hardware (including RAID controllers and LVM and + so on), and your operating system, and your network / setup, and MARS is + working correctly before proceeding to the next layer. +\end_layout + +\begin_layout Subsection +Mechanics Layer of Cluster Operations +\begin_inset CommandInset label +LatexCommand label +name "subsec:Mechanics-Layer of Cluster" + +\end_inset + + +\end_layout + +\begin_layout Standard +In the following example, it is assumed that +\family typewriter +systemd +\family default + is used, as explained in section +\begin_inset CommandInset ref +LatexCommand vref +reference "sec:systemd-Templates" + +\end_inset + +, and now applied to +\family typewriter +vm4711 +\family default + supposed to run on hypervisors +\family typewriter +hyper1234a +\family default + (primary role) and +\family typewriter +hyper1234b +\family default + (secondary role), which is assumed to be controllable via the following + +\family typewriter +systemd +\family default + start and stop units: +\end_layout + +\begin_layout Itemize + +\family typewriter +marsadm set-systemd-unit vm4711 lxc-vm4711.target vol-vm4711.mount +\end_layout + +\begin_layout Standard +Test the cluster mechanics layer like in the following example: +\end_layout + +\begin_layout Itemize +On host +\family typewriter +hyper1234b +\family default +, the following must work: +\family typewriter +marsadm primary vm4711 +\end_layout + +\begin_layout Standard +This must result in an automatic handover of +\family 
typewriter +vm4711 +\family default + from the current primary site +\family typewriter +hyper1234a +\family default + to the new primary +\family typewriter +hyper1234b +\family default +, as explained in section +\begin_inset CommandInset ref +LatexCommand vref +reference "sec:systemd-Templates" + +\end_inset + +. + Please check that +\family typewriter +vm4711 +\family default + is running correctly at the new location. + It must be reachable via network. + In case you are using BGP because +\family typewriter +hyper1234a +\family default + and +\family typewriter +hyper1234b +\family default + are located in different datacenters, ensure that BGP is also controlled + by your +\family typewriter +systemd +\family default + unit dependencies, and test it. +\end_layout + +\begin_layout Subsection +Mechanics Layer of Football Operations +\begin_inset CommandInset label +LatexCommand label +name "subsec:Mechanics-Layer-of-Football" + +\end_inset + + +\end_layout + +\begin_layout Standard +At the moment, there are two alternative plugins already implemented in + the Football sub-project (see subdirectory +\family typewriter +football/plugins/ +\family default +). + Of course, you can implement some further plugins. + Please put them under GPL, and share them. + Please contact the author of MARS for inclusion into the official MARS + release. +\end_layout + +\begin_layout Description + +\family typewriter +football-cm3.sh +\family default + This plugin can be only used at Shared Hosting Linux (ShaHoLin) at 1&1, + since it is bound to a specific +\emph on +proprietary +\emph default + instance. + However, the +\emph on +sourcecode +\emph default + of the +\emph on +plugin +\emph default + itself (not the code called by the plugin, e.g. + over REST interfaces) is under GPL. 
+ You can (and +\emph on +should +\emph default +) +\emph on +inspect +\emph default + the plugin code, and +\series bold +learn +\series default + how a real-world system (which has grown over some decades and bears a + lot of history) is actually working at certain points. +\begin_inset Newline newline +\end_inset + +This plugin is automatically activated when called via the symlink +\family typewriter +tetris.sh +\family default + instead of directly calling +\family typewriter +football.sh +\family default +. + This has historic reasons. +\end_layout + +\begin_layout Description + +\family typewriter +football-basic.sh +\family default + This plugin uses the new +\family typewriter +systemd +\family default + interface of +\family typewriter +marsadm +\family default + for controlling the mechanics. + See section +\begin_inset CommandInset ref +LatexCommand vref +reference "sec:systemd-Templates" + +\end_inset + +. + You should be familiar with commands like +\family typewriter +marsadm set-systemd-unit +\family default +. + Manual handover via +\family typewriter +marsadm primary $resource +\family default + must be already working (with high reliability +\begin_inset Formula $\leadsto$ +\end_inset + + check that any +\family typewriter +umount +\family default + works everywhere without hangups) before you can start using this plugin + for +\family typewriter +football.sh +\family default +. +\begin_inset Newline newline +\end_inset + +This plugin is automatically activated when calling football.sh. + It can be deactivated by overriding variable +\family typewriter +enable_basic +\family default +=0. +\end_layout + +\begin_layout Subsubsection +Configuring and Overriding Variables +\end_layout + +\begin_layout Standard +A detailed list of all available customization options can be obtained via + +\family typewriter +./football.sh --help --verbose +\family default +. + Each option is documented by some help text, and you can always see the + default settings. 
+ See also section +\begin_inset CommandInset ref +LatexCommand vref +reference "sec:football-help-verbose" + +\end_inset + +. +\end_layout + +\begin_layout Standard +If you create any new plugin for Football, or if you modify an existing + one, please follow these standards. + Try to describe any option as concisely as possible. +\end_layout + +\begin_layout Standard +Configuring is possible in the following ways, in order of precedence: +\end_layout + +\begin_layout Itemize +at the command line via +\family typewriter +./football.sh --$variable_name=$value $arguments +\family default +. +\end_layout + +\begin_layout Itemize +via environment variables, e.g. + globally via +\family typewriter +export $variable_name=$value && ./football.sh $arguments +\family default +, or locally via +\family typewriter +$variable_name=$value ./football.sh $arguments +\family default +. +\end_layout + +\begin_layout Itemize +by adding some small +\family typewriter +football-*.conf +\family default + files into one of the directories +\family typewriter +/usr/lib/mars/plugins +\family default + +\family typewriter +/etc/mars/plugins +\family default + +\family typewriter +$script_dir/plugins +\family default + +\family typewriter +$HOME/.mars/plugins +\family default + +\family typewriter +./plugins +\family default +, in this order of precedence. + This list of directories can be modified externally via the environment + variable +\family typewriter +football_includes +\family default + (but not during already running inclusions of +\family typewriter +football-*.conf +\family default + files).
+\end_layout + +\begin_layout Subsubsection + +\family typewriter +football-basic.sh +\family default + Customization +\end_layout + +\begin_layout Standard +Here is a brief summary of the most important configuration tasks and options: +\end_layout + +\begin_layout Description + +\family typewriter +initial_hostname_file +\family default + Somehow, the +\family typewriter +football-basic.sh +\family default + plugin must know the hostnames of your pool. + Once Football is working, the hostname will be +\emph on +automatically +\emph default + maintained whenever +\family typewriter +marsadm join-cluster +\family default + or +\family typewriter +marsadm merge-cluster +\family default + is executed somewhere. +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +For your hardware deployment strategy, this means the following: just deploy + any new hardware, or remove your old one (after Football has emptied all + of your former LV resources). + It does not matter how you are doing this, e.g. + via OpenStack, or via the proprietary +\family typewriter +Schlunix +\family default + methods used at ShaHoLin, or whatever. + Then you have the following options for adding the new machines to the + Football hostname cache (see variable +\family typewriter +hostname_cache +\family default +): +\end_layout + +\begin_deeper +\begin_layout Enumerate +Write the pure hostname(s) into the file as configured with +\family typewriter +initial_hostname_file +\family default + (by default: +\family typewriter +./hostnames.input +\family default +). + Each hostname must be on its own ASCII line. + Not only these new hosts will be picked up automatically, but also... 
+\end_layout + +\begin_layout Enumerate +...any further hosts reported anywhere (at the already known hosts) by +\family typewriter +marsadm view-cluster-members +\family default +, +\series bold +transitively +\series default +. +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Consequence: if you are running the new +\family typewriter +mars0.1b.y +\family default + (or newer) branch of MARS, you don't need +\family typewriter +marsadm split-cluster +\family default + anymore. + Then you can operate several thousands of machines as a big +\series bold +virtual +\series default + cluster, even if their storage is local (see +\family typewriter +LocalSharding +\family default + model described in section +\begin_inset CommandInset ref +LatexCommand vref +reference "subsec:Variants-of-Sharding" + +\end_inset + +). +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + Previous versions of MARS, like +\family typewriter +mars0.1.y +\family default + and +\family typewriter +mars0.1a.y +\family default +, are not yet scalable at their +\series bold +metadata +\series default + exchange level. + Trying to +\family typewriter +join-cluster +\family default + or +\family typewriter +merge-cluster +\family default + several tens or even hundreds of machines with those versions will surely + lead to a disaster. + Always use +\family typewriter +marsadm split-cluster +\family default + at those versions, regularly. + First upgrade to the future +\family typewriter +mars0.1b.y +\family default + (or later versions) before creating big clusters at +\emph on +metadata +\emph default + level! +\end_layout + +\begin_layout Enumerate +Use +\family typewriter +./football.sh basic_add_host $hostname +\family default + for adding a single new host manually. 
+ Afterwards, the transitive closure of all reachable hosts is computed as + usual. + This may also be used for the very first initialization of a fresh Football + installation, provided you already have a big cluster at metadata level. +\end_layout + +\end_deeper +\begin_layout Standard +Test the Football mechanics like one of the following example command sequences, + where it is assumed that +\family typewriter +hyper4321a +\family default + and +\family typewriter +hyper4321b +\family default + are already +\emph on +newly +\emph default + deployed hypervisors having enough local LVM storage, and have been already + added to the MARS cluster via +\family typewriter +marsadm join-cluster +\family default +, or have been at least added to +\family typewriter +hostname_cache +\family default + as explained above: +\end_layout + +\begin_layout Itemize + +\family typewriter +ssh-add; ./football.sh migrate vm4711 hyper4321a hyper4321b +\end_layout + +\begin_layout Itemize + +\family typewriter +ssh-add; ./football.sh migrate vm4711 hyper4321a hyper4321b --screener; ./screener.s +h attach vm4711 +\end_layout + +\begin_layout Standard +Check the automatically produced logfile (via +\family typewriter +./screener.sh show vm4711 +\family default +) that Football has automatically determined the old hypervisor where +\family typewriter +vm4711 +\family default + was running before, that it has automatically executed +\family typewriter +marsadm merge-cluster +\family default + when necessary, and has created the LV replicas at the new hypervisors, + and has executed some +\family typewriter +marsadm join-resource +\family default + commands, has automatically waited for MARS fast fullsync to finish, then + successfully executed an automatic handover to the new primary hypervisor, + and finally has destructed the old MARS replicas including their old LVs. 
+ Check that +\family typewriter +vm4711 +\family default + is running correctly at the new hypervisor pair, and that handover between + the new hypervisor sites +\family typewriter +*a +\family default + and +\family typewriter +*b +\family default + is working correctly. + +\end_layout + +\begin_layout Standard +A larger group of sysadmins can co-work over a central common control machine + via ssh agent forwarding (which must be enabled in +\family typewriter +/etc/ssh/sshd_config +\family default +) in the following way: +\end_layout + +\begin_layout Itemize +At the workstation: +\family typewriter +ssh-add; ssh -A football@common-control.mycompany.org +\family default + +\begin_inset Newline newline +\end_inset + +Then +\family typewriter +cd $script_dir +\family default + and run your +\family typewriter +./football.sh +\family default + or +\family typewriter +./screener.sh +\family default + commands as usual. + The automatically generated logfiles will be tagged with the +\emph on +real +\emph default + usernames from your original workstation login, as reported by +\family typewriter +ssh-add -l +\family default +, even transitively when using ssh agent forwarding. + Thus you may use a common username like +\family typewriter +football +\family default + on the common +\begin_inset Foot +status open + +\begin_layout Plain Layout +Of course, it is also possible to maintain individual accounts for the same + Unix group, and set +\family typewriter +umask +\family default + and common directory permissions accordingly, such that the classical group-wis +e working concept from the 1970s will do the rest. + This is much more work, but can establish more fine-grained access control. + Even more sophisticated methods could involve ACLs, but suchalike is probably + only necessary at extremely high-sensitive installations. +\end_layout + +\end_inset + + control machine. 
+\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Hint: use +\family typewriter +./screener.sh list +\family default + (or one of its more specific variants like +\family typewriter +./screener.sh list-running +\family default +) for determining what's currently going on in a larger group of sysadmins. +\end_layout + +\begin_layout Chapter +MARS for Developers +\end_layout + +\begin_layout Standard +This chapter is organized strictly top-down. +\end_layout + +\begin_layout Standard +If you are a sysadmin and want to inform yourself about internals (useful + for debugging), the relevant information is at the beginning, and you don't + need to dive into all technical details at the end. +\end_layout + +\begin_layout Standard +If you are a kernel developer and want to contribute code to the emerging + MARS community, please read it (almost) all. + Due to the top-down organization, sometimes you will need to follow some + forward references in order to understand details. + Therefore I recommend reading this chapter twice in two different reading + modes: in the first reading pass, you just get a raw network of principles + and structures in your brain (you don't want to grasp details, therefore + don't strive for a full understanding). + In the second pass, you will exploit your knowledge from the first pass + for a deeper understanding of the details. +\end_layout + +\begin_layout Standard +Alternatively, you may first read the sections about general architecture, + and then start a bottom-up scan by first reading the last section about + generic objects and aspects, and working in reverse +\emph on +section +\emph default + order (but read +\emph on +sub +\emph default +sections in-order) until you finally reach the kernel interfaces / symlink + trees. 
+\end_layout + +\begin_layout Section +Motivation / Politics +\end_layout + +\begin_layout Standard +MARS is not yet upstream in the Linux kernel. + This section tries to clear up some potential doubts. + Some people have asked why MARS uses its own internal framework instead + of +\emph on +directly +\emph default + +\begin_inset Foot +status open + +\begin_layout Plain Layout +Notice that +\emph on +indirect +\emph default + use of pre-existing Linux infrastructure is not only possible, but actually + implemented, by using it +\emph on +internally +\emph default + in brick +\emph on +implementations +\emph default + (black-box principle). + However, such bricks are not portable to other environments like userspace. +\end_layout + +\end_inset + + being based on some already existing Linux kernel infrastructures like + the device mapper. + Here is a list of technical reasons: +\end_layout + +\begin_layout Enumerate +The existing device mapper infrastructure is based on +\family typewriter +struct bio +\family default +. + In contrast, the new XIO personality of the generic brick infrastructure + is based on the concept of AIO (Asynchronous IO), which is a +\series bold +true superset +\series default + of block IO. +\end_layout + +\begin_layout Enumerate +In particular, +\family typewriter +struct bio +\family default + is firmly referencing to +\family typewriter +struct page +\family default + (via intermediate +\family typewriter +struct bio_vec +\family default +), using types like +\family typewriter +sector_t +\family default + in the field +\family typewriter +bi_sector +\family default +. + Basic transfer units are blocks, or sectors, or pages, or the like. 
+ In contrast, +\family typewriter +struct aio_object +\family default + used by the XIO personality can address +\series bold +arbitrary granularity +\series default + memory with byte resolution even at odd +\begin_inset Foot +status open + +\begin_layout Plain Layout +Some brick +\emph on +implementations +\emph default + (as opposed to the capabilities of the +\emph on +interface +\emph default +) may be (and, in fact, +\emph on +are +\emph default +) restricted to +\family typewriter +PAGE_SIZE +\family default + operations or the like. + This is no general problem, because IOP can automatically insert some translato +r bricks extending the capabilities to universal granularity (of course + at some performance costs). +\end_layout + +\end_inset + + positions in (virtual) files / devices, similar to classical Unix file + IO, but +\emph on +asynchronously +\emph default +. + Practical experience shows that even non-functional properties like performance + of many datacenter workloads are profiting from that +\begin_inset Foot +status open + +\begin_layout Plain Layout +The current transaction logger uses variable-sized headers at +\begin_inset Quotes eld +\end_inset + +odd +\begin_inset Quotes erd +\end_inset + + addresses. + Although this increases +\family typewriter +memcpy() +\family default + load due to +\begin_inset Quotes eld +\end_inset + +misalignment +\begin_inset Quotes erd +\end_inset + +, the +\emph on +overall performance +\emph default + was provably better than in variants where sector / page alignment was + strictly obeyed, but space was wasted for alignments. + Such functionality is only possible if the XIO infrastructure +\emph on +allows +\emph default + +\emph on +for +\emph default + (but doesn't force) +\begin_inset Quotes eld +\end_inset + +mis-aligned +\begin_inset Quotes erd +\end_inset + + IO operations. + In future, many different transaction logfile formats showing different + runtime behaviour (e.g. 
+ optimized for high-throughput SSD loads) may co-exist in parallel. + Note that properly aligned XIO operations bear no noticeable overhead compared + to classical block IO, at least in typical datacenter RAID scenarios. +\end_layout + +\end_inset + +. + The AIO/XIO abstraction contains no fixed link to kernel abstractions and + should be +\series bold +easily portable +\series default + to other environments. + In summary, the new personality provides a uniform abstraction which abstracts + away from multiple different kernel interfaces; it is designed to be useful + even in userspace. +\end_layout + +\begin_layout Enumerate +Kernel infrastructures for the concept of +\emph on +direct IO +\emph default + are different from those for +\emph on +buffered IO +\emph default +. + The XIO personality used by MARS subsumes both concepts as use case +\emph on +variants +\emph default +. + +\series bold +Buffering +\series default + is an optional internal property of XIO bricks (almost non-functional property + with support for consistency guarantees). +\end_layout + +\begin_layout Enumerate +The AIO/XIO personality is generically designed for remote operations over + networks, at arbitrary places in the IO stack, with (almost +\begin_inset Foot +status open + +\begin_layout Plain Layout +By default, automatic network connection re-establishment and infinite network + retries are already implemented in the +\family typewriter +xio_client +\family default + and +\family typewriter +xio_server +\family default + bricks to provide fully transparent semantics. + However, this may be undesirable in case of fatal crashes. + Therefore, abort operations are also configurable, as well as network timeouts + which are then mapped to classical IO errors. +\end_layout + +\end_inset + +) no semantic differences to local operations (built-in +\series bold + network transparency +\series default +). 
+ There are universal provisions for mixed operation of different versions + ( +\series bold +rolling software updates +\series default + in clusters / grids). +\end_layout + +\begin_layout Enumerate +The generic brick infrastructure (as well as its personalities like XIO + or any other future personality) supports +\series bold +dynamic re-wiring / re-configuration +\series default + +\emph on +during +\emph default + operation (even while parallel IO requests are flying, some of them taking + different paths in the IO stack in parallel). + This is absolutely needed for MARS logfile rotation. + In the long term, this would be useful for many advanced new features and + products, not limited to multipathing. +\end_layout + +\begin_layout Enumerate +The generic brick infrastructure (and in turn all personalities) provide + +\series bold +additional comfort +\series default + to the programmer while enabling +\series bold +increased functionality +\series default +: by use of a generalization of +\series bold +aspect orientation +\series default + +\begin_inset Foot +status open + +\begin_layout Plain Layout +Similar to AOP, insertion of IOP bricks for checking / debugging etc is + one of the key advantages of the generic brick infrastructure. + In contrast to AOP where debugging is usually {en,dis}abled statically + at compile time, IOP allows for +\emph on +dynamic +\emph default + (re-)configuration of debugging bricks, automatic repair, and many more + features promoted by +\emph on +organic computing +\emph default +. +\end_layout + +\end_inset + +, the programmer need no longer worry about dynamic memory allocations for + +\emph on +local state +\emph default + in a brick instance. + MARS is +\series bold +automating local state +\series default + even when dynamically instantiating new bricks (possibly having the same + brick type) at runtime. 
+ Specifically, XIO is automating +\series bold +request stacking +\series default + at the completion path this way, even while dynamically reconfiguring the + IO stack +\begin_inset Foot +status open + +\begin_layout Plain Layout +The generic aspect orientation approach leads to better +\series bold +separation of concerns +\series default +: local state needed by brick implementations is not visible from outside + by default. + In other words, local state is also +\series bold +private state +\series default +. + Accidental hampering of internal operations is impeded. +\end_layout + +\begin_layout Plain Layout +Example from the kernel: in +\family typewriter +include/linux/blkdev.h +\family default + the definition of +\family typewriter +struct request +\family default + contains the following comment: +\family typewriter +/* the following two fields are internal, NEVER access directly */ +\family default +. + It appears that +\family typewriter +struct request +\family default + contains not only fields relevant for the caller, but also +\series bold +internal fields +\series default + needed only in +\emph on +some +\emph default + +\emph on +specific +\emph default + callees. + For example, +\family typewriter +rb_node +\family default + is documented to be used only in IO schedulers. +\end_layout + +\begin_layout Plain Layout +XIO goes one step further: there need not exist exactly one IO scheduler + instance in the IO stack for a single device. + Future +\family typewriter +xio_scheduler_{deadline,cfq,...} +\family default + brick types could be each instantiated many times, and in arbitrary places, + even for the same (logical) device. + The equivalent of +\family typewriter +rb_node +\family default + would then be automatically instantiated multiple times for the same IO + request, by automatically instantiating the right local aspect instances. +\end_layout + +\end_inset + +. 
+ A similar automation +\begin_inset Foot +status open + +\begin_layout Plain Layout +DM can achieve stacking and dynamic routing by a workaround called +\emph on +request cloning +\emph default +, potentially leading to mass creation of temporary / intermediate object + instances. +\end_layout + +\end_inset + + does not exist in the rest of the Linux kernel. +\end_layout + +\begin_layout Enumerate +The generic brick infrastructure, together with personalities like XIO, + enables +\series bold +new long-term functional and non-functional opportunities +\series default + by use of concepts from instance-oriented programming (IOP +\begin_inset Foot +status open + +\begin_layout Plain Layout +See +\begin_inset Flex URL +status collapsed + +\begin_layout Plain Layout + +http://athomux.net/papers/paper_inst2.pdf +\end_layout + +\end_inset + + +\end_layout + +\end_inset + +). + The application area is +\series bold +not limited to device drivers +\series default +. + For example, a new personality for +\emph on +stackable filesystems +\emph default + could be developed in future. +\end_layout + +\begin_layout Standard +In summary, anyone who would insist that MARS should be +\emph on +directly +\begin_inset Foot +status open + +\begin_layout Plain Layout +Notice that kernel-specific structures like +\family typewriter +struct bio +\family default + are of course used by MARS, but only +\emph on +inside +\emph default + the blackbox implementation of bricks like +\family typewriter +mars_bio +\family default + or +\family typewriter +mars_if +\family default + which act as +\series bold +adaptors +\series default + to/from that structure. + It is possible to write further adaptors, e.g. + for direct interfacing to the device mapper infrastructure. +\end_layout + +\end_inset + + +\emph default + based on pre-existing kernel structures / frameworks instead of contributing + a new framework would cause a +\emph on +massive regression of functionality +\emph default +. 
+\end_layout + +\begin_layout Itemize +On one hand, all code contributed by the MARS project is +\series bold +non-intrusive +\series default + into the rest of the Linux kernel. + From the viewpoint of other parts of the kernel, the whole addition +\emph on +behaves +\emph default + +\emph on +like +\emph default + a driver (although its infrastructure is much more than a driver). +\end_layout + +\begin_layout Itemize +On the other hand, if people are interested, the contributed infrastructure + +\emph on +may +\emph default + be used to +\emph on +add +\emph default + to the power of the Linux kernel. + It is designed to be +\series bold +open for contributions +\series default +. +\end_layout + +\begin_layout Itemize +A +\emph on +possible +\emph default + (but not the only possible) way to do this is giving the generic brick + framework / the XIO personality as well as future personalities / the MARS + application the status of a +\emph on +subsystem +\emph default + inside the kernel (in the long term), similar to the SCSI subsystem or + the network subsystem. + No one is forced to use it, but anybody may use it if he/she likes. +\end_layout + +\begin_layout Itemize +Politically, the author is a FOSS advocate willing to collaborate and to + support anyone interested in contributions. + The author's personal interest is long-term and is open for both in-tree + and out-of-tree extensions of both the framework and MARS by any other + party obeying the GPL and not hazarding FOSS by patents (instead supporting + organizations like the Open Invention Network). + The author is open to closer relationships with the Linux Foundation and + other parts of the Linux ecosystem. 
+\end_layout + +\begin_layout Section +Architecture Overview +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/MARS_Framework_Architecture.pdf + width 100col% + +\end_inset + + +\end_layout + +\begin_layout Section +Some Architectural Details +\end_layout + +\begin_layout Standard +The following pictures show some +\begin_inset Quotes eld +\end_inset + +zones of responsibility +\begin_inset Quotes erd +\end_inset + +, not necessarily a strict hierarchy (although Dijkstra's famous layering + rules from THE are tried to be respected as much as possible). + The construction principle follows the concept of +\series bold +Instance Oriented Programming +\series default + (IOP) described in +\begin_inset Flex URL +status collapsed + +\begin_layout Plain Layout + +http://athomux.net/papers/paper_inst2.pdf +\end_layout + +\end_inset + +. + Please note that MARS is only instance- +\emph on +based +\emph default + +\begin_inset Foot +status open + +\begin_layout Plain Layout +Similar to OOP, where +\begin_inset Quotes eld +\end_inset + +object-based +\begin_inset Quotes erd +\end_inset + + means a weaker form of +\begin_inset Quotes eld +\end_inset + +object-oriented +\begin_inset Quotes erd +\end_inset + +, the term +\begin_inset Quotes eld +\end_inset + +instance-based +\begin_inset Quotes erd +\end_inset + + means that the +\emph on +strategy +\emph default + brick layer need not be fully modularized according to the IOP principles, + but the +\emph on +worker +\emph default + brick layer already is. +\end_layout + +\end_inset + +, while MARS Full is planned to be fully instance- +\emph on +oriented +\emph default +. 
+\end_layout + +\begin_layout Subsection +MARS Architecture +\end_layout + +\begin_layout Standard +\noindent +\align center +\begin_inset Graphics + filename images/mars-light-architecture.fig + width 40col% + +\end_inset + + +\end_layout + +\begin_layout Subsection +MARS Full Architecture (planned) +\end_layout + +\begin_layout Standard +\noindent +\align center +\begin_inset Graphics + filename images/mars-full-architecture.fig + width 80col% + +\end_inset + + +\end_layout + +\begin_layout Section +Documentation of the Symlink Trees +\begin_inset CommandInset label +LatexCommand label +name "sec:Documentation-of-the" + +\end_inset + + +\end_layout + +\begin_layout Standard +The +\family typewriter +/mars/ +\family default + symlink tree is serving the following purposes, all at the same time: +\end_layout + +\begin_layout Enumerate +For +\series bold +communication +\series default + between cluster nodes, see sections +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:The-Lamport-Clock" + +\end_inset + + and +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:The-Symlink-Tree" + +\end_inset + +. + This communication is even the +\emph on +only +\emph default + communication between cluster nodes (apart from the +\emph on +contents +\emph default + of transaction logfiles and sync data). +\end_layout + +\begin_layout Enumerate + +\series bold +\emph on +Internal +\emph default + interface +\series default + between the kernel module and the userspace tool +\family typewriter +marsadm +\family default +. +\end_layout + +\begin_layout Enumerate + +\series bold +\emph on +Internal +\emph default + persistent repository +\series default + which keeps state information between reboots (also in case of node crashes). + It is even the +\emph on +only +\emph default + place where state information is kept. + There is no other place like +\family typewriter +/etc/drbd.conf +\family default +. 
+\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + +Because of its internal character, its representation and semantics may + change at any time without notice (e.g. + via an +\emph on +internal +\emph default + upgrade procedure between major releases). + It is +\emph on +not +\emph default + an external interface to the outer world. + Don't build anything on it. +\end_layout + +\begin_layout Standard +However, knowledge of the symlink tree is useful for advanced sysadmins, + for +\series bold +human inspection +\series default + and for +\series bold +debugging +\series default +. + And, of course, for developers. +\end_layout + +\begin_layout Standard +As an +\begin_inset Quotes eld +\end_inset + +official +\begin_inset Quotes erd +\end_inset + + interface from outside, only the +\family typewriter +marsadm +\family default + command should be used. +\end_layout + +\begin_layout Subsection +Documentation of the MARS Symlink Tree +\end_layout + +\begin_layout Section +XIO Worker Bricks +\end_layout + +\begin_layout Section +StrategY Worker Bricks +\end_layout + +\begin_layout Standard +NYI +\end_layout + +\begin_layout Section +The XIO Brick Personality +\end_layout + +\begin_layout Section +The Generic Brick Infrastructure Layer +\end_layout + +\begin_layout Section +The Generic Object and Aspect Infrastructure +\end_layout + +\begin_layout Chapter +\start_of_appendix +Technical Data MARS +\begin_inset CommandInset label +LatexCommand label +name "chap:Technical-Data-MARS" + +\end_inset + + +\end_layout + +\begin_layout Standard +MARS has some built-in limitations which should be overcome +\begin_inset Foot +status open + +\begin_layout Plain Layout +Some internal algorithms are quadratic. 
+ The reason is that MARS evolved from a lab prototype which wasn't originally + intended for enterprise grade usage, but should have been succeeded by + the fully instance-oriented MARS Full much earlier. +\end_layout + +\end_inset + + by the future MARS Full. + Please don't exceed the following limits: +\end_layout + +\begin_layout Itemize +maximum 10 nodes per cluster +\end_layout + +\begin_layout Itemize +maximum 10 resources per cluster +\end_layout + +\begin_layout Itemize +maximum 100 logfiles per resource +\end_layout + +\begin_layout Chapter +Handout for Midnight Problem Solving +\begin_inset CommandInset label +LatexCommand label +name "chap:Handout-for-Midnight" + +\end_inset + + +\end_layout + +\begin_layout Standard +Here are generic instructions for the generic +\family typewriter +marsadm +\family default + and commandline level. + Other levels (e.g. + different types of cluster managers, PaceMaker, control scripts / +\family typewriter +rc +\family default + scripts / +\family typewriter +upstart +\family default + scripts, etc.) should be described elsewhere. +\end_layout + +\begin_layout Section +Inspecting the State of MARS +\end_layout + +\begin_layout Standard +For manual inspection, please prefer the new +\family typewriter +marsadm view all +\family default + over the old +\family typewriter +marsadm view-1and1 all +\family default +. + It shows more appropriate / detailed information. +\end_layout + +\begin_layout Standard +Hint: this might change in future when somebody will program better macros + for the +\family typewriter +view-1and1 +\family default + variant, or create even better other macros. 
+\end_layout + +\begin_layout Quotation + +\family typewriter +\begin_inset listings +inline false +status open + +\begin_layout Plain Layout + +# watch marsadm view all +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +Checking the low-level network connections at runtime: +\end_layout + +\begin_layout Quotation + +\family typewriter +\begin_inset listings +inline false +status open + +\begin_layout Plain Layout + +# watch "netstat --tcp | grep 777" +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +Meaning of the port numbers (as currently configured into the kernel module, + may change in future): +\end_layout + +\begin_layout Itemize +7777 = metadata / symlink propagation +\end_layout + +\begin_layout Itemize +7778 = transfer of transaction logfiles +\end_layout + +\begin_layout Itemize +7779 = transfer of sync traffic +\end_layout + +\begin_layout Standard +7777 must be always active on a healthy cluster. + 7778 and 7779 will appear only on demand, when some data is transferred. +\end_layout + +\begin_layout Standard +Hint: when one of the columns Send-Q or Recv-Q is constantly at a high value, + you might have a network bottleneck. +\end_layout + +\begin_layout Section +Replication is Stuck +\end_layout + +\begin_layout Standard +Indications for a stuck: +\end_layout + +\begin_layout Itemize +One of the flags shown by +\family typewriter +marsadm view all +\family default + or +\family typewriter +marsadm view-flags all +\family default + contains a symbol +\family typewriter +"-" +\family default + (dash). + This means that some switch is currently switched off (deliberately). + Please check whether there is a valid reason why somebody else switched + it off. 
+ If the switch-off is just by accident, use the following command to fix + the stuck: +\family typewriter + +\begin_inset listings +inline false +status open + +\begin_layout Plain Layout + +# marsadm up all +\end_layout + +\end_inset + + +\family default +(or replace +\family typewriter +all +\family default + by a particular resource name if you want to start only a specific one). +\begin_inset Newline newline +\end_inset + +Note: +\family typewriter +up +\family default + is equivalent to the sequence +\family typewriter +attach; resume-fetch; resume-replay; resume-sync +\family default +. + Instead of switching each individual knob, use +\family typewriter +up +\family default + as a shortcut for switching on anything which is currently off. +\end_layout + +\begin_layout Itemize + +\family typewriter +netstat --tcp | grep 7777 +\family default + does not show anything. + Please check the following: +\begin_inset Separator latexpar +\end_inset + + +\end_layout + +\begin_deeper +\begin_layout Itemize +Is the kernel module loaded? Check +\family typewriter +lsmod | grep mars +\family default +. + When necessary, run +\family typewriter +modprobe mars +\family default +. +\end_layout + +\begin_layout Itemize +Is the network interface down? Check +\family typewriter +ifconfig +\family default +, and/or +\family typewriter +ethtool +\family default + and friends, and fix it when necessary. +\end_layout + +\begin_layout Itemize +Is a +\family typewriter +ping +\family default + possible? If not, fix the network / routing / firewall / etc. + When fixed, the MARS connections should automatically appear after about + 1 minute. +\end_layout + +\begin_layout Itemize +When +\family typewriter +ping +\family default + is possible, but a MARS connection to port 7777 does not appear after a + few minutes, try to connect to remote port 7777 by hand via +\family typewriter +telnet +\family default +. 
+ But don't type anything, just abort the connection immediately when it + works! Typing anything will almost certainly throw a harsh error message + at the other server, which could unnecessarily alarm other people. +\end_layout + +\end_deeper +\begin_layout Itemize +Check whether +\family typewriter +marsadm view all +\family default + shows some progress bars somewhere. + Example: +\family typewriter +\size scriptsize + +\begin_inset listings +inline false +status open + +\begin_layout Plain Layout + +istore-test-bap1:~# marsadm view all +\end_layout + +\begin_layout Plain Layout + +--------- resource lv-0 +\end_layout + +\begin_layout Plain Layout + + lv-0 OutDated[F] PausedReplay dCAS-R Secondary istore-test-bs1 +\end_layout + +\begin_layout Plain Layout + + replaying: [>...................] 1.21% (12/1020)MiB logs: [2..3] +\end_layout + +\begin_layout Plain Layout + + > fetch: 1008.198 MiB rate: 0 B/sec remaining: --:--:-- hrs +\end_layout + +\begin_layout Plain Layout + + > replay: 0 B rate: 0 B/sec remaining: 00:00:00 hrs +\end_layout + +\end_inset + + +\family default +\size default +At least one of the +\family typewriter +rate: +\family default + values should be greater than 0. + When none of the +\family typewriter +rate: +\family default + values indicate any progress for a longer time, try +\family typewriter +marsadm up all +\family default + again. + If it doesn't help, check and repair the network. + If even this does not help, check the hardware for any IO hangups, or kernel + hangups. + First, check the RAID controllers. + Often (but not certainly), a stuck kernel can be recognized when many processes + are +\emph on +permanently +\emph default + in state "D", for a long time: +\family typewriter +ps ax | grep " D" | grep -v grep +\family default + or similar. + Please check whether there is just an overload, or +\emph on +really +\emph default + a true kernel problem. 
+ Discrimination is not easy, and requires experience (as with any other + system; not limited to MARS). + A truly stuck kernel can only be resurrected by rebooting. + The same holds for any hardware problems. +\end_layout + +\begin_layout Itemize +Check whether +\family typewriter +marsadm view all +\family default + reports any lines like +\family typewriter +WARNING: SPLIT BRAIN at '' detected +\family default +. + In such a case, check that there is +\emph on +really +\emph default + a split brain, before obeying the instructions in section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:Resolution-of-Split" + +\end_inset + +. + Notice that network outages or missing +\family typewriter +marsadm log-delete-all all +\family default + or +\family typewriter +cron +\family default + may continue to report an old split brain which has gone in the meantime. +\end_layout + +\begin_layout Itemize +Check whether +\family typewriter +/mars/ +\family default + is too full. + For a rough impression, +\family typewriter +df /mars/ +\family default + may be used. + For getting authoritative values as internally used by the MARS emergency-mode + computations, use +\family typewriter +marsadm view-rest-space +\family default + (the unit is GiB). + In practice, the differences are only marginal, at least on bigger +\family typewriter +/mars/ +\family default + partitions. + When there is only few rest space (or none at all), please obey the instruction +s in section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:Resolution-of-Emergency" + +\end_inset + +. +\end_layout + +\begin_layout Section +Resolution of Emergency Mode +\begin_inset CommandInset label +LatexCommand label +name "sec:Resolution-of-Emergency" + +\end_inset + + +\end_layout + +\begin_layout Standard +Emergency mode occurs when +\family typewriter +/mars/ +\family default + runs out of space, such that no new logfile data can be written anymore. 
+\end_layout + +\begin_layout Standard +In emergency mode, the primary will write any write requests +\emph on +directly +\emph default + to the underlying disk, as if MARS were not present at all. + Thus, your application will continue to run. + Only the +\emph on +replication +\emph default + as such is stopped. +\end_layout + +\begin_layout Standard +\begin_inset Note Greyedout +status open + +\begin_layout Plain Layout +Notice: emergency mode means that your secondary nodes are usually in a + +\emph on +consistent +\emph default +, but +\emph on +outdated +\emph default + state (exception: when a sync was running in parallel to the emergency + mode, then the sync will be automatically started over again). + You can check consistency via +\family typewriter +marsadm view-flags all +\family default +. + Only when a local disk shows a lower-case letter +\family typewriter +"d" +\family default + instead of an uppercase +\family typewriter +"D" +\family default +, it is known to be inconsistent (e.g. + during a sync). + When there is a dash instead, it usually means that the disk is detached + or misconfigured or the kernel module is not started. + Please fix these problems first before believing that your local disk is + unusable. + Even if it is really inconsistent (which is very unlikely, typically occurring + only as a consequence of hardware failures, or of the above-mentioned exception +), you have a big chance to recover most of the data via +\family typewriter +fsck +\family default + and friends. +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +A currently existing Emergency mode can be detected by +\begin_inset listings +inline false +status open + +\begin_layout Plain Layout + +primary:~# marsadm view-is-emergency all +\end_layout + +\begin_layout Plain Layout + +secondary:~# marsadm view-is-emergency all +\end_layout + +\end_inset + + Notice: this delivers the current state, telling nothing about the past. 
+\end_layout + +\begin_layout Standard +Currently, emergency mode will also show something like +\family typewriter +WARNING: SPLIT BRAIN at '' detected +\family default +. + This ambiguity will be resolved in a future MARS release. + It is however not crucial: the resolution methods for both cases are very + similar. + If in doubt, start emergency resolution first, and only proceed to split + brain resolution if it did not help. +\end_layout + +\begin_layout Standard +Preconditions: +\end_layout + +\begin_layout Itemize +Only current version of MARS: the space at the primary side should have + been already released, and the emergency mode should have been already + left. + Otherwise, you might need the split-brain resolution method from section + +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:Resolution-of-Split" + +\end_inset + +. +\end_layout + +\begin_layout Itemize +The network +\series bold +must +\series default + be working. + Check that the following gives an entry for each secondary: +\begin_inset listings +inline false +status open + +\begin_layout Plain Layout + +primary:~# netstat --tcp | grep 7777 +\end_layout + +\end_inset + +When necessary, fix the network first (see instructions above). +\end_layout + +\begin_layout Standard +Emergency mode should now be resolved via the following instructions: +\begin_inset listings +inline false +status open + +\begin_layout Plain Layout + +primary:~# marsadm view-is-emergency all +\end_layout + +\begin_layout Plain Layout + +primary:~# du -s /mars/resource-* | sort -n +\end_layout + +\end_inset + +Remember the affected resources. 
+ Best practice is to do the following, starting with the +\emph on +biggest +\emph default + resource as shown by the +\family typewriter +du | sort +\family default + output in reverse order, but +\emph on +starting +\emph default + the following only with the +\emph on +affected +\emph default + resources in the first place: +\begin_inset listings +inline false +status open + +\begin_layout Plain Layout + +secondary1:~# marsadm invalidate +\end_layout + +\begin_layout Plain Layout + +secondary1:~# marsadm log-delete-all all +\end_layout + +\begin_layout Plain Layout + +... + dito with all resources showing emergency mode +\end_layout + +\begin_layout Plain Layout + +... + dito on all other secondaries +\end_layout + +\begin_layout Plain Layout + +primary:~# marsadm log-delete-all all +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +Hint: during the resolution process, some other resources might have gone + into emergency mode concurrently. + In addition, it is possible that some secondaries are stuck at particular + resources while the corresponding primary has +\emph on +not yet +\emph default + entered emergency mode. + Please repeat the steps in such a case, and look for emergency modes at + secondaries additionally. + When necessary, extend your list of +\emph on +affected +\emph default + resources. +\end_layout + +\begin_layout Standard +Hint: be patient. + Deleting large bulks of logfile data may take a long time, at least on + highly loaded systems. + You should give the cleanup processes at least 5 minutes before concluding + that an +\family typewriter +invalidate +\family default + followed by +\family typewriter +log-delete-all +\family default + had no effect! Don't forget to give the +\family typewriter +log-delete-all +\family default + at all cluster nodes, even when seemingly unaffected. 
+\end_layout + +\begin_layout Standard +In very complex scenarios, when the primary roles of different resources + are spread over different hosts (aka mixed operation), you may need to repeat + the whole cycle iteratively for a few cycles until the jam is resolved. +\end_layout + +\begin_layout Standard +If it does not go away, you have another chance by the following split-brain + resolution process, which will also cleanup emergency mode as a side effect. +\end_layout + +\begin_layout Section +Resolution of Split Brain and of Emergency Mode +\begin_inset CommandInset label +LatexCommand label +name "sec:Resolution-of-Split" + +\end_inset + + +\end_layout + +\begin_layout Standard +Hint: in many cases (but not guaranteed), the previous recipe for resolution + of emergency mode will also cleanup split brain. + Good chances are in case of +\begin_inset Formula $k=2$ +\end_inset + + total replicas. + Please collect your own experiences which method works better for you! +\end_layout + +\begin_layout Standard +Precondition: the network must be working. + Check that the following gives an entry for each secondary: +\begin_inset listings +inline false +status open + +\begin_layout Plain Layout + +primary:~# netstat --tcp | grep 7777 +\end_layout + +\end_inset + + When necessary, fix the network first (see instructions above). +\end_layout + +\begin_layout Standard +Inspect the split brain situation: +\begin_inset listings +inline false +status open + +\begin_layout Plain Layout + +primary:~# marsadm view all +\end_layout + +\begin_layout Plain Layout + +primary:~# du -s /mars/resource-* | sort -n +\end_layout + +\end_inset + +Remember those resources where a message like +\family typewriter +WARNING: SPLIT BRAIN at '' detected +\family default + appears. + Do the following only for +\emph on +affected +\emph default + resources, starting with the biggest one (before proceeding to the next + one). 
+\end_layout + +\begin_layout Standard +Do the following with only +\emph on +one +\emph default + resource at a time (before proceeding to the next one), and repeat the + actions on that resource at every secondary (if there are multiple secondaries) +: +\begin_inset listings +inline false +status open + +\begin_layout Plain Layout + +secondary1:~# marsadm leave-resource $res1 +\end_layout + +\begin_layout Plain Layout + +secondary1:~# marsadm log-delete-all all +\end_layout + +\end_inset + +Check whether the split brain has vanished everywhere. + Start over with other resources at their secondaries when necessary. +\end_layout + +\begin_layout Standard +Finally, when no split brain is reported at any (former) secondary, do the + following on the primary: +\begin_inset listings +inline false +status open + +\begin_layout Plain Layout + +primary:~# marsadm log-delete-all all +\end_layout + +\begin_layout Plain Layout + +primary:~# sleep 30 +\end_layout + +\begin_layout Plain Layout + +primary:~# marsadm view all +\end_layout + +\end_inset + + Now, the split brain should be gone even at the primary. + If not, repeat this step. +\end_layout + +\begin_layout Standard +In case even this should fail on some +\family typewriter +$res +\family default + (which is very unlikely), read the PDF manual before using +\family typewriter +marsadm log-purge-all $res +\family default +. 
+ +\end_layout + +\begin_layout Standard +Finally, when the split brain is gone everywhere, rebuild the redundancy + at every secondary via +\begin_inset listings +inline false +status open + +\begin_layout Plain Layout + +secondary1:~# marsadm join-resource $res1 /dev//$res1 +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +If even this method does not help, setup the whole cluster afresh by +\family typewriter +rmmod mars +\family default + everywhere, and creating a fresh +\family typewriter +/mars/ +\family default + filesystem everywhere, followed by the same procedure as installing MARS + for the first time (which is outside the scope of this handout). +\end_layout + +\begin_layout Section +Handover of Primary Role +\end_layout + +\begin_layout Standard +When there exists a method for primary handover in higher layers such as + cluster managers, please prefer that method (e.g. + +\family typewriter +cm3 +\family default + or other tools). +\end_layout + +\begin_layout Standard +If suchalike doesn't work, or if you need to handover some resource +\family typewriter +$res1 +\family default + by hand, do the following: +\end_layout + +\begin_layout Itemize +Stop the load / application corresponding to +\family typewriter +$res1 +\family default + on the old primary side. +\end_layout + +\begin_layout Itemize + +\family typewriter +umount /dev/mars/$res1 +\family default +, or otherwise close any openers such as iSCSI. +\end_layout + +\begin_layout Itemize +At the new primary: +\family typewriter +marsadm primary $res1 +\end_layout + +\begin_layout Itemize +Restart the application at the new site (in reverse order to above). + In case you want to switch +\emph on +all +\emph default + resources which are not yet at the new side, you may use +\family typewriter +marsadm primary all +\family default +. 
+\end_layout + +\begin_layout Section +Emergency Switching of Primary Role +\end_layout + +\begin_layout Standard +Emergency switching is necessary when your primary is no longer reachable + over the network for a +\emph on +longer +\emph default + time, or when the hardware is defective. +\end_layout + +\begin_layout Standard +Emergency switching will very often lead to a split brain, which requires + lots of manual actions to resolve (see above). + Therefore, try to avoid emergency switching when possible! +\end_layout + +\begin_layout Standard +Hint: MARS can automatically recover after a primary crash / reboot, as + well as after secondary crashes, just by executing +\family typewriter +modprobe mars +\family default + after +\family typewriter +/mars/ +\family default + had been mounted. + Please consider to wait until your system comes up again, instead of risking + a split brain. +\end_layout + +\begin_layout Standard +The decision between emergency switching and continuing operation at the + same primary side is an operational one. + MARS can support your decision by the following information at the potentially + new primary side (which was in secondary mode before): +\family typewriter +\size scriptsize + +\begin_inset listings +inline false +status open + +\begin_layout Plain Layout + +istore-test-bap1:~# marsadm view all +\end_layout + +\begin_layout Plain Layout + +--------- resource lv-0 +\end_layout + +\begin_layout Plain Layout + +lv-0 InConsistent Syncing dcAsFr Secondary istore-test-bs1 +\end_layout + +\begin_layout Plain Layout + +syncing: [====>..............] 
27.84% (567/2048)MiB rate: 72583.00 KiB/sec remaining: 00:00:20 + hrs +\end_layout + +\begin_layout Plain Layout + +> sync: 567.293/2048 MiB rate: 72583 KiB/sec remaining: 00:00:20 hrs +\end_layout + +\begin_layout Plain Layout + +replaying: [>:::::::::::::::::::] 0.00% (0/12902)KiB logs: [1..1] +\end_layout + +\begin_layout Plain Layout + +> fetch: 0 B rate: 38 KiB/s remaining: 00:00:00 +\end_layout + +\begin_layout Plain Layout + +> replay: 12902.047 KiB rate: 0 B/s remaining: --:--:-- +\end_layout + +\end_inset + + +\family default +\size default +When your target is syncing (like in this example), you cannot switch to + it (same as with DRBD). + When you had an emergency mode before, you should first resolve that (whenever + possible). + When a split brain is reported, try to resolve it first (same as with DRBD). + Only in case you +\emph on +know +\emph default + that the primary is really damaged, or it is really impossible to run + the application there for some reason, emergency switching is desirable. +\end_layout + +\begin_layout Standard +Hint: in case the secondary is inconsistent for some reason, e.g. + because of an incremental fast full-sync, you have a last chance to recover + most data after forceful switching by using a filesystem check or suchalike. + This might be even faster than restoring data from the backup. + But use it only if you are +\emph on +really +\emph default + desperate! +\end_layout + +\begin_layout Standard +The amount of data which is +\emph on +known +\emph default + to be missing at your secondary is shown after the +\family typewriter +> fetch: +\family default + in human-readable form. + However, in cases of networking problems this information may be outdated. + You +\emph on +always +\emph default + need to consider further facts which cannot be known by MARS. 
+\end_layout + +\begin_layout Standard +When there exists a method for emergency switching of the primary in higher + layers such as cluster managers, please prefer that method in front of + the following one. +\end_layout + +\begin_layout Standard +If suchalike doesn't work, or when a handover attempt has failed several + times, or if you +\emph on +really need +\emph default + forceful switching of some resource +\family typewriter +$res1 +\family default + by hand, you can do the following: +\end_layout + +\begin_layout Itemize +When possible, stop the load / application corresponding to +\family typewriter +$res1 +\family default + on the old primary side. +\end_layout + +\begin_layout Itemize +When possible, +\family typewriter +umount /dev/mars/$res1 +\family default +, or otherwise close any openers such as iSCSI. +\end_layout + +\begin_layout Itemize +When possible (if you have some time), wait until as much data has been + propagated to the new primary as possible (watch the +\family typewriter +fetch: +\family default + indicator). +\end_layout + +\begin_layout Itemize +At the new primary: +\family typewriter +marsadm disconnect $res1; marsadm primary --force $res1 +\end_layout + +\begin_layout Itemize +Restart the application at the new site (in reverse order to above). +\end_layout + +\begin_layout Itemize +After the application is known to run reliably, check for split brains and + cleanup them when necessary. +\end_layout + +\begin_layout Chapter +Alternative Methods for Split Brain Resolution +\begin_inset CommandInset label +LatexCommand label +name "chap:Alternative-Methods-for" + +\end_inset + + +\end_layout + +\begin_layout Standard +Instead of +\family typewriter +marsadm invalidate +\family default +, the following steps may be used. 
+ In preference, start with the old +\begin_inset Quotes eld +\end_inset + +wrong +\begin_inset Quotes erd +\end_inset + + primaries first: +\end_layout + +\begin_layout Enumerate + +\family typewriter +marsadm leave-resource mydata +\end_layout + +\begin_layout Enumerate +After having done this on one cluster node, check whether the split brain + is already gone (e.g. + by saying +\family typewriter +marsadm view mydata +\family default +). + There are chances that you don't need this on all of your nodes. + Only in very rare +\begin_inset Foot +status open + +\begin_layout Plain Layout +When your network had partitioned in a very awkward way for a long time, + and when your partitioned primaries did several +\family typewriter +log-rotate +\family default + operations independently from each other, there is a small chance that +\family typewriter +leave-resource +\family default + does not clean up +\emph on +all +\emph default + remains of such an awkward situation. + Only in such a case, try +\family typewriter +log-purge-all +\family default +. +\end_layout + +\end_inset + + cases, it might happen that the preceding +\family typewriter +leave-resource +\family default + operations were not able to clean up all logfiles produced in parallel + by the split brain situation. + +\end_layout + +\begin_layout Enumerate +Read the documentation about +\family typewriter +log-purge-all +\family default + (see page +\begin_inset CommandInset ref +LatexCommand pageref +reference "log-purge-all$res" + +\end_inset + +) and use it. +\end_layout + +\begin_layout Enumerate +If you want to restore redundancy, you can follow-up a +\family typewriter +join-resource +\family default + phase to the old resource name (using the correct device name, double-check + it!) This will restore your redundancy by overwriting your bad split brain + version with the correct one. 
+\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +It is important to resolve the split brain +\emph on +before +\emph default + you can start the +\family typewriter +join-resource +\family default + reconstruction phase! In order to keep as many +\begin_inset Quotes eld +\end_inset + +good +\begin_inset Quotes erd +\end_inset + + versions as possible (e.g. + for emergency cases), don't re-join them all in parallel, but rather start + with the oldest / most outdated / worst / inconsistent version first. + It is recommended to start the next one only when the previous one has + successfully finished. +\end_layout + +\begin_layout Chapter +Alternative De- and Reconstruction of a Damaged Resource +\begin_inset CommandInset label +LatexCommand label +name "chap:Alternative-De--and" + +\end_inset + + +\end_layout + +\begin_layout Standard +In case +\family typewriter +leave-resource --host= +\family default + does not work, you may use the following fallback. + On the surviving new designated primary, give the following commands: +\end_layout + +\begin_layout Enumerate + +\family typewriter +marsadm disconnect-all mydata +\end_layout + +\begin_layout Enumerate + +\family typewriter +marsadm down mydata +\end_layout + +\begin_layout Enumerate +Check by hand whether your local disk is consistent, e.g. + by test-mounting it readonly, +\family typewriter +fsck +\family default +, etc. +\end_layout + +\begin_layout Enumerate + +\family typewriter +marsadm delete-resource mydata +\end_layout + +\begin_layout Enumerate +Check whether the other vital cluster nodes don't report the dead resource + any more, e.g. + +\family typewriter +marsadm view all +\family default + at +\emph on +each +\emph default + of them. 
+ In case the resource has not disappeared anywhere (which may happen during + network problems), do the +\family typewriter +down ; delete-resource +\family default + steps also there (optionally again with +\family typewriter +--force +\family default +). +\end_layout + +\begin_layout Enumerate +Be sure that the resource has disappeared +\emph on +everywhere +\emph default +. + When necessary, repeat the +\family typewriter +delete-resource +\family default + with +\family typewriter +--force +\family default +. +\end_layout + +\begin_layout Enumerate + +\family typewriter +marsadm create-resource newmydata ... + +\family default + at the +\emph on +correct +\emph default + node using the +\emph on +correct +\emph default + disk device containing the +\emph on +correct +\emph default + version, and further steps to setup your resource from scratch, preferably + under a different name to minimize any risk. +\end_layout + +\begin_layout Standard +\noindent +In any case, +\series bold +manually check +\series default + whether a split brain is reported for any resource on any of your +\emph on +surviving +\emph default + cluster nodes. + If you find one there (and only then), please (re-)execute the split brain + resolution steps on the affected node(s). +\end_layout + +\begin_layout Chapter +Cleanup in case of Complicated Cascading Failures +\begin_inset CommandInset label +LatexCommand label +name "subsec:Cleanup-in-case" + +\end_inset + + +\end_layout + +\begin_layout Standard +MARS does its best to recover even from multiple failures (e.g. + +\series bold +rolling disasters +\series default +). 
+ Chances are high that the instructions from sections +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Split-Brain-Resolution" + +\end_inset + + +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Final-Destroy-of" + +\end_inset + + or appendix +\begin_inset CommandInset ref +LatexCommand ref +reference "chap:Alternative-Methods-for" + +\end_inset + + +\begin_inset CommandInset ref +LatexCommand ref +reference "chap:Alternative-De--and" + +\end_inset + + will work even in case of multiple failures, such as a network failure + plus local node failure at only 1 node (even if that node is the former + primary node). +\end_layout + +\begin_layout Standard +However, in general (e.g. + when more than 1 node is damaged and/or when the filesystem +\family typewriter +/mars/ +\family default + is badly damaged) there is no general guarantee that recovery will +\emph on +always +\emph default + succeed under +\emph on +any +\emph default + (weird) circumstances. + That said, your chances for recovery are +\emph on +very +\emph default + high when some disk remains usable at least at one of your surviving secondarie +s. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +It should be very hard to finally trash a secondary, because the transaction + logfiles are containing +\family typewriter +md5 +\family default + checksums for all data records. + Any attempt to replay corrupted logfiles is refused by MARS. + In addition, the sequence numbers of +\family typewriter +log-rotate +\family default +d logfiles are checked for contiguity. 
+ Finally, the +\emph on +sequence path +\emph default + of logfile applications (consisting of logfile names plus their respective + length) is additionally secured by a +\family typewriter +git +\family default +-like incremental checksum over the whole path history (so-called +\begin_inset Quotes eld +\end_inset + +version links +\begin_inset Quotes erd +\end_inset + +). + This should detect split brains even if logfiles are appended / modified + +\emph on +after +\emph default + a (forceful) switchover has already taken place. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + That said, your risk of final data loss is very high if you remove the + +\series bold +BBU +\series default + from your hardware RAID controller before all hot data has been flushed + to the physical disks. + Therefore, never try to +\begin_inset Quotes eld +\end_inset + +repair +\begin_inset Quotes erd +\end_inset + + a seemingly dead node before your replication is up again somewhere else! + Only unplug the network cables when advised, but never try to repair the + hardware instantly! +\end_layout + +\begin_layout Standard +In case of desperate situations where none of the previous instructions + have succeeded, your last chance is rebuilding all your resources from + intact disks as follows: +\end_layout + +\begin_layout Enumerate +Do +\family typewriter +rmmod mars +\family default + on all your cluster nodes and/or reboot them. 
+ Note: if you are less desperate, chances are high that the following will + also work when the kernel module remains active and everywhere a +\family typewriter +marsadm down +\family default + is given instead, but for an +\emph on +ultimate +\emph default + instruction you should eliminate +\emph on +potential +\emph default + kernel problems by +\family typewriter +rmmod +\family default + / +\family typewriter +reboot +\family default +, at least if you can afford the downtime on concurrently operating resources. +\end_layout + +\begin_layout Enumerate +For safety, physically remove the storage network cables on +\emph on +all +\emph default + your cluster nodes. + Note: the same disclaimer holds. + MARS really does its best, even when +\family typewriter +delete-resource +\family default + is given while the network is fully active and multiple split-brain primaries + are actively using their local device in parallel (approved by some testcases + from the automatic test suite, but note that it is impossible to catch + all possible failure scenarios). + Don't challenge your fate if you are desperate! Don't +\emph on +rely +\emph default + on this! Nothing is absolutely fail-safe! +\end_layout + +\begin_layout Enumerate + +\series bold +Manually +\series default + check which surviving disk is usable, and which is the +\begin_inset Quotes eld +\end_inset + +best +\begin_inset Quotes erd +\end_inset + + one for your purpose. +\end_layout + +\begin_layout Enumerate +Do +\family typewriter +modprobe mars +\family default + +\emph on +only +\emph default + on that node. + If that fails, +\family typewriter +rmmod +\family default + and/or reboot again, and start over with a completely fresh +\family typewriter +/mars/ +\family default + partition ( +\family typewriter +mkfs.ext4 /mars/ +\family default + or similar) +\emph on +everywhere +\emph default + on +\emph on +all +\emph default + cluster nodes, and continue with step 7. 
+\end_layout + +\begin_layout Enumerate +If your old +\family typewriter +/mars/ +\family default + works, and you did not already (forcefully) switch your designated primary + to the final destination, do it now (see description in section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Forced-Switching" + +\end_inset + +). + Wait until any old logfile data has been replayed. +\end_layout + +\begin_layout Enumerate +Say +\family typewriter +marsadm delete-resource mydata --force +\family default +. + This will cleanup all internal symlink tree information for the resource, + but will leave your disk data intact. +\end_layout + +\begin_layout Enumerate +Locally build up the new resource(s) as usual, out of the underlying disks. +\end_layout + +\begin_layout Enumerate +Check whether the new resource(s) work in standalone mode. +\end_layout + +\begin_layout Enumerate +When necessary, repeat these steps with other resources. +\end_layout + +\begin_layout Standard +Now you can choose how to rebuild your cluster. + If you rebuilt +\family typewriter +/mars/ +\family default + anywhere, you +\emph on +must +\emph default + rebuild it on +\emph on +all +\emph default + new cluster nodes and start over with a fresh +\family typewriter +join-cluster +\family default + on each of them, from scratch. + It is not possible to mix the old cluster with the new one. +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +begin{enumerate} +\backslash +setcounter{enumi}{9} +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +item +\end_layout + +\end_inset + + Finally, do all the necessary +\family typewriter +join-resource +\family default +s on the respective cluster nodes, according to your new redundancy scenario + after the failures (e.g. + after activating spare nodes, etc). 
+ If you have +\begin_inset Formula $k>2$ +\end_inset + + replicas, start +\family typewriter +join-resource +\family default + on the worst / most damaged version first, and start the next preferably + only after the previous sync has completed successfully. + This way, you will be permanently retaining some (old and outdated, but + hopefully potentially usable) replicas while a sync is running. + Don't start too many syncs in parallel. +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +end{enumerate} +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + Never use +\family typewriter +delete-resource +\family default + twice on the same resource name, after you have already a working standalone + primary +\begin_inset Foot +status open + +\begin_layout Plain Layout +Of course, when you don't have created the +\emph on +same +\emph default + resource anew, you may repeat +\family typewriter +delete-resource +\family default + on other cluster nodes in order to get rid of local files / symlinks which + had not been propagated to other nodes before. +\end_layout + +\end_inset + +. + You might accidentally destroy your again-working copy! You +\emph on +can +\emph default + issue +\family typewriter +delete-resource +\family default + multiple times on different nodes, e.g. + when the network has problems, but doing so +\emph on +after +\emph default + re-establishment of the initial primary bears some risk. + Therefore, the safest way is first deleting the resources everywhere, and + then starting over afresh. +\end_layout + +\begin_layout Standard +Before re-connecting any network cable on any non-primary (new secondaries), + ensure that all +\family typewriter +/dev/mars/mydata +\family default + devices are no longer in use (e.g. 
+ from an old primary role before the incident happened), and that each local + disk is detached. + Only after that, you should be able to safely re-connect the network. + The +\family typewriter +delete-resource +\family default + given at the new primary should propagate now to each of your secondaries, + and your local disk should be usable for a re- +\family typewriter +join-resource +\family default +. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +When you did not rebuild your cluster from scratch with fresh +\family typewriter +/mars/ +\family default + filesystems, and one of the old cluster nodes is supposed to be removed + permanently, use +\family typewriter +leave-resource +\family default + (optionally with +\family typewriter +--host= +\family default + and/or +\family typewriter +--force +\family default +) and finally +\family typewriter +leave-cluster +\family default +. +\end_layout + +\begin_layout Chapter +Experts only: Special Trick Switching and Rebuild +\begin_inset CommandInset label +LatexCommand label +name "chap:Experts-only:-Special" + +\end_inset + + +\end_layout + +\begin_layout Standard +The following is a further alternative for +\series bold +experts +\series default + who really know what they are doing. + The method is very simple and therefore well-suited for coping with mass + failures, e.g. + +\series bold +power blackout of whole datacenters +\series default +. +\end_layout + +\begin_layout Standard +In case a primary datacenter fails as a whole for whatever reason and you + have a backup datacenter, do the following steps in the backup datacenter: +\end_layout + +\begin_layout Enumerate +Fencing step: by means of firewalling, +\series bold +ensure +\series default + that the (virtually) damaged datacenter nodes +\series bold +cannot +\series default + be reached over the network. 
+ For example, you may place REJECT rules into all of your local iptables + firewalls at the backup datacenter. + Alternatively / additionally, you may block the routes at the appropriate + central router(s) in your network. +\end_layout + +\begin_layout Enumerate +Run the sequence +\family typewriter +marsadm disconnect all; marsadm primary --force all +\family default + on all nodes in the backup datacenter. +\end_layout + +\begin_layout Enumerate +Restart your services in the backup datacenter (as far as necessary). + Depending on your network setup, further steps like switching BGP routes + etc may be necessary. +\end_layout + +\begin_layout Enumerate +Check that +\emph on +all +\emph default + your services are +\emph on +really +\emph default + up and running, before you try to repair anything! Failing to do so may + result in data loss when you execute the following restore method for +\emph on +experts +\emph default +. +\end_layout + +\begin_layout Standard +Now your backup datacenter should continue servicing your clients. + The final reconstruction of the originally primary datacenter works as + follows: +\end_layout + +\begin_layout Enumerate +At the damaged primary datacenter, ensure that nowhere the MARS kernel module + is running. + In case of a power blackout, you shouldn't have executed an automatic +\family typewriter +modprobe mars +\family default + anywhere during reboot, so you should be already done when all your nodes + are up again. + In case some nodes had no reboot, execute +\family typewriter +rmmod mars +\family default + everywhere. + If +\family typewriter +rmmod +\family default + refuses to run, you may need to umount the +\family typewriter +/dev/mars/mydata +\family default + device first. + When nothing else helps, you may just mass reboot your hanging nodes. 
+\end_layout + +\begin_layout Enumerate +At the failed side, do +\family typewriter +rm -rf /mars/resource-$mydata/ +\family default + for all those resources which had been primary before the blackout. + Do this +\emph on +only +\emph default + for those cases, otherwise you will need unnecessary +\family typewriter +leave-resource +\family default +s or +\family typewriter +invalidate +\family default +s later (e.g. + when half of your nodes were already running at the surviving side). + In order to avoid unnecessary traffic, please do this only as far as really + necessary. + Don't remove any other directories. + In particular, +\family typewriter +/mars/ips/ +\family default + +\emph on +must +\emph default + remain intact. + In case you accidentally deleted them, or you had to re-create +\family typewriter +/mars/ +\family default + from scratch, try +\family typewriter +rsync +\family default + with the correct options. +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/MatieresCorrosives.png + lyxscale 50 + scale 17 + +\end_inset + + Caution! before doing this, check that the corresponding directory exists + at the backup datacenter, and that it is +\emph on +really +\emph default + healthy! +\end_layout + +\begin_layout Enumerate +Un-Fencing: restore your network firewall / routes and check that they work + ( +\family typewriter +ping +\family default + etc). +\end_layout + +\begin_layout Enumerate +Do +\family typewriter +modprobe mars +\family default + everywhere. + All missing directories and their missing symlinks should be automatically + fetched from the backup datacenter. +\end_layout + +\begin_layout Enumerate +Run +\family typewriter +marsadm join-resource $res +\family default +, but only at those places where the directory was removed previously, while + using the same disk devices as before. + This will minimize actual traffic thanks to the fast full sync algorithm. 
+\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +It is +\series bold +crucial +\series default + that the fencing step +\series bold +must +\series default + be executed +\emph on +before +\emph default + any +\family typewriter +primary --force +\family default +! This way, no split brain will be +\emph on +visible +\emph default + at the backup datacenter side, because there is simply no chance for transferri +ng different versions over the network. + It is also crucial to remove any (potentially diverging) resource directories + +\emph on +before +\emph default + the +\family typewriter +modprobe +\family default +! This way, the backup datacenter never runs into split brain. + This saves you a lot of detail work for split brain resolution when you + have to restore bulks of nodes in a short time. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +In case the repair of a full datacenter should take so extremely long that + some +\family typewriter +/mars/ +\family default + partitions are about to run out of space at the surviving side, you may + use the +\family typewriter +leave-resource --host=failed-node +\family default + trick described earlier, followed by +\family typewriter +log-delete-all +\family default +. + Best if you have prepared a fully automatic script long before the incident, + which executes suchalike only as far as necessary in each individual case. +\end_layout + +\begin_layout Standard +\noindent +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +Even better: train such scenarios in advance, and prepare scripts for mass + automation. + Look into section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:Scripting-HOWTO" + +\end_inset + +. 
+\end_layout + +\begin_layout Chapter +Mathematical Model of Architectural Reliability +\begin_inset CommandInset label +LatexCommand label +name "chap:Mathematical-Model-of" + +\end_inset + + +\end_layout + +\begin_layout Standard +The assumptions used in the model are explained in detail in section +\begin_inset CommandInset ref +LatexCommand vref +reference "sub:Detailed-explanation" + +\end_inset + +. + Here is a quick recap of the main parameters: +\end_layout + +\begin_layout Itemize +\begin_inset Formula $n$ +\end_inset + + is the number of basic storage units. + It is also used for the number of application units, assumed to be the + same. +\end_layout + +\begin_layout Itemize +\begin_inset Formula $k$ +\end_inset + + is the replication degree, or number of replicas. + In general, you will have to deploy +\begin_inset Formula $N=k*n$ +\end_inset + + storage servers for getting +\begin_inset Formula $n$ +\end_inset + + basic storage units. + This applies to any of the competing architectures. + +\end_layout + +\begin_layout Itemize +\begin_inset Formula $s$ +\end_inset + + is the architecture-dependent spread exponent: it tells whether a storage + incident will spread to the application units. + Examples: +\begin_inset Formula $s=0$ +\end_inset + + means that there is no spread between storage unit failures and application + unit failures, other than a local 1:1 one. + +\begin_inset Formula $s=1$ +\end_inset + + means that an uncompensated storage node incident will cause +\begin_inset Formula $n$ +\end_inset + + application incidents. +\end_layout + +\begin_layout Itemize +\begin_inset Formula $p$ +\end_inset + + is the probability of a storage server incident. 
+ In the examples at section +\begin_inset CommandInset ref +LatexCommand vref +reference "sec:Reliability-Arguments-from" + +\end_inset + +, a fixed +\begin_inset Formula $p=0.0001$ +\end_inset + + was used for easy understanding, but the following formulae should also + hold for any other +\begin_inset Formula $p\in(0,1)$ +\end_inset + +. +\end_layout + +\begin_layout Itemize +\begin_inset Formula $T$ +\end_inset + + is the observational period, introduced for convenience of understanding. + The following can also be computed independently from any +\begin_inset Formula $T$ +\end_inset + +, as long as the probability +\begin_inset Formula $p$ +\end_inset + + does not change over time, which is assumed. + Because +\begin_inset Formula $T$ +\end_inset + + is only here for convenience of understanding, we set it to +\begin_inset Formula $T=1/p$ +\end_inset + +. + In the examples from section +\begin_inset CommandInset ref +LatexCommand vref +reference "sub:Detailed-explanation" + +\end_inset + +, a fixed +\begin_inset Formula $T=10,000$ +\end_inset + + hours was used. +\end_layout + +\begin_layout Section +Formula for DRBD / MARS +\end_layout + +\begin_layout Standard +We need not discriminate between a storage failure probability S and an applicati +on failure probability A because applications are run locally at the storage + servers 1:1. + The probability for failure of a single shard consisting of +\begin_inset Formula $k$ +\end_inset + + nodes is +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +A_{p}(k)=p^{k} +\] + +\end_inset + +because all +\begin_inset Formula $k$ +\end_inset + + shard members have to be down all at the same time. + In section +\begin_inset CommandInset ref +LatexCommand vref +reference "sub:Detailed-explanation" + +\end_inset + + we assumed that there is no cross-communication between shards. 
+ Therefore they are completely independent from each other, and the total + downtime of +\begin_inset Formula $n$ +\end_inset + + shards during the observational period +\begin_inset Formula $T$ +\end_inset + + is +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +A_{p,T}(k,n)=T*n*p^{k} +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +When introducing the spread exponent +\begin_inset Formula $s$ +\end_inset + +, the formula turns into +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +A_{s,p,T}(k,n)=T*n^{s+1}*p^{k} +\] + +\end_inset + + +\end_layout + +\begin_layout Section +Formula for Unweighted BigCluster +\end_layout + +\begin_layout Standard +This is based on the Bernoulli formula. + The probability that exactly +\begin_inset Formula $\bar{k}$ +\end_inset + + storage nodes out of +\begin_inset Formula $N=k*n$ +\end_inset + + total storage nodes are down is +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +\bar{S}_{p}(\bar{k},N)=\binom{N}{\bar{k}}*p^{\bar{k}}*(1-p)^{N-\bar{k}} +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +Similarly, the probability for getting +\begin_inset Formula $k$ +\end_inset + + or more storage node failures (up to +\begin_inset Formula $N$ +\end_inset + +) at the same time is +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +S_{p}(k,N)=\sum_{\bar{k}=k}^{N}\bar{S}_{p}(\bar{k},N)=\sum_{\bar{k}=k}^{N}\binom{N}{\bar{k}}*p^{\bar{k}}*(1-p)^{N-\bar{k}} +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +By replacing +\begin_inset Formula $N$ +\end_inset + + with +\begin_inset Formula $k*n$ +\end_inset + + (for conversion of the x axis into basic storage units) and by introducing + +\begin_inset Formula $T$ +\end_inset + + we get +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +S_{p,T}(k,n)=T*\sum_{\bar{k}=k}^{k*n}\binom{k*n}{\bar{k}}*p^{\bar{k}}*(1-p)^{k*n-\bar{k}} +\] + +\end_inset + + 
+\end_layout + +\begin_layout Standard +\noindent +For comparability with DRBDorMARS, we have to compute the application downtime + A instead of the storage downtime S, which depends on the spread exponent + +\begin_inset Formula $s$ +\end_inset + + as follows: +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +A_{s,p,T}(k,n)=n^{s+1}*S_{p,T}(k,n)=n^{s+1}*T*\sum_{\bar{k}=k}^{k*n}\binom{k*n}{\bar{k}}*p^{\bar{k}}*(1-p)^{k*n-\bar{k}} +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +Notice that at +\begin_inset Formula $s=0$ +\end_inset + + we have introduced a factor of +\begin_inset Formula $n$ +\end_inset + +, which corresponds to the hashing effect (teardown of +\begin_inset Formula $n$ +\end_inset + + application instances by a single uncompensated storage incident) as described + in section +\begin_inset CommandInset ref +LatexCommand vref +reference "sub:Detailed-explanation" + +\end_inset + +. +\end_layout + +\begin_layout Section +Formula for SizeWeighted BigCluster +\end_layout + +\begin_layout Standard +In difference to above, we need to introduce a correction factor by the + fraction of affected objects, relative to basic storage units. + Otherwise the y axis would not stay comparable due to different units. +\end_layout + +\begin_layout Standard +For the special case of +\begin_inset Formula $k=1$ +\end_inset + +, there is no difference to above. +\end_layout + +\begin_layout Standard +For the special case of +\begin_inset Formula $k=2$ +\end_inset + + replica, the correction factor is +\begin_inset Formula $1/(N-1)$ +\end_inset + +, because we assume that all the replica of the affected first node are + uniformly spread to all other nodes, which is +\begin_inset Formula $N-1$ +\end_inset + +. + The probability for hitting the intersection of the first node with the + second node is thus +\begin_inset Formula $1/(N-1)$ +\end_inset + +. 
+\end_layout + +\begin_layout Standard +For higher values of +\begin_inset Formula $k$ +\end_inset + +, and with a similar argument (never put another replica of the same object + onto the same storage node) we get the correction factor as +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +C(k,N)=\prod_{l=1}^{k-1}\frac{1}{N-l} +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +Hint: there are maximum +\begin_inset Formula $k$ +\end_inset + + physical replicas on the disks. + For higher values of +\begin_inset Formula $\bar{k}\geq k$ +\end_inset + +, there are +\begin_inset Formula $\binom{\bar{k}}{k}$ +\end_inset + + combinations of object intersections (when assuming that the number of + objects on a node is very large such that no further object repetition can + occur except for the +\begin_inset Formula $k$ +\end_inset + +-fold replica placement). + Thus the generalization to +\begin_inset Formula $\bar{k}\geq k$ +\end_inset + + is +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +C(k,\bar{k},N)=\binom{\bar{k}}{k}\prod_{l=1}^{k-1}\frac{1}{N-l} +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +By inserting this into the above formula, we get +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +A_{s,p,T}(k,n)=n^{s+1}*T*\sum_{\bar{k}=k}^{k*n}C(k,\bar{k},k*n)*\binom{k*n}{\bar{k}}*p^{\bar{k}}*(1-p)^{k*n-\bar{k}} +\] + +\end_inset + + +\end_layout + +\begin_layout Chapter +Command Documentation for Userspace Tools +\begin_inset CommandInset label +LatexCommand label +name "chap:Command-Documentation-for" + +\end_inset + + +\end_layout + +\begin_layout Section + +\family typewriter +marsadm --help +\begin_inset CommandInset label +LatexCommand label +name "sec:marsadm-–help" + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +input{marsadm.help} +\end_layout + +\end_inset + + +\end_layout + 
+\begin_layout Section + +\family typewriter +football.sh --help +\begin_inset CommandInset label +LatexCommand label +name "sec:football-–help" + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +input{football.help} +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Section + +\family typewriter +football.sh --help --verbose +\begin_inset CommandInset label +LatexCommand label +name "sec:football-help-verbose" + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +input{football-verbose.help} +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Section + +\family typewriter +screener.sh --help +\begin_inset CommandInset label +LatexCommand label +name "sec:screener–help" + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +input{screener.help} +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Section + +\family typewriter +screener.sh --help --verbose +\begin_inset CommandInset label +LatexCommand label +name "sec:screener-help-verbose" + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +input{screener-verbose.help} +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Chapter +Football Redundancy Diagrams +\begin_inset CommandInset label +LatexCommand label +name "chap:Football-Redundancy-Diagrams" + +\end_inset + + +\end_layout + +\begin_layout Standard +The following tables show the number of replicas during Football. + We focus on the common case of starting with 2 replicas, and ending up + in a total of another 2 replicas on other machines. + Further cases, involving multiple secondaries, should go analogously. +\end_layout + +\begin_layout Standard +Active primaries are colored in red. 
+\end_layout + +\begin_layout Standard +Backup or shadow replicas (which are present at LVM level, but currently + not used by MARS) are in parentheses. + In case of emergency, they could be activated again. +\end_layout + +\begin_layout Standard +Replicas which are not in parentheses are kept in +\family typewriter +UpToDate +\family default + state all the time, until they are retired into backup replicas. +\end_layout + +\begin_layout Section +Parallel +\family typewriter +migrate +\end_layout + +\begin_layout Standard +This creates two additional replicas in parallel, at the target pair. + After handover to the new site, and after some configurable waiting time, + the old replicas are deleted. +\end_layout + +\begin_layout Standard +\noindent +\align center +\begin_inset Tabular + + + + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "14col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +migrate +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +SRC +\begin_inset Newline newline +\end_inset + +Primary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special 
"totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +SRC +\begin_inset Newline newline +\end_inset + +Secondary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +DST +\begin_inset Newline newline +\end_inset + +Primary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +DST +\begin_inset Newline newline +\end_inset + +Secondary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "12col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +# Replicas +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Start +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + 
+\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Migrate x 2 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +4 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +After Cleanup +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Section +Stepwise +\family typewriter +migrate +\end_layout + +\begin_layout Standard +This variant is useful for hardware lifecycle. + The uplink of the old hardware is only loaded with creation of 1 replica + in migration step 1. + Step 2 creates then another replica at the new hardware, which should have + a better replication network (e.g. + better uplinks and/or better capacity for cross-traffic between datacenters). +\end_layout + +\begin_layout Standard +This variant is selected by parameter +\family typewriter +migrate_two_phase=1 +\family default +. 
+\end_layout + +\begin_layout Standard +\noindent +\align center +\begin_inset Tabular + + + + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "14col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +2-Step +\begin_inset Newline newline +\end_inset + +migrate +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +SRC +\begin_inset Newline newline +\end_inset + +Primary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +SRC +\begin_inset Newline newline +\end_inset + +Secondary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain 
Layout +DST +\begin_inset Newline newline +\end_inset + +Primary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +DST +\begin_inset Newline newline +\end_inset + +Secondary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "12col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +# Replicas +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Start +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Migrate 1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + 
+\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +3 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Migrate 2 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +4 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +After Cleanup +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Section +Plain +\family typewriter +shrink +\end_layout + +\begin_layout Standard +Here we need to discriminate between replicas with the old size, and the + new size (which is typically smaller than the old size). 
+\end_layout + +\begin_layout Standard +\noindent +\align center +\begin_inset Tabular + + + + + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "14col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +shrink +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +SRC +\begin_inset Newline newline +\end_inset + +Primary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +SRC +\begin_inset Newline newline +\end_inset + +Secondary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +DST +\begin_inset Newline newline +\end_inset 
+ +Primary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +DST +\begin_inset Newline newline +\end_inset + +Secondary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "12col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +# Replicas +\begin_inset Newline newline +\end_inset + +old_size +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "12col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +# Replicas +\begin_inset Newline newline +\end_inset + +new_size +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Shrink Start +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\color inherit ++ (1) +\end_layout + +\end_inset + + +\begin_inset 
Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Shrink Working +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) + +\color red + 1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(2) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Shrink Finished +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) + 1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(2) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +After Cleanup +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + 
+\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Section +Full +\family typewriter +migrate+shrink +\end_layout + +\begin_layout Standard +This variant is +\emph on +almost +\emph default + equivalent to +\family typewriter +migrate +\family default + followed by +\family typewriter +shrink +\family default +. + The only difference is that cleanup is done +\emph on +later +\emph default +. + This means, more replicas are kept for some longer time. + Thus this variant is more safe than doing +\family typewriter +migrate +\family default + and +\family typewriter +shrink +\family default + separately. +\end_layout + +\begin_layout Standard +This variant is selected by parameters +\family typewriter +migrate_two_phase=0 +\family default + and +\family typewriter +migrate_always_all=1 +\family default + and +\family typewriter +migrate_early_cleanup=0 +\family default +: +\end_layout + +\begin_layout Standard +\noindent +\align center +\begin_inset Tabular + + + + + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "14col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +FULL +\begin_inset Newline newline +\end_inset + +migrate+shrink +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +SRC 
+\begin_inset Newline newline +\end_inset + +Primary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +SRC +\begin_inset Newline newline +\end_inset + +Secondary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +DST +\begin_inset Newline newline +\end_inset + +Primary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +DST +\begin_inset Newline newline +\end_inset + +Secondary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "12col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout 
Plain Layout +# Replicas +\begin_inset Newline newline +\end_inset + +old_size +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "12col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +# Replicas +\begin_inset Newline newline +\end_inset + +new_size +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Start +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Migrate x 2 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +4 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Shrink Start +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout 
Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\color inherit ++ (1) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +4 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Shrink Working +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) + +\color red + 1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(4) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Shrink Finished +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) + 1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(4) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +After Cleanup +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- 
+\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +There is a variant which does early cleanup, which is roughly equivalent + to doing a standalone +\family typewriter +migrate +\family default + followed by a standalone +\family typewriter +shrink +\family default +. + +\end_layout + +\begin_layout Standard +This variant is selected by parameters +\family typewriter +migrate_two_phase=0 +\family default + and +\family typewriter +migrate_always_all=1 +\family default + and +\family typewriter +migrate_early_cleanup=1 +\family default +. + It is less safe because it keeps fewer replicas and is thus less recommended: +\end_layout + +\begin_layout Standard +\noindent +\align center +\begin_inset Tabular + + + + + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "14col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +Sequential +\begin_inset Newline newline +\end_inset + +migrate+shrink +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open 
+ +\begin_layout Plain Layout +SRC +\begin_inset Newline newline +\end_inset + +Primary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +SRC +\begin_inset Newline newline +\end_inset + +Secondary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +DST +\begin_inset Newline newline +\end_inset + +Primary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +DST +\begin_inset Newline newline +\end_inset + +Secondary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "12col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor 
"none" +status open + +\begin_layout Plain Layout +# Replicas +\begin_inset Newline newline +\end_inset + +old_size +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "12col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +# Replicas +\begin_inset Newline newline +\end_inset + +new_size +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Start +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Migrate x 2 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +4 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Early Cleanup +\end_layout + +\end_inset + + 
+\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Shrink Start +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\color inherit ++ (1) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Shrink Working +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) + +\color red + 1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(2) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Shrink Finished +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain 
Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) + 1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(2) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +After Cleanup +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Section +Stepwise +\family typewriter +migrate+shrink +\end_layout + +\begin_layout Standard +This variant is useful for hardware lifecycle. + The uplink of the old hardware is only loaded with creation of 1 replica + in migration step 1. + Step 2 then creates another replica at the new hardware, which should have + a better replication network. 
+ +\end_layout + +\begin_layout Standard +This variant is selected by parameters +\family typewriter +migrate_two_phase=1 +\family default + and +\family typewriter +migrate_always_all=1 +\family default + and +\family typewriter +migrate_early_cleanup=0 +\family default +: +\end_layout + +\begin_layout Standard +\noindent +\align center +\begin_inset Tabular + + + + + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "14col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +2-Step +\begin_inset Newline newline +\end_inset + +migrate+shrink +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +SRC +\begin_inset Newline newline +\end_inset + +Primary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +SRC +\begin_inset Newline newline +\end_inset + +Secondary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" 
+hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +DST +\begin_inset Newline newline +\end_inset + +Primary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +DST +\begin_inset Newline newline +\end_inset + +Secondary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "12col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +# Replicas +\begin_inset Newline newline +\end_inset + +old_size +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "12col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +# Replicas +\begin_inset Newline newline +\end_inset + +new_size +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Start 
+\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Migrate 1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +3 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Migrate 2 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +4 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Shrink Start +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 
+\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\color inherit ++ (1) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +4 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Shrink Working +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) + +\color red + 1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(4) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Shrink Finished +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) + 1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(4) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +After Cleanup +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 
+\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +This variant can also be combined with early cleanup. + The result is similar to the above. + The only difference is that the second additional replica is created at + the new hardware. + +\end_layout + +\begin_layout Standard +This variant is selected by parameters +\family typewriter +migrate_two_phase=1 +\family default + and +\family typewriter +migrate_always_all=0 +\family default + and +\family typewriter +migrate_early_cleanup=1 +\family default +. + Again, this variant is less safe and therefore less recommended. +\end_layout + +\begin_layout Standard +However, it keeps at least 2 (backup) replicas all the time and thus could + be an alternative when decommissioning of old hardware is time-critical. 
+\end_layout + +\begin_layout Standard +\noindent +\align center +\begin_inset Tabular + + + + + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "14col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +2-Step +\begin_inset Newline newline +\end_inset + +migrate+shrink +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +SRC +\begin_inset Newline newline +\end_inset + +Primary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +SRC +\begin_inset Newline newline +\end_inset + +Secondary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout 
Plain Layout +DST +\begin_inset Newline newline +\end_inset + +Primary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +DST +\begin_inset Newline newline +\end_inset + +Secondary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "12col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +# Replicas +\begin_inset Newline newline +\end_inset + +old_size +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "12col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +# Replicas +\begin_inset Newline newline +\end_inset + +new_size +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Start +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + 
+\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Migrate 1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +3 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Migrate 2 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +4 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Early Cleanup +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + 
+\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Shrink Start +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\color inherit ++ (1) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Shrink Working +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) + +\color red + 1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(2) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Shrink Finished +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) + 1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(2) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + + + +\begin_inset 
Text + +\begin_layout Plain Layout +After Cleanup +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Section +FAST +\family typewriter +migrate+shrink +\end_layout + +\begin_layout Standard +This variant tries to keep a balance by not creating too many unnecessary + replicas and by reducing network traffic. +\end_layout + +\begin_layout Standard +This variant is selected by parameters +\family typewriter +migrate_two_phase=0 +\family default + and +\family typewriter +migrate_always_all=0 +\family default + and +\family typewriter +migrate_early_cleanup=0 +\family default +: +\end_layout + +\begin_layout Standard +\noindent +\align center +\begin_inset Tabular + + + + + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "14col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +FAST +\begin_inset Newline newline +\end_inset + +migrate+shrink +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness
"0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +SRC +\begin_inset Newline newline +\end_inset + +Primary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +SRC +\begin_inset Newline newline +\end_inset + +Secondary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +DST +\begin_inset Newline newline +\end_inset + +Primary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "10col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +DST +\begin_inset Newline newline +\end_inset + +Secondary +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "12col%" +special "none" +height "1in" +height_special 
"totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +# Replicas +\begin_inset Newline newline +\end_inset + +old_size +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "12col%" +special "none" +height "1in" +height_special "totalheight" +thickness "0.4pt" +separation "3pt" +shadowsize "4pt" +framecolor "black" +backgroundcolor "none" +status open + +\begin_layout Plain Layout +# Replicas +\begin_inset Newline newline +\end_inset + +new_size +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Start +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Migrate x 1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +3 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + 
+\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Shrink Start +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\color inherit ++ (1) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +3 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Shrink Working +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) + +\color red + 1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(3) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Shrink Finished +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(1) + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +(3) +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +After Cleanup +\end_layout + +\end_inset + + 
+\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\color red +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2 +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +As before, this could +\emph on +theoretically +\emph default + be combined with early cleanup. + Such a combination is, however, not recommended because there is one intermediate + step where all existing replicas are at the DST primary, and thus this + one machine must not fail. +\end_layout + +\begin_layout Chapter +GNU Free Documentation License +\begin_inset CommandInset label +LatexCommand label +name "chap:GNU-FDL" + +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent + +\family typewriter +\size footnotesize +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +lstinputlisting{fdl.txt} +\end_layout + +\end_inset + + +\end_layout + +\end_body +\end_document