From feb0b34604f195a7383b94da616b0b27f3c34583 Mon Sep 17 00:00:00 2001 From: Thomas Schoebel-Theuer Date: Wed, 13 Jan 2016 11:20:17 +0100 Subject: [PATCH] marsadm: fix irritating "Inconsistent" display at primary side At an actual primary, "Inconsistent" would be the correct description for the state of the _disk_. However most sysadmins will confuse this with the state of the _replication_ (which is of course never inconsistent during writeback from the memory buffer). Although documented correctly, misunderstandings continue to survive, because humans are automatically abstracting away from detail components such as a "disk", and are automatically assuming that "marsadm view" would relate to the replication as a whole. Avoid misunderstandings by more detailed message distinctions aiming to address all of these in parallel. --- docu/mars-manual.lyx | 65 ++++++++++++++++++++++++++++++++------------ userspace/marsadm | 8 +++++- 2 files changed, 55 insertions(+), 18 deletions(-) diff --git a/docu/mars-manual.lyx b/docu/mars-manual.lyx index f4af92ef..26783922 100644 --- a/docu/mars-manual.lyx +++ b/docu/mars-manual.lyx @@ -6244,27 +6244,26 @@ initial logfile replay after the sync has not yet finished), because the ordinary logfile replay always maintains anytime consistency once a consistent state had been reached. - On a +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/MatieresToxiques.png + lyxscale 50 + scale 17 + +\end_inset + + \emph on -running +Only \emph default - primary, this display does not mean that something went wrong. - It just means that there exists some + in case of a primary node crash, and \emph on -writeback +only \emph default - in the temporary memory buffer which has to be flushed before consistency - of the underlying local disk (as opposed to -\family typewriter -/dev/mars/mydata -\family default - which will -\emph on -always -\emph default - appear as being consistent) will be reached again. 
- Only in case of a primary node crash, and only after attempts have failed - to become primary again (e.g. + after attempts have failed to become primary again (e.g. IO errors, etc), this \emph on can @@ -6276,6 +6275,38 @@ fsck \family default can fix any remaining problems (and, of course, you can also switchover to a former secondary). +\begin_inset Newline newline +\end_inset + + +\begin_inset Graphics + filename images/lightbulb_brightlit_benj_.png + lyxscale 12 + scale 7 + +\end_inset + +When this message appears, simply start MARS again (e.g. + +\family typewriter +modprobe mars; marsadm up all +\family default +), in whatever role you are intending. + This will +\emph on +automatically +\emph default + try to replay any necessary transaction logfile(s) in order to fix the + inconsistency. + Only if the automatic fix fails and this message persists for a long time + without progress, you +\emph on +might +\emph default + have a problem. + As observed at a large installation at 1&1, this happens extremely + rarely, and then typically indicates that your hardware is likely to be + defective. \end_layout \begin_layout Labeling diff --git a/userspace/marsadm b/userspace/marsadm index ffc20299..f8f4144b 100755 --- a/userspace/marsadm +++ b/userspace/marsadm @@ -3593,7 +3593,13 @@ my %complex_macros = . "}{%not{%is-attach{}}}{" . "NoAttach" . "}{%not{%is-consistent{}}}{" - . "InConsistent" + . "%elsif{%not{%todo-primary{}}}{" + . "InConsistent" + . "}{%is-primary{}}{" + . "WriteBack" + . "}{" + . "Recovery" + . "}" . "}{%not{%work-reached{}}}{" . "%elsif{%not{%todo-primary{}}}{" . "OutDated[%call{outdated-flags}]"