From 6962b4eb9b5c5209dd3de6c5930b3485735cb912 Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Wed, 11 Jul 2018 12:56:06 -0700 Subject: [PATCH] doc: create mds state documentation Fixes: http://tracker.ceph.com/issues/22989 Signed-off-by: Patrick Donnelly --- doc/cephfs/.gitignore | 1 + doc/cephfs/Makefile | 7 + doc/cephfs/administration.rst | 1 + doc/cephfs/disaster-recovery.rst | 1 + doc/cephfs/index.rst | 2 + doc/cephfs/mds-state-diagram.dot | 71 +++++++ doc/cephfs/mds-state-diagram.svg | 311 +++++++++++++++++++++++++++++++ doc/cephfs/mds-states.rst | 227 ++++++++++++++++++++++ doc/cephfs/multimds.rst | 1 + doc/cephfs/standby.rst | 1 + 10 files changed, 623 insertions(+) create mode 100644 doc/cephfs/.gitignore create mode 100644 doc/cephfs/Makefile create mode 100644 doc/cephfs/mds-state-diagram.dot create mode 100644 doc/cephfs/mds-state-diagram.svg create mode 100644 doc/cephfs/mds-states.rst diff --git a/doc/cephfs/.gitignore b/doc/cephfs/.gitignore new file mode 100644 index 00000000000..e8232139cc9 --- /dev/null +++ b/doc/cephfs/.gitignore @@ -0,0 +1 @@ +mds-state-diagram.svg diff --git a/doc/cephfs/Makefile b/doc/cephfs/Makefile new file mode 100644 index 00000000000..eee2fa5728d --- /dev/null +++ b/doc/cephfs/Makefile @@ -0,0 +1,7 @@ +TARGETS=mds-state-diagram.svg + +%.svg: %.dot + dot -Tsvg -o $@ $^ + + +all: $(TARGETS) diff --git a/doc/cephfs/administration.rst b/doc/cephfs/administration.rst index 3c4806876bd..348990a0484 100644 --- a/doc/cephfs/administration.rst +++ b/doc/cephfs/administration.rst @@ -1,3 +1,4 @@ +.. _cephfs-administration: CephFS Administrative commands ============================== diff --git a/doc/cephfs/disaster-recovery.rst b/doc/cephfs/disaster-recovery.rst index dd91f5d9c1a..71344e903b1 100644 --- a/doc/cephfs/disaster-recovery.rst +++ b/doc/cephfs/disaster-recovery.rst @@ -1,3 +1,4 @@ +.. 
_cephfs-disaster-recovery: Disaster recovery ================= diff --git a/doc/cephfs/index.rst b/doc/cephfs/index.rst index 859d3efbfd4..ad69d2a388d 100644 --- a/doc/cephfs/index.rst +++ b/doc/cephfs/index.rst @@ -51,6 +51,7 @@ least one :term:`Ceph Metadata Server` running. :maxdepth: 1 Add/Remove MDS(s) + MDS states MDS failover and standby configuration MDS Configuration Settings Client Configuration Settings @@ -86,6 +87,7 @@ authentication keyring. :maxdepth: 1 Deployment best practices + MDS States Administrative commands Understanding MDS Cache Size Limits POSIX compatibility diff --git a/doc/cephfs/mds-state-diagram.dot b/doc/cephfs/mds-state-diagram.dot new file mode 100644 index 00000000000..dee82506729 --- /dev/null +++ b/doc/cephfs/mds-state-diagram.dot @@ -0,0 +1,71 @@ +digraph { + +node [shape=circle,style=unfilled,fixedsize=true,width=2.0] + +node [color=blue,peripheries=1]; +N0 [label="up:boot"] + +node [color=orange,peripheries=2]; +N1 [label="up:creating"] +N0 -> N1 [color=orange,penwidth=2.0]; +N2 [label="up:starting"] +N0 -> N2 [color=orange,penwidth=2.0]; +N3 [label="up:replay"] +N0 -> N3 [color=orange,penwidth=2.0]; +N4 [label="up:resolve"] +N3 -> N4 [color=orange,penwidth=2.0]; +N5 [label="up:reconnect"] +N3 -> N5 [color=orange,penwidth=2.0]; +N4 -> N5 [color=orange,penwidth=2.0]; +N6 [label="up:rejoin"] +N5 -> N6 [color=orange,penwidth=2.0]; +N7 [label="up:clientreplay"] +N6 -> N7 [color=orange,penwidth=2.0]; + +node [color=green,peripheries=2]; +S0 [label="up:active"] +N7 -> S0 [color=green,penwidth=2.0]; +N1 -> S0 [color=green,penwidth=2.0]; +N2 -> S0 [color=green,penwidth=2.0]; +N6 -> S0 [color=green,penwidth=2.0]; +node [color=green,peripheries=1]; +S1 [label="up:standby"] +N0 -> S1 [color=green,penwidth=2.0]; +S2 [label="up:standby_replay"] +N0 -> S2 [color=green,penwidth=2.0]; + +// going down but still accessible by clients +node [color=purple,peripheries=2]; +S3 [label="up:stopping"] +S0 -> S3 [color=purple,penwidth=2.0]; + +// 
terminal (but "in") +node [shape=polygon,sides=6,color=red,peripheries=2]; +D0 [label="down:failed"] +N2 -> D0 [color=red,penwidth=2.0]; +N3 -> D0 [color=red,penwidth=2.0]; +N4 -> D0 [color=red,penwidth=2.0]; +N5 -> D0 [color=red,penwidth=2.0]; +N6 -> D0 [color=red,penwidth=2.0]; +N7 -> D0 [color=red,penwidth=2.0]; +S0 -> D0 [color=red,penwidth=2.0]; +S3 -> D0 [color=red,penwidth=2.0]; +D0 -> N3 [color=red,penwidth=2.0]; + +// terminal (but not "in") +node [shape=polygon,sides=6,color=black,peripheries=1]; +D1 [label="down:damaged"] +N3 -> D1 [color=black,penwidth=2.0]; +N4 -> D1 [color=black,penwidth=2.0]; +N5 -> D1 [color=black,penwidth=2.0]; +N6 -> D1 [color=black,penwidth=2.0]; +N7 -> D1 [color=black,penwidth=2.0]; +S0 -> D1 [color=black,penwidth=2.0]; +S3 -> D1 [color=black,penwidth=2.0]; +D1 -> D0 [color=red,penwidth=2.0] + +node [shape=polygon,sides=6,color=purple,peripheries=1]; +D3 [label="down:stopped"] +S3 -> D3 [color=purple,penwidth=2.0]; + +} diff --git a/doc/cephfs/mds-state-diagram.svg b/doc/cephfs/mds-state-diagram.svg new file mode 100644 index 00000000000..6c3127a3c23 --- /dev/null +++ b/doc/cephfs/mds-state-diagram.svg @@ -0,0 +1,311 @@ + + + + + + +%3 + + + +N0 + +up:boot + + + +N1 + + +up:creating + + + +N0->N1 + + + + + +N2 + + +up:starting + + + +N0->N2 + + + + + +N3 + + +up:replay + + + +N0->N3 + + + + + +S1 + +up:standby + + + +N0->S1 + + + + + +S2 + +up:standby_replay + + + +N0->S2 + + + + + +S0 + + +up:active + + + +N1->S0 + + + + + +N2->S0 + + + + + +D0 + + +down:failed + + + +N2->D0 + + + + + +N4 + + +up:resolve + + + +N3->N4 + + + + + +N5 + + +up:reconnect + + + +N3->N5 + + + + + +N3->D0 + + + + + +D1 + +down:damaged + + + +N3->D1 + + + + + +N4->N5 + + + + + +N4->D0 + + + + + +N4->D1 + + + + + +N6 + + +up:rejoin + + + +N5->N6 + + + + + +N5->D0 + + + + + +N5->D1 + + + + + +N7 + + +up:clientreplay + + + +N6->N7 + + + + + +N6->S0 + + + + + +N6->D0 + + + + + +N6->D1 + + + + + +N7->S0 + + + + + +N7->D0 + + + + + +N7->D1 + + + + + +S3 + + 
+up:stopping + + + +S0->S3 + + + + + +S0->D0 + + + + + +S0->D1 + + + + + +S3->D0 + + + + + +S3->D1 + + + + + +D3 + +down:stopped + + + +S3->D3 + + + + + +D0->N3 + + + + + +D1->D0 + + + + + diff --git a/doc/cephfs/mds-states.rst b/doc/cephfs/mds-states.rst new file mode 100644 index 00000000000..617d8b1e3d5 --- /dev/null +++ b/doc/cephfs/mds-states.rst @@ -0,0 +1,227 @@ + +MDS States +========== + + +The Metadata Server (MDS) goes through several states during normal operation +in CephFS. For example, some states indicate that the MDS is recovering from a +failover by a previous instance of the MDS. Here we'll document all of these +states and include a state diagram to visualize the transitions. + +State Descriptions +------------------ + +Common states +~~~~~~~~~~~~~~ + + +:: + + up:active + +This is the normal operating state of the MDS. It indicates that the MDS +and its rank in the file system is available. + + +:: + + up:standby + +The MDS is available to takeover for a failed rank (see also :ref:`mds-standby`). +The monitor will automatically assign an MDS in this state to a failed rank +once available. + + +:: + + up:standby_replay + +The MDS is following the journal of another ``up:active`` MDS. Should the +active MDS fail, having a standby MDS in replay mode is desirable as the MDS is +replaying the live journal and will more quickly takeover. A downside to having +standby replay MDSs is that they are not available to takeover for any other +MDS that fails, only the MDS they follow. + + +Less common or transitory states +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + +:: + + up:boot + +This state is broadcast to the Ceph monitors during startup. This state is +never visible as the Monitor immediately assign the MDS to an available rank or +commands the MDS to operate as a standby. The state is documented here for +completeness. 
+ + +:: + + up:creating + +The MDS is creating a new rank (perhaps rank 0) by constructing some per-rank +metadata (like the journal) and entering the MDS cluster. + + +:: + + up:starting + +The MDS is restarting a stopped rank. It opens associated per-rank metadata +and enters the MDS cluster. + + +:: + + up:stopping + +When a rank is deactivated (stopped), the monitors command an active MDS to +enter the ``up:stopping`` state. In this state, the MDS accepts no new client +connections, migrates all subtrees to other ranks in the file system, flush its +metadata journal, and, if the last rank (0), evict all clients and shutdown +(see also :ref:`cephfs-administration`). + + +:: + + up:replay + +The MDS taking over a failed rank. This state represents that the MDS is +recovering its journal and other metadata. + + +:: + + up:resolve + +The MDS enters this state from ``up:replay`` if the Ceph file system has +multiple ranks (including this one), i.e. it's not a single active MDS cluster. +The MDS is resolving any uncommitted inter-MDS operations. All ranks in the +file system must be in this state or later for progress to be made, i.e. no +rank can be failed/damaged or ``up:replay``. + + +:: + + up:reconnect + +An MDS enters this state from ``up:replay`` or ``up:resolve``. This state is to +solicit reconnections from clients. Any client which had a session with this +rank must reconnect during this time, configurable via +``mds_reconnect_timeout``. + + +:: + + up:rejoin + +The MDS enters this state from ``up:reconnect``. In this state, the MDS is +rejoining the MDS cluster cache. In particular, all inter-MDS locks on metadata +are reestablished. + +If there are no known client requests to be replayed, the MDS directly becomes +``up:active`` from this state. + + +:: + + up:clientreplay + +The MDS may enter this state from ``up:rejoin``. The MDS is replaying any +client requests which were replied to but not yet durable (not journaled). 
+Clients resend these requests during ``up:reconnect`` and the requests are +replayed once again. The MDS enters ``up:active`` after completing replay. + + +Failed states +~~~~~~~~~~~~~ + +:: + + down:failed + +No MDS actually holds this state. Instead, it is applied to the rank in the file system. For example: + +:: + + $ ceph fs dump + ... + max_mds 1 + in 0 + up {} + failed 0 + ... + +Rank 0 is part of the failed set. + + +:: + + down:damaged + +No MDS actually holds this state. Instead, it is applied to the rank in the file system. For example: + +:: + + $ ceph fs dump + ... + max_mds 1 + in 0 + up {} + failed + damaged 0 + ... + +Rank 0 has become damaged (see also :ref:`cephfs-disaster-recovery`) and has been placed in +the ``damaged`` set. An MDS which was running as rank 0 found metadata damage +that could not be automatically recovered. Operator intervention is required. + + +:: + + down:stopped + +No MDS actually holds this state. Instead, it is applied to the rank in the file system. For example: + +:: + + $ ceph fs dump + ... + max_mds 1 + in 0 + up {} + failed + damaged + stopped 1 + ... + +The rank has been stopped by reducing ``max_mds`` (see also :ref:`cephfs-multimds`). + +State Diagram +------------- + +This state diagram shows the possible state transitions for the MDS/rank. The legend is as follows: + +Color +~~~~~ + +- Green: MDS is active. +- Orange: MDS is in a transient state trying to become active. +- Red: MDS is indicating a state that causes the rank to be marked failed. +- Purple: MDS and rank are stopping. +- Black: MDS is indicating a state that causes the rank to be marked damaged. + +Shape +~~~~~ + +- Circle: an MDS holds this state. +- Hexagon: no MDS holds this state (it is applied to the rank). + +Lines +~~~~~ + +- A double-lined shape indicates the rank is "in". + +.. 
image:: mds-state-diagram.svg diff --git a/doc/cephfs/multimds.rst b/doc/cephfs/multimds.rst index c0b81e0c14f..a60ef39e5a7 100644 --- a/doc/cephfs/multimds.rst +++ b/doc/cephfs/multimds.rst @@ -1,3 +1,4 @@ +.. _cephfs-multimds: Configuring multiple active MDS daemons --------------------------------------- diff --git a/doc/cephfs/standby.rst b/doc/cephfs/standby.rst index 9bd37984b67..0aaab9b76c7 100644 --- a/doc/cephfs/standby.rst +++ b/doc/cephfs/standby.rst @@ -1,3 +1,4 @@ +.. _mds-standby: Terminology -----------