systemd: Support Graceful Reboot for AIO Node

Ceph AIO installations, on single or multiple nodes, do not cope well
with loopback mounts; in particular, they always run into a deadlock
during a graceful system reboot.

We already have an `rbdmap.service` that is friendly to graceful system
reboot, as shown below:

    [Unit]
    After=network-online.target
    Before=remote-fs-pre.target
    Wants=network-online.target remote-fs-pre.target

    [Service]
    ExecStart=/usr/bin/rbdmap map
    ExecReload=/usr/bin/rbdmap map
    ExecStop=/usr/bin/rbdmap unmap-all

This PR introduces:

  - `ceph-mon.target`: Ensure startup after `network-online.target` and
    before `remote-fs-pre.target`
  - `ceph-*.target`: Ensure startup after `ceph-mon.target` and before
    `remote-fs-pre.target`
  - `rbdmap.service`: Once all `_netdev` mounts have been unmounted by
    `remote-fs.target`, ensure all RBD devices are unmapped BEFORE any
    Ceph components under `ceph.target` are stopped during shutdown

The logic has been proven in concept by
<https://github.com/alvistack/ansible-role-ceph_common/tree/develop>;
it also works as expected with a Ceph + Kubernetes deployment by
<https://github.com/alvistack/ansible-collection-kubernetes/tree/develop>.
No more deadlocks happen during graceful system reboot, on both AIO
single-node and multiple-node setups with loopback mounts.

Also see:

  - <https://github.com/ceph/ceph/pull/36776>
  - <https://github.com/etcd-io/etcd/pull/12259>
  - <https://github.com/cri-o/cri-o/pull/4128>
  - <https://github.com/kubernetes/release/pull/1504>

Fixes: https://tracker.ceph.com/issues/47528
Signed-off-by: Wong Hoi Sing Edison <hswong3i@gmail.com>
This commit is contained in:
Wong Hoi Sing Edison 2020-08-25 12:16:54 +08:00
parent ff16f5d67c
commit d88c834ea4
No known key found for this signature in database
GPG Key ID: 1FB1DE0629A57FD1
15 changed files with 35 additions and 16 deletions

View File

@ -2,5 +2,6 @@
Description=ceph target allowing to start/stop all ceph-fuse@.service instances at once
PartOf=ceph.target
Before=ceph.target
[Install]
WantedBy=remote-fs.target ceph.target

View File

@ -2,5 +2,6 @@
Description=ceph target allowing to start/stop all ceph-immutable-object-cache@.service instances at once
PartOf=ceph.target
Before=ceph.target
[Install]
WantedBy=multi-user.target ceph.target

View File

@ -1,6 +1,9 @@
[Unit]
Description=ceph target allowing to start/stop all ceph-mds@.service instances at once
PartOf=ceph.target
After=ceph-mon.target
Before=ceph.target
Wants=ceph.target ceph-mon.target
[Install]
WantedBy=multi-user.target ceph.target

View File

@ -1,8 +1,9 @@
[Unit]
Description=Ceph metadata server daemon
After=network-online.target local-fs.target time-sync.target
Wants=network-online.target local-fs.target time-sync.target
PartOf=ceph-mds.target
After=network-online.target local-fs.target time-sync.target
Before=remote-fs-pre.target ceph-mds.target
Wants=network-online.target local-fs.target time-sync.target remote-fs-pre.target ceph-mds.target
[Service]
Environment=CLUSTER=ceph

View File

@ -1,6 +1,9 @@
[Unit]
Description=ceph target allowing to start/stop all ceph-mgr@.service instances at once
PartOf=ceph.target
After=ceph-mon.target
Before=ceph.target
Wants=ceph.target ceph-mon.target
[Install]
WantedBy=multi-user.target ceph.target

View File

@ -1,8 +1,9 @@
[Unit]
Description=Ceph cluster manager daemon
After=network-online.target local-fs.target time-sync.target
Wants=network-online.target local-fs.target time-sync.target
PartOf=ceph-mgr.target
After=network-online.target local-fs.target time-sync.target
Before=remote-fs-pre.target ceph-mgr.target
Wants=network-online.target local-fs.target time-sync.target remote-fs-pre.target ceph-mgr.target
[Service]
Environment=CLUSTER=ceph
@ -28,11 +29,9 @@ RestartSec=10
RestrictSUIDSGID=true
StartLimitBurst=3
StartLimitInterval=30min
# We need to disable this protection as some python libraries generate
# dynamic code, like python-cffi, and require mmap calls to succeed
MemoryDenyWriteExecute=false
[Install]
WantedBy=ceph-mgr.target

View File

@ -2,5 +2,7 @@
Description=ceph target allowing to start/stop all ceph-mon@.service instances at once
PartOf=ceph.target
Before=ceph.target
Wants=ceph.target
[Install]
WantedBy=multi-user.target ceph.target

View File

@ -1,13 +1,13 @@
[Unit]
Description=Ceph cluster monitor daemon
PartOf=ceph-mon.target
# According to:
# http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
# these can be removed once ceph-mon will dynamically change network
# configuration.
After=network-online.target local-fs.target time-sync.target
Wants=network-online.target local-fs.target time-sync.target
PartOf=ceph-mon.target
Before=remote-fs-pre.target ceph-mon.target
Wants=network-online.target local-fs.target time-sync.target remote-fs-pre.target ceph-mon.target
[Service]
Environment=CLUSTER=ceph

View File

@ -1,6 +1,9 @@
[Unit]
Description=ceph target allowing to start/stop all ceph-osd@.service instances at once
PartOf=ceph.target
After=ceph-mon.target
Before=ceph.target
Wants=ceph.target ceph-mon.target
[Install]
WantedBy=multi-user.target ceph.target

View File

@ -1,8 +1,9 @@
[Unit]
Description=Ceph object storage daemon osd.%i
After=network-online.target local-fs.target time-sync.target ceph-mon.target
Wants=network-online.target local-fs.target time-sync.target
PartOf=ceph-osd.target
After=network-online.target local-fs.target time-sync.target
Before=remote-fs-pre.target ceph-osd.target
Wants=network-online.target local-fs.target time-sync.target remote-fs-pre.target ceph-osd.target
[Service]
Environment=CLUSTER=ceph

View File

@ -1,6 +1,9 @@
[Unit]
Description=ceph target allowing to start/stop all ceph-radosgw@.service instances at once
PartOf=ceph.target
After=ceph-mon.target
Before=ceph.target
Wants=ceph.target ceph-mon.target
[Install]
WantedBy=multi-user.target ceph.target

View File

@ -1,8 +1,9 @@
[Unit]
Description=Ceph rados gateway
After=network-online.target local-fs.target time-sync.target
Wants=network-online.target local-fs.target time-sync.target
PartOf=ceph-radosgw.target
After=network-online.target local-fs.target time-sync.target
Before=remote-fs-pre.target ceph-radosgw.target
Wants=network-online.target local-fs.target time-sync.target remote-fs-pre.target ceph-radosgw.target
[Service]
Environment=CLUSTER=ceph

View File

@ -2,5 +2,6 @@
Description=ceph target allowing to start/stop all ceph-rbd-mirror@.service instances at once
PartOf=ceph.target
Before=ceph.target
[Install]
WantedBy=multi-user.target ceph.target

View File

@ -1,4 +1,5 @@
[Unit]
Description=ceph target allowing to start/stop all ceph*@.service instances at once
[Install]
WantedBy=multi-user.target

View File

@ -1,9 +1,8 @@
[Unit]
Description=Map RBD devices
After=network-online.target
After=network-online.target ceph.target
Before=remote-fs-pre.target
Wants=network-online.target remote-fs-pre.target
Wants=network-online.target remote-fs-pre.target ceph.target
[Service]
EnvironmentFile=-@SYSTEMD_ENV_FILE@