systemd: specific units for icpu

This commit is contained in:
Thomas Schoebel-Theuer 2020-12-07 13:39:56 +01:00 committed by Thomas Schoebel-Theuer
parent aaba8f5063
commit e28f13f21a
9 changed files with 299 additions and 0 deletions

View File

@ -0,0 +1,24 @@
[Unit]
Description=MARS global single-threaded daemon-reload
Documentation=https://github.com/schoebel/mars/docu/mars-manual.pdf
## The trick is to "misuse" this service (which exists only once)
## for inhibiting startup of "systemctl daemon-reload" in parallel to
## itself.
## In addition, Before= and After= dependencies may be used for
## runtime exclusion of parallelism from/with other services.
## This appears to be necessary in some places, where races
## between daemon-reload and startup of other services have been observed.
Before=mars-trigger.service
[Service]
# Important for blocking parallelism with itself
Type=oneshot
RemainAfterExit=no
# Important for avoiding failures: do not send any signals
KillMode=none
ExecStart=/usr/bin/systemctl daemon-reload
[Install]
WantedBy=mars.service

View File

@ -0,0 +1,17 @@
[Unit]
Description=MARS per-resource watcher for /mars/resource-@{res}/systemd-trigger
Documentation=https://github.com/schoebel/mars/docu/mars-manual.pdf
Requires=mars-trigger.path
After=mars-trigger.path
## Markers for the template processor
# ALWAYS_START
[Path]
PathExists=/mars/resource-@{res}/systemd-trigger
PathChanged=/mars/resource-@{res}/systemd-trigger
PathModified=/mars/resource-@{res}/systemd-trigger
Unit=mars-@escvar{res}-trigger.service
[Install]
WantedBy=mars.service

View File

@ -0,0 +1,36 @@
[Unit]
Description=MARS per-resource trigger from /mars/resource-@{res}/systemd-trigger
Documentation=https://github.com/schoebel/mars/docu/mars-manual.pdf
## Prohibit parallelism with any daemon-reload
After=daemon-reload.service
## The per-resource trigger is the _only_ one which is allowed
## to remotely start / stop any units dealing with resources.
## Otherwise, races or conflicting start / stop operations might
## occur, leading to deadlocks / failures / etc.
##
## General rule: each *.path watcher can only start its corresponding
## *-trigger.service at most _once_ in parallel (provided that the actions
## caused by "marsadm systemd-trigger-extern" are not forking into background).
## So there is never any parallelism of _any_ unit with exactly _itself_
## (although _different_ units are allowed to run in parallel to each other,
## of course).
## Do not start both the global and any of the per-resource triggers
## in parallel.
## It could lead to various races between global and per-resource units.
## In contrast, different per-resource triggers are not mutually exclusive.
After=mars-trigger.service
[Service]
# Important for blocking parallelism with itself
Type=oneshot
RemainAfterExit=no
# Important for avoiding failures: do not send any signals
KillMode=none
# Important: the resource argument will keep remote triggers disjoint
ExecStart=/usr/bin/marsadm systemd-trigger-extern @{res}
[Install]
WantedBy=mars.service

View File

@ -0,0 +1,20 @@
[Unit]
Description=MARS global watcher /mars/userspace/systemd-trigger
Documentation=https://github.com/schoebel/mars/docu/mars-user-manual.pdf
## Global path watcher for calling mars-trigger.service
## Markers for the template processor
# ALWAYS_START
BindsTo=mars.service
After=mars.service
[Path]
PathExists=/mars/userspace/systemd-trigger
PathChanged=/mars/userspace/systemd-trigger
PathModified=/mars/userspace/systemd-trigger
Unit=mars-trigger.service
[Install]
WantedBy=mars.service

View File

@ -0,0 +1,26 @@
[Unit]
Description=MARS global trigger from /mars/userspace/systemd-trigger
Documentation=https://github.com/schoebel/mars/docu/mars-manual.pdf
## Prohibit parallelism with any daemon-reload
After=daemon-reload.service
## The global trigger calls "marsadm systemd-trigger-extern" _without_
## resource argument.
## The global variant will only generate any new templates, but will not
## activate/deactivate them (except when they are new).
## This is necessary for race avoidance with per-resource triggers.
## Only the per-resource triggers (see mars-@{res}-trigger.service)
## are allowed to actually start/stop any units dealing with resources.
[Service]
# Important for blocking parallelism with itself
Type=oneshot
RemainAfterExit=no
# Important for avoiding failures: do not send any signals
KillMode=none
# Important: no resource argument must be given here
ExecStart=/usr/bin/marsadm systemd-trigger-extern
[Install]
WantedBy=mars.service

13
systemd-icpu/mars.path Normal file
View File

@ -0,0 +1,13 @@
[Unit]
Description=MARS block layer HA (activation by appearance of /mars/uuid)
Documentation=https://github.com/schoebel/mars/docu/mars-user-manual.pdf
## Markers for the template processor
# ALWAYS_START
[Path]
PathExists=/mars/uuid
Unit=mars.service
[Install]
WantedBy=mars.service

26
systemd-icpu/mars.service Normal file
View File

@ -0,0 +1,26 @@
[Unit]
Description=MARS block layer HA (kernel module)
Documentation=https://github.com/schoebel/mars/docu/mars-user-manual.pdf
## Markers for the template processor
# ALWAYS_START
BindsTo=mars.mount
After=mars.mount
RequiresMountsFor=/mars/uuid
Wants=network-online.target
StopWhenUnneeded=false
IgnoreOnIsolate=true
[Service]
Type=oneshot
# Important for avoiding failures: do not send any signals
KillMode=none
RemainAfterExit=yes
ExecStart=/sbin/modprobe mars
ExecStop=/sbin/rmmod mars
[Install]
WantedBy=mars.mount

23
systemd-icpu/modprobe-mars.sh Executable file
View File

@ -0,0 +1,23 @@
#!/bin/bash
# Stop the nodeagent-handled resources reported by marsadm, then load the
# mars kernel module and fire the systemd trigger.
#
# Exit status: the exit status of "modprobe mars".

# No longer start any systemd-controlled resources via nodeagent init.
#
# The resource names are derived from the output of
# "marsadm get-systemd-unit all": take the first column of every line that
# starts with "nodeagent", strip the "nodeagent-" prefix, cut everything from
# the first dot onwards, and replace each literal "\x2d" by a plain "-".
#
# The names are collected line by line into an array instead of being
# word-split by the shell, so that they are never subject to pathname
# expansion.
mapfile -t resources < <(
  marsadm get-systemd-unit all |
    grep "^nodeagent" |
    awk '{ print $1; }' |
    sed 's/nodeagent-//' |
    cut -d. -f1 |
    sed 's/\\x2d/-/g'
)

for res in "${resources[@]}"; do
  # An empty line would have been dropped by word splitting before;
  # keep ignoring it.
  [[ -n "$res" ]] || continue
  # workaround "nodeagent status --target"
  # A failing stop is deliberately tolerated (best effort).
  nodeagent stop "$res" || echo ignore the error
done

modprobe mars
# Remember the modprobe result: it is the exit status of this script,
# regardless of what the following commands return.
rc=$?
# NOTE(review): presumably the delay gives the freshly loaded module time to
# settle before the trigger is fired -- confirm.
sleep 3
marsadm systemd-trigger
exit $rc

View File

@ -0,0 +1,114 @@
#!/bin/bash
# TST Nov 2020
# Tried to make any operations as idempotent as possible.
#
# At the moment, idempotent startup fails with a message like
# "Failed to start transient scope unit: Unit infongws-test06.scope already exists."
# Theoretically, I could treat the existence and/or the runtime status of said intermediate scope unit
# as an indicator for directly reporting success.
# However, I am unsure what could be the consequences when the main LXC container fails but the
# scope unit persists for whatever reason. Possibly, it could remain in a state where it could be
# neither startable nor stoppable.
# Leaving this kind of behaviour untouched for now.
# Currently, it doesn't seem to produce harm, but rather looks like a false-positive error message.
# In case somebody would be confused by this error message, some other parts of nodeagent
# (which is outside of my scope) should be fixed, in preference to trying to workaround it here.
# Markers for the template processor
# DEFAULT_START
# DEFAULT_STOP
# KEEP_RUNNING
# ALSO=mars-@escvar{res}-trigger.path
# ALSO=mars-@escvar{res}-trigger.service
# Requested operation (first argument): status, start or stop.
# Any other value is reported and ignored.
op="$1"

# Ensure that pure trigger does not change anything.
# The modification time of the systemd-want file is compared with the one of
# its copy, which report_success refreshes after each successful start / stop.
# This check runs before the operation is even looked at, so it applies to
# every operation, including "status".
systemd_want_path="/mars/resource-@{res}/systemd-want"
systemd_copy_path="/mars/resource-@{res}/userspace/systemd-want"
systemd_want="$(stat --printf="%Y\n" "$systemd_want_path")"
systemd_copy="$(stat --printf="%Y\n" "$systemd_copy_path")"
echo "Old timestamp: '$systemd_copy'"
echo "New timestamp: '$systemd_want'"
# When stat fails (e.g. the file does not exist), the value is empty and
# counts as 0 in arithmetic context. A missing copy therefore never counts
# as "unmodified".
if (( systemd_copy && systemd_want == systemd_copy )); then
  echo "Unmodified timestamp $systemd_want_path $systemd_want"
  exit 0
fi

# Record a successfully executed operation by copying the systemd-want file
# to its userspace copy. rsync -a preserves the modification time, which is
# what the timestamp comparison above relies on.
function report_success
{
  rsync -av "$systemd_want_path" "$systemd_copy_path"
}

mars_dev=/dev/mars/@{res}

case "$op" in
  status)
    # NOTE(review): the exit status of "nodeagent status" is not propagated;
    # this branch falls through to the final "exit 0" -- confirm that this
    # is intended.
    /usr/sbin/nodeagent status @{res}
    ;;
  start)
    # Assumption: start and vmstart seem to be idempotent already
    if ! [[ -b "$mars_dev" ]]; then
      echo "ignoring, $mars_dev is not present"
      exit 0
    fi
    if mountpoint /vol/@{res}; then
      /usr/sbin/nodeagent vmstart @{res}
    else
      /usr/sbin/nodeagent start @{res}
    fi
    # Status of whichever nodeagent call was executed above
    rc=$?
    if (( !rc )); then
      report_success
    fi
    exit $rc
    ;;
  stop)
    # The following does not work: if ! mountpoint /vol/@{res}; then ...; fi
    # In the presence of bind mounts, the main mountpoint may vanish,
    # but sub-bindmounts may persist, leaving /dev/mars/@{res} opened,
    # and consequently hindering a planned handover.
    # This is a Linux kernel feature, not a bug.
    # Thus we need to mistrust the main mountpoint /vol/@{res} .
    # For safety, check the presence of /dev/mars/@{res} instead.
    #
    # A single check is sufficient here: $mars_dev is exactly this device
    # path, so a second test of the same path could never trigger.
    if ! [[ -b "$mars_dev" ]]; then
      # Idempotence
      exit 0
    fi
    # Additionally, check whether the mars device is opened.
    if (( !$(marsadm view-device-opened @{res}) )); then
      # Idempotence
      exit 0
    fi
    /usr/sbin/nodeagent stop @{res}
    rc=$?
    # for safety
    if (( !rc )); then
      sleep 1
      if mountpoint /vol/@{res}; then
        umount /vol/@{res}
      fi
      report_success
    fi
    exit $rc
    ;;
  *)
    # Ignore all other ops, like enable / disable / etc
    echo "Ignore '$op'"
    exit 0
esac
exit 0