#!/bin/bash
#
# (Mirror of https://github.com/schoebel/mars)
#
# This file is part of MARS project: http://schoebel.github.io/mars/
#
# Copyright (C) 2015 Thomas Schoebel-Theuer
# Copyright (C) 2015 1&1 Internet AG
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

############################################################

# TST spring 2015 lab prototype for mass rollout of MARS

# Environment-specific actions are encoded into variables.
# Change them (e.g. in /etc/mars/rollout.conf) for adaptation to
# any other operating environment.
#
# A few conventions are firmly built in: resource names and LVM disk names
# must be equal. In addition, it is advisable that VM names and
# resource names should also be strongly related (but VM names
# may have suffixes like infong4711.schlund.de).
#
# Please feel free to adapt this to your needs.

# A pipeline fails when any of its stages fails.
set -o pipefail

# Names of all variables that exist before the defaults below are applied.
# do_help leaves these out of its listing.
orig_vars="$(set | grep '^[_A-Za-z0-9]\+=' | cut -d= -f1)"

# Defaults for configuration variables
default_config=${default_config:-/etc/mars/rollout.conf}
# The rest is hardcoded here in case the config file does not exist
dry_run=${dry_run:-0}
verbose=${verbose:-0}
confirm=${confirm:-1}
help=${help:-0}
# Brace expression, expanded later via eval into the list of phases to run.
# Like every other default it can be preset via the environment.
phase="${phase:-}"
[[ -n "$phase" ]] || phase="{0..8}"
use_fake_sync=${use_fake_sync:-1}
override_fake_sync=${override_fake_sync:-0}
fakeable_resources="${fakeable_resources:-}"
sshopt="${sshopt:--4 -A -T -o StrictHostKeyChecking=no -o ForwardX11=no -o KbdInteractiveAuthentication=no -o VerifyHostKeyDNS=no}"
primary="${primary:-}"
secondary="${secondary:-}"
devices="${devices:-}"
device_pattern="${device_pattern:-/dev/vg*/{infong,ovz\}*}"
device_remove_regex="${device_remove_regex:-.-md\|old\|-bak}"
lvcreate_cmd="${lvcreate_cmd:-lvcreate -I 4M -L512G -n mars}"
drbd_force_unload="${drbd_force_unload:-0}"
drbd_dstate_cmd="${drbd_dstate_cmd:-drbdadm dstate}"
drbd_dstate_pattern="${drbd_dstate_pattern:-UpToDate/UpToDate}"
drbd_get_resources="${drbd_get_resources:-configure_InfongSpace.pl --list all | awk '{ print \$1; }' | sort -u}"
drbd_down_cmd="${drbd_down_cmd:-drbdadm down all || echo IGNORING failed DRBD shutdown because the kernel module will be unloaded anyway}"
drbd_update_config_res="${drbd_update_config_res:-configure_InfongSpace.pl --update-infong \$res repltype=mars}"
drbd_update_config_global="${drbd_update_config_global:-configure_InfongSpace.pl --write-drbd-conf}"
drbd_stop_cmd="${drbd_stop_cmd:-/etc/init.d/drbd stop || { ! [[ -e /proc/drbd ]] && echo stopping DRBD by hand && rmmod drbd; \}}"
mars_start_cmd="${mars_start_cmd:-ui-config-modify -c MARS_ENABLED=true; /etc/init.d/mars start}"
vm_reinit_cmd="${vm_reinit_cmd:-/etc/init.d/clustermanager restart; sleep 20; cm3 --stop all; sleep 5; cm3 -us}"
vm_status_cmd="${vm_status_cmd:-cm3 -us}"
vm_stopped_all_cmd="${vm_stopped_all_cmd:-cm3 --list-vms --with-status | grep -v '^\$' | grep -vi stopped | grep '.'}"
vm_stop_cmd="${vm_stop_cmd:-cm3 --stop all || { sleep 10; /etc/init.d/clustermanager restart && sleep 20 && cm3 --stop all; \}}"
vm_start_cmd="${vm_start_cmd:-/etc/init.d/clustermanager restart; sleep 20; cm3 --stop all; /etc/init.d/clustermanager restart; sleep 20; cm3 --stop all; cm3 -us; cm3 --start all; sleep 10; cm3 -us; for dummy in {0..2\}; do count=0; for i in \$(cm3 --list-vms --with-status | grep -i broken | cut -d: -f1); do echo \"RESTARTING BROKEN \$i\"; (( count++ )); cm3 -us; sleep 10; cm3 --stop \$i; done; if (( count )); then sleep 10; cm3 --start all; sleep 10; fi; done}"
# END configuration variables

# Names of all variables that exist after the defaults have been applied.
param_vars="$(set | grep '^[_A-Za-z0-9]\+=' | cut -d= -f1)"

# Report a fatal error on stderr and terminate the current (sub)shell.
# Arguments: $1 - error text (optional; a generic text is used when missing)
# Exit status: 255 (the value the former "exit -1" produced, spelled portably)
function fail
{
    local txt="${1:-Unknown failure}"

    echo "FAILURE: $txt" >&2
    exit 255
}

# Ask the operator for confirmation on stdin.
# Arguments: $1 - when non-empty, the prompt additionally offers "S to skip"
# Globals:   confirm (read) - when 0, no prompt is shown at all
# Returns:   0 to continue, 1 when the answer starts with "s" or "S"
function do_confirm
{
    local skip="${1:-}"
    local response

    (( !confirm )) && return 0

    [[ "$skip" != "" ]] && skip="S to skip, "
    echo -n "[CONFIRM: Press ${skip}Return to continue, ^C to abort] "
    # -r: take the answer literally, without backslash processing
    read -r response
    if [[ "$response" =~ ^[sS] ]]; then
        return 1
    fi
    return 0
}

# Run a shell command as root on a remote host via ssh.
# Arguments: $1 - host name
#            $2 - command line; an empty or whitespace-only command is a no-op
#            $3 - "nofail" flag (default 0): when non-zero, a failing command
#                 does not abort the script, its exit status is returned
# Globals:   sshopt (read, deliberately word-split into ssh options),
#            verbose (read)
# Returns:   0 on success; with nofail the exit status of ssh; otherwise
#            fail is called on error.
function remote
{
    local host="$1"
    local cmd="$2"
    local nofail="${3:-0}"
    local status=0

    (( verbose > 0 )) && echo "Executing on $host: '$cmd'" >&2
    [[ "${cmd//[[:space:]]/}" = "" ]] && return 0

    # Remember the ssh status right away: any later test would overwrite $?.
    ssh $sshopt "root@$host" "$cmd" || status=$?
    if (( status == 0 )); then
        return 0
    fi
    if (( nofail )); then
        return $status
    fi
    fail "ssh to '$host' command '$cmd' failed with status $status"
}

# Run a state-changing command on a remote host, honouring dry-run and
# confirmation mode.
# Arguments: $1 - host name
#            $2 - command line
# Globals:   dry_run (read) - when non-zero the command is only printed
#            confirm (read) - when non-zero the command is printed and
#                             do_confirm decides whether it is run or skipped
function remote_action
{
    local host="$1"
    local cmd="$2"

    if (( dry_run )); then
        echo "DRY_RUN REMOTE $host ACTION '$cmd'"
        return
    fi

    if (( !confirm )); then
        remote "$host" "$cmd"
        return
    fi

    echo "REMOTE $host ACTION '$cmd'"
    if do_confirm 1; then
        remote "$host" "$cmd"
    else
        echo "SKIPPING $host ACTION '$cmd'"
    fi
}

# Print the resource name belonging to a device path, i.e. everything after
# the last "/" of the path.
# Arguments: $1 - device path (mandatory; fail is called when it is missing)
function _get_resource
{
    local device="${1:-}"

    # An explicit check is needed here: "local x=$(...) || ..." can never
    # trigger because "local" itself always succeeds.
    [[ -n "$device" ]] || fail "Resource argument is missing"
    printf '%s\n' "${device##*/}"
}

# Check whether all VMs on a host are stopped.
# The remote side runs $vm_stopped_all_cmd with its output sent to stderr and
# prints the exit status of that command on stdout. Status 0 is treated as
# "not all VMs are stopped", any other status as "all VMs are stopped".
# Arguments: $1 - host name
# Outputs:   a one-line summary on stderr
# Returns:   0 when all VMs are stopped, 1 otherwise
function are_all_vms_stopped
{
    local host="$1"
    local ret

    ret="$(remote "$host" "{ $vm_stopped_all_cmd ; } 1>&2 ; echo \$?")"
    # Anything that is not a plain number (e.g. no answer at all) is handled
    # like status 0, i.e. "NOT stopped", as an empty answer always was.
    [[ "$ret" =~ ^[0-9]+$ ]] || ret=0

    if (( ret )); then
        echo "VMs on $host are stopped" >&2
        return 0
    fi
    echo "VMs on $host are NOT stopped" >&2
    return 1
}

# Source a file into the running shell when it is readable; an unreadable or
# missing file is silently ignored.
# Arguments: $1 - path of the file
#            $2 - short description of the file, used in the error message
# Calls fail when sourcing the file returns a non-zero status.
function source_when_possible
{
    local file="$1"
    local type="$2"

    [[ -r "$file" ]] || return 0
    . "$file" || fail "$type file $file is not parsable"
}

source_when_possible "$default_config" "config"

# Allow forceful override of any _known_ variable at the command line

# Assign a value to an already existing global variable.
# Arguments: $1 - variable name, $2 - new value
# Calls fail when $1 is not a valid identifier or no such variable is set.
# A variable that is set but empty (e.g. "devices") counts as known.
function _override_var
{
    local _ov_name="$1"
    local _ov_value="$2"

    if ! [[ "$_ov_name" =~ ^[_A-Za-z][_A-Za-z0-9]*$ ]] || [[ -z "${!_ov_name+known}" ]]; then
        fail "Variable '$_ov_name' is unknown"
    fi
    # printf -v stores the value literally, so values containing blanks or
    # shell metacharacters are safe.
    printf -v "$_ov_name" '%s' "$_ov_value"
}

# Interpret the command line.
#   --some-name         sets variable some_name to 1
#   --some-name=value   sets variable some_name to value
#   -h                  sets help=1
#   -v                  increments verbose
#   anything else       first the primary, then the secondary host name
# Dashes in option names are mapped to underscores.
function _parse_args
{
    local _pa_arg _pa_name

    for _pa_arg; do
        if [[ "$_pa_arg" =~ ^--[-_A-Za-z0-9]+$ ]]; then
            _pa_name="${_pa_arg#--}"
            _override_var "${_pa_name//-/_}" 1
        elif [[ "$_pa_arg" =~ ^--[-_A-Za-z0-9]+= ]]; then
            _pa_name="${_pa_arg#--}"
            _pa_name="${_pa_name%%=*}"
            _override_var "${_pa_name//-/_}" "${_pa_arg#*=}"
        elif [[ "$_pa_arg" = "-h" ]]; then
            help=1
        elif [[ "$_pa_arg" = "-v" ]]; then
            (( verbose++ ))
        elif [[ "$primary" = "" ]]; then
            primary="$_pa_arg"
        elif [[ "$secondary" = "" ]]; then
            secondary="$_pa_arg"
        else
            fail "bad parameter syntax '$_pa_arg'"
        fi
    done
}

_parse_args "$@"

# Print the usage text followed by the current values of all parameter
# variables.
# Globals: orig_vars (read)  - names that are left out of the listing
#          param_vars (read) - candidate names; names ending in "_vars" are
#                              skipped as well
# Purely numeric values are printed bare, all others in double quotes.
function do_help
{
    cat <<EOF
usage: $0 [options] <primaryhost> <secondaryhost>

The following parameter variables can be either passed by the
environment, or used for hard overriding on the command line
via --variable=value syntax:

$(
    declare -A orig=()
    for name in $orig_vars; do
        orig[$name]=1
    done
    for name in $param_vars; do
        [[ "$name" =~ _vars$ ]] && continue
        [[ -n "${orig[$name]:-}" ]] && continue
        # Indirect expansion shows the value verbatim; an unquoted
        # "eval echo" would word-split it and expand glob characters.
        value="${!name}"
        if [[ "$value" =~ ^[0-9]+$ ]]; then
            printf '%s=%s\n' "$name" "$value"
        else
            printf '%s="%s"\n' "$name" "$value"
        fi
    done
)
EOF
}

# Explicit help request: show the usage text and stop successfully.
if (( help )); then
    do_help
    exit 0
fi

# Both host names are mandatory and must differ from each other.
if [[ "$primary" = "" ]]; then
    do_help
    fail "No primary hostname given"
fi
if [[ "$secondary" = "" ]]; then
    do_help
    fail "No secondary hostname given"
fi
if [[ "$primary" = "$secondary" ]]; then
    fail "Primary and secondary hostnames must be distinct"
fi

# Phase 0: make sure that a device /dev/*/mars exists and that /mars is
# mounted on both hosts.
function _phase_prepare_mars_fs
{
    local host line vg_name pv_count

    echo "Create the /mars filesystem when necessary, ensure that it is mounted"
    for host in "$primary" "$secondary"; do
        if (( $(remote "$host" "ls /dev/*/mars 1>&2; echo \$?") )); then
            # First line of "vgdisplay -c" after a reverse numeric sort on
            # colon-separated field 16. Declaration and assignment are
            # separate so that a failure really reaches "|| fail".
            line="$(remote "$host" "vgdisplay -c | sort -n -t: -k16 -r | head -1")" || fail "Cannot determine VG"
            vg_name="$(echo "$line" | cut -d: -f1)"
            [[ "${vg_name// /}" = "" ]] && fail "Invalid VG name '$vg_name'"
            pv_count="$(echo "$line" | cut -d: -f10)"
            (( pv_count < 1 )) && fail "Invalid PV count '$pv_count'"
            echo "Host $host VG '$vg_name' (has $pv_count physical volumes)"
            remote_action "$host" "$lvcreate_cmd -i $pv_count $vg_name"
            sleep 2
            if (( $(remote "$host" "ls /dev/*/mars 1>&2; echo \$?") )); then
                fail "No LV for /mars exists on $host"
            fi
        fi
        if (( $(remote "$host" "grep -q /mars /proc/mounts; echo \$?") )); then
            remote_action "$host" "[[ -d /mars ]] || mkdir /mars; mount /mars || { mkfs.ext4 -L mars /dev/*/mars && mount /dev/*/mars /mars; }"
            if (( $(remote "$host" "grep -q /mars /proc/mounts; echo \$?") )); then
                fail "No /mars is mounted on $host"
            fi
        fi
    done
}

# Phase 1: create the cluster on the primary and join it on the secondary,
# each only when /mars/uuid cannot be listed on that host.
function _phase_setup_cluster
{
    echo "Create/join the MARS cluster when necessary"
    if (( $(remote "$primary" "ls -l /mars/uuid 1>&2; echo \$?") )); then
        echo "Host $primary create-cluster"
        remote_action "$primary" "marsadm create-cluster"
    fi
    if (( $(remote "$secondary" "ls -l /mars/uuid 1>&2; echo \$?") )); then
        echo "Host $secondary join-cluster"
        remote_action "$secondary" "marsadm join-cluster $primary"
    fi
}

# Phase 2: stop the VMs on both hosts and record when the downtime started
# and ended (globals downtime_start / downtime_end).
function _phase_stop_vms
{
    local host

    echo "Stop VMs when necessary"
    for host in "$primary" "$secondary"; do
        if are_all_vms_stopped "$host"; then
            echo "No VMs are running on host $host."
            continue
        fi
        echo "Some VMs are running on host $host"
        (( !downtime_start )) && downtime_start=$(date +%s)
        remote_action "$host" "$vm_stop_cmd"
        downtime_end=$(date +%s)
        echo "ESTIMATED operation duration: $(( downtime_end - downtime_start )) seconds"
        if ! are_all_vms_stopped "$host"; then
            fail "Some VMs are running on host $host"
        fi
    done
    if (( downtime_start )); then
        echo "ESTIMATED total shutdown operation duration: $(( downtime_end - downtime_start )) seconds"
    fi
}

# Phase 3: inspect the DRBD disk states on the primary, collect the resources
# whose state line matches $drbd_dstate_pattern into the global
# fakeable_resources, then back up the DRBD configuration, shut DRBD down and
# run the configuration update commands on both hosts.
# When /proc/drbd does not exist on the primary (and drbd_force_unload is 0)
# nothing is done except switching the global use_fake_sync off.
function _phase_stop_drbd
{
    local host res txt cmd drbd_res dstate

    echo "Stop DRBD when necessary"
    if (( drbd_force_unload || !$(remote "$primary" "[[ -e /proc/drbd ]]; echo \$?") )); then
        drbd_res="$(remote "$primary" "$drbd_get_resources")" || fail "Cannot get DRBD resources on $primary"
        echo "DRBD resources on host $primary: $(echo $drbd_res)"
        cmd="for i in $(echo $drbd_res); do echo -n \"\$i \"; $drbd_dstate_cmd \$i; done"
        echo "DRBD dstate on host $primary:"
        # The state listing is kept in a variable; no temporary file with a
        # predictable name in /tmp is needed.
        dstate="$(remote "$primary" "$cmd")"
        [[ -n "$dstate" ]] && echo "$dstate"
        if [[ -n "$dstate" ]] && grep -qv -e "$drbd_dstate_pattern" <<<"$dstate"; then
            echo "DRBD on $primary is NOT in sync"
        else
            echo "DRBD on $primary is in sync"
        fi
        if (( use_fake_sync )); then
            echo "The following resources are fakeable:"
            while read -r res txt; do
                # No matching line at all must not add an empty entry.
                [[ -n "$res" ]] || continue
                echo "$res $txt"
                fakeable_resources+=" $res"
            done <<<"$(grep -e "$drbd_dstate_pattern" <<<"$dstate")"
            echo "List of fakeable DRBD resources: $fakeable_resources"
        fi

        # The update command is identical for both hosts: one instance of
        # $drbd_update_config_res per resource, with the literal text "$res"
        # replaced by the resource name.
        cmd=""
        for res in $drbd_res; do
            cmd+="${drbd_update_config_res/\$res/$res} ; "
        done
        cmd+="$drbd_update_config_global ; $drbd_stop_cmd"

        for host in "$primary" "$secondary"; do
            echo "Creating DRBD backup on $host"
            remote_action "$host" "tar czvf /var/backups/drbd-config-$(date +%Y%m%d-%H%M).tgz /etc/drbd* || true"
            echo "Shutdown DRBD on $host"
            remote_action "$host" "$drbd_down_cmd"
            remote_action "$host" "$cmd"
        done
    else
        echo "DRBD is NOT in use, switching off fake-sync"
        use_fake_sync=0
    fi
}

# Phase 4: start MARS where /proc/sys/mars is missing, create the resources
# on the primary and join them on the secondary, then optionally fake-sync.
# Reads the per-host device lists devices_<host> and the array sizes[].
function _phase_start_mars
{
    local host device res this_size cmd list_var

    echo "Start MARS when necessary"
    for host in "$primary" "$secondary"; do
        if (( $(remote "$host" "[[ -d /proc/sys/mars ]]; echo \$?") )); then
            remote_action "$host" "$mars_start_cmd"
            sleep 3 &
        else
            echo "MARS is already running on $host"
        fi
    done
    wait

    for host in "$primary" "$secondary"; do
        cmd=""
        list_var="devices_${host//-/_}"
        for device in ${!list_var}; do
            res="$(_get_resource "$device")"
            [[ "$res" = "" ]] && fail "Implausible resource name '$res'"
            this_size=${sizes[$res]}
            (( this_size < 4096 )) && fail "Implausible device size '$this_size'"
            if (( $(remote "$host" "[[ -e /mars/resource-$res/data-$host ]]; echo \$?") )); then
                echo "RESOURCE $res on $host: device $device size $this_size"
                if [[ "$host" = "$primary" ]]; then
                    cmd+="marsadm create-resource $res $device $res $this_size && "
                else
                    [[ "$cmd" = "" ]] && cmd="marsadm wait-cluster ; "
                    cmd+="marsadm join-resource $res $device && "
                fi
            else
                echo "RESOURCE $res already exists on $host"
            fi
        done
        if [[ "$cmd" != "" ]]; then
            # Every part ends in "&&", so a final "true" closes the chain.
            remote_action "$host" "$cmd true"
        fi
    done

    if (( use_fake_sync )) && [[ "$fakeable_resources" != "" ]]; then
        echo "Starting FAKE-SYNC on resources $fakeable_resources"
        remote_action "$secondary" "for i in $fakeable_resources; do marsadm fake-sync \$i; done"
    elif (( override_fake_sync )); then
        echo "OVERRIDING FAKE-SYNC on ALL resources"
        remote_action "$secondary" "marsadm fake-sync all"
    else
        echo "no fake-sync is executed"
    fi
}

# Phase 5: print "marsadm view all" of both hosts.
function _phase_show_mars_status
{
    local host

    echo "Show status of MARS"
    for host in "$primary" "$secondary"; do
        echo ""
        echo "MARS Status on $host:"
        remote "$host" "marsadm view all"
    done
}

# Phase 6: run $vm_reinit_cmd on both hosts.
function _phase_reinit_vms
{
    local host

    echo "Reinit VM clustermanager"
    for host in "$primary" "$secondary"; do
        echo "------ Reinit $host:"
        remote_action "$host" "$vm_reinit_cmd"
    done
}

# Phase 7: start the VMs on the primary, but only when all of them are
# currently stopped there; prints the measured durations.
function _phase_start_vms
{
    local final

    echo "Start VMs when necessary"
    if are_all_vms_stopped "$primary"; then
        uptime_start=$(date +%s)
        remote_action "$primary" "$vm_start_cmd"
        final=$(date +%s)
        echo "ESTIMATED startup duration: $(( final - uptime_start )) seconds"
        if (( downtime_start )); then
            echo "ESTIMATED total VM downtime: $(( final - downtime_start )) seconds"
        fi
        echo ""
    else
        echo "Some VMs are running on host $primary. Please check by hand whether some of them need a restart."
    fi
}

# Phase 8: run $vm_status_cmd on both hosts.
function _phase_show_vm_status
{
    local host

    echo "Show status of VMs"
    for host in "$primary" "$secondary"; do
        echo "------ Status on $host:"
        remote "$host" "$vm_status_cmd"
    done
}

# Execute one rollout phase.
# Arguments: $1 - phase number (0..8); any other value only prints a notice
function do_phase
{
    local phase="$1"

    echo ""
    echo "------- Phase $phase"
    echo ""

    case "$phase" in
    0) _phase_prepare_mars_fs ;;
    1) _phase_setup_cluster ;;
    2) _phase_stop_vms ;;
    3) _phase_stop_drbd ;;
    4) _phase_start_mars ;;
    5) _phase_show_mars_status ;;
    6) _phase_reinit_vms ;;
    7) _phase_start_vms ;;
    8) _phase_show_vm_status ;;
    *)
        echo "Unknown / unimplemented phase '$phase'"
        ;;
    esac
}

# Main program: check that both hosts are reachable, determine devices,
# resources and sizes, ask for confirmation and run the selected phases.
# Globals read:    primary, secondary, devices, device_pattern,
#                  device_remove_regex, phase
# Globals written: devices_<host> and resources_<host> for both hosts (with
#                  "-" in the host name replaced by "_")
# The associative array sizes[] (resource name -> size) is local to this
# function but visible to the functions called from here, e.g. do_phase.
function main
{
    local host device sector_size this_size this_resource current
    local list_var res_var primary_res secondary_res lv_sizes res this_phase
    local script_start script_end
    local -A sizes=()

    echo "Script $0 running phase $phase"
    echo ""
    echo "Params: $0 $*"
    echo ""
    echo "primary: '$primary'"
    echo "secondary: '$secondary'"
    echo ""

    script_start=$(date +%s)

    for host in "$primary" "$secondary"; do
        ping -c 1 "$host" || fail "Host '$host' is not reachable"
        remote "$host" uptime || fail "ssh connection to '$host' does not work. Ensure that ssh-agent is running."
    done
    echo ""

    # when necessary, determine list of devices

    if [[ "$devices" = "" ]]; then
        for host in "$primary" "$secondary"; do
            list_var="devices_${host//-/_}"
            # The status of the listing is deliberately not checked: "ls"
            # also reports an error when only a part of the pattern has no
            # match. An empty result is rejected further below.
            printf -v "$list_var" '%s' "$(remote "$host" "ls $device_pattern" 2>/dev/null | grep -v -e "$device_remove_regex")"
            echo "$list_var:" ${!list_var}
        done
    else
        for host in "$primary" "$secondary"; do
            printf -v "devices_${host//-/_}" '%s' "$devices"
        done
        echo "Using given devices '$devices' for both hosts $primary $secondary"
    fi

    for host in "$primary" "$secondary"; do
        list_var="devices_${host//-/_}"
        res_var="resources_${host//-/_}"
        [[ "$(echo ${!list_var})" = "" ]] && fail "No devices have been determined on $host"
        printf -v "$res_var" '%s' "$(for device in ${!list_var}; do _get_resource "$device"; done | sort)"
        echo "$res_var:" ${!res_var}
        [[ "$(echo ${!res_var})" = "" ]] && fail "No resources have been determined on $host"
    done
    primary_res="resources_${primary//-/_}"
    secondary_res="resources_${secondary//-/_}"
    if [[ "$(echo ${!primary_res})" != "$(echo ${!secondary_res})" ]]; then
        fail "Primary resource list is different from secondary resource list"
    fi

    # Per resource, remember the smallest size found on any host. The size is
    # field 7 of "lvdisplay -c" multiplied by 512.
    for host in "$primary" "$secondary"; do
        echo "Host $host:"
        list_var="devices_${host//-/_}"
        lv_sizes="$(remote "$host" "/sbin/lvdisplay -c $(echo ${!list_var}) | cut -d: -f1,7" | sed 's/:/ /')"
        while read -r device sector_size; do
            # An empty answer yields one empty line; there is nothing to record.
            [[ -n "$device" ]] || continue
            this_size=$(( sector_size * 512 ))
            echo " device $device: size $this_size"
            this_resource="$(_get_resource "$device")"
            current="${sizes[$this_resource]:-0}"
            if (( current == 0 || this_size < current )); then
                sizes[$this_resource]=$this_size
            fi
        done <<<"$lv_sizes"
    done

    echo ""
    echo "Determined the following sizes:"
    for res in "${!sizes[@]}"; do
        echo " $res: ${sizes[$res]}"
    done
    echo ""

    do_confirm

    # $phase may be a brace expression such as {0..8}; eval expands it.
    for this_phase in $(eval "echo $phase"); do
        do_phase "$this_phase"
    done

    script_end=$(date +%s)
    echo "ESTIMATED script duration: $(( script_end - script_start )) seconds"
}

# Timestamps (seconds since the epoch) shared between the phases;
# 0 means "not set yet".
downtime_start=0
uptime_start=0

# Run the main program with the original command line (so that its "Params:"
# line can show it) and copy stdout and stderr into a log file in the
# current directory.
main "$@" 2>&1 | tee "rollout-$(date +%Y%m%d-%H%M).$primary.$secondary.log"