#!/bin/bash
#
# This file is part of MARS project: http://schoebel.github.io/mars/
#
# Copyright (C) 2015 Thomas Schoebel-Theuer
# Copyright (C) 2015 1&1 Internet AG
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

############################################################

# TST autumn 2015 lab prototype
# for mass switchover and other generic mass commands

# Environment-specific actions are encoded into variables.
# Change them (e.g. in /etc/mass-actions/mass-actions.conf) for
# adaptation to any other operating environment.
#
# In addition, you will need an association file host-assoc.txt
# containing 2 fields separated by colon: first the hostname, second
# an arbitrary key value with an arbitrary meaning. It is wise to
# use locations, room numbers, rack numbers, etc for that field.
# What exactly is up to you. Multiple keys may be assigned to the same
# host.
#
# Please feel free to adapt this to your needs.

set -o pipefail
shopt -s nullglob
export LC_ALL=C
export start_stamp="$(date "+%F_%T" | sed 's/:/./g')"

declare -A doc
orig_vars="$(set | grep '^[_A-Za-z0-9]\+=' | cut -d= -f1)"

# START defaults for configuration variables

default_config="${default_config:-./mass-actions.conf}"
doc[default_config]="Default config file. Here you can override variables or add additional commands to the array cmd_table[]."

additional_configs="${additional_configs:-/etc/mass-actions/*.conf $HOME/.mass-actions/*.conf}"
doc[additional_configs]="Blank-separated list of wildcarded additional config files. Here you can override variables or add additional commands to the array cmd_table[]."

status_dir="${status_dir:-./status-dir}"
doc[status_dir]="Output directory where progress logfiles of remotely issued ssh commands are created. You may grep in it."

# The rest is hardcoded here in case the config file does not exist

dry_run=${dry_run:-0}
doc[dry_run]="When enabled, remote ssh actions are only displayed instead of really executed."

verbose=${verbose:-0}
doc[verbose]="Increase speakyness."

confirm=${confirm:-0}
doc[confirm]="Each remote ssh command must be individually confirmed before it is actually executed. As a side effect, commands are running sequentially instead of parallel."

do_wait=${do_wait:-1}
allow_unknown_hosts=${allow_unknown_hosts:-0}
help=${help:-0}
status=${status:-0}
clean=${clean:-0}

sshopt="${sshopt:--4 -A -T -o StrictHostKeyChecking=no -o ForwardX11=no -o KbdInteractiveAuthentication=no -o VerifyHostKeyDNS=no -o ConnectTimeout=60 -o TCPKeepAlive=yes}"
max_jobs_parallelism="${max_jobs_parallelism:-3000}"

host_spec="${host_spec:-}"
action="${action:-}"
cmd="${cmd:-}"
prefix_cmd="${prefix_cmd:-set -o pipefail; shopt -s nullglob; }"

host_list="${host_list:-}"
host_filter="${host_filter:-}"

skip=${skip:-0}
max=${max:-0}

assoc_file="${assoc_file:-host-assoc.txt}"
assoc_dirs="${assoc_dirs:-. $HOME/.mass-actions /var/cache/mass-actions /etc/mass-actions}"

txt_ok="${txt_ok:-CMD OK}"
txt_fail="${txt_fail:-CMD FAIL \$?}"

# Command table for defining shorthand actions.
# Replace or extend this for your needs.
#
# Hint: use /etc/mass-actions/mass-actions.conf (or put it at another place)
# for overriding these example commands.
#
# All available action keywords can displayed by "$0 --help".
# Variables starting with tmp_ are suppressed in the display and may
# be used for internal structuring / better readability of complex commands.

declare -A cmd_table

cmd_table[test]="uptime"
cmd_table[mars_status]="if [[ -d /mars ]]; then marsadm view-replstate all; else echo 'NO_MARS_HOST'; fi"
cmd_table[drbd_status]="if [[ -r /proc/drbd ]]; then cat /proc/drbd; else echo 'NO_DRBD_HOST'; fi"
cmd_table[cm3_status]="cm3 -us || cm3 -s"
cmd_table[kernel_status]="uptime; ${cmd_table[cm3_status]}; ${cmd_table[mars_status]}; ${cmd_table[drbd_status]}; available=\"\$(ls -t /boot/vmlinuz-* | head -1 | cut -d- -f2-)\"; echo AVAILABLE_KERNEL=\$available; running=\"\$(cat /proc/version | awk '{print \$3; }')\"; echo RUNNING_KERNEL=\$running; if [[ \"\$running\" = \"\$available\" ]]; then echo KERNEL_IS_RECENT; elif [[ -r /proc/drbd ]] && grep ':Primary/' < /proc/drbd; then echo CANNOT_REBOOT_DRBD_PRIMARY; elif [[ -d /mars ]] && marsadm view-is-primary all | grep '^1\$'; then echo CANNOT_REBOOT_MARS_PRIMARY; elif [[ -x /usr/lib/1und1/scripts/is_node_in_mode_active.sh ]] && /usr/lib/1und1/scripts/is_node_in_mode_active.sh; then echo CANNOT_REBOOT_NODE_ACTIVE; else echo NEEDS_REBOOT; fi"
cmd_table[cm3_switchable_status]="if [[ -d /etc/ovz ]]; then cm3_switchable=1; else cm3_switchable=0; for dummy in {0..3}; do cm3 -us; slots_needed=\"\$(cm3 -s | grep \" \(remote\\|stopped\|broken\) \" | wc -l)\"; slots_available=\"\$(cm3 -s | grep idle | wc -l)\"; if (( slots_needed <= slots_available )); then cm3_switchable=1; break; fi; sleep 7; echo CM3_REPEAT; done; if (( cm3_switchable )); then echo CM3_SWITCHOVER_POSSIBLE; else echo CM3_SWITCHOVER_NOT_POSSIBLE; fi; fi"
cmd_table[mars_module_status]="uptime; mars_available=\"\$(modinfo mars | grep '^version' | awk '{ print \$2; }')\"; echo \"AVAILABLE_MARS=\$mars_available\"; mars_running=\"\$(cat /sys/module/mars/version | awk '{ print \$1; }')\"; echo \"RUNNING_MARS=\$mars_running\"; if [[ \"\$mars_running\" = \"\" ]]; then echo echo 'NO_MARS_HOST'; elif [[ \"\$mars_running\" = \"\$mars_available\" ]]; then echo MARS_IS_RECENT; elif marsadm view-is-primary all | grep '^1\$'; then echo MARS_CANNOT_RELOAD; else echo MARS_NEEDS_RELOAD; fi"
cmd_table[bgp_status]="if mountpoint /kunden/homepages/; then if ping -c 1 -w 10 8.8.8.8; then echo BGP_OK; else echo BGP_FAIL; fi; else echo BGP_UNUSED; fi"

cmd_table[detect_double]="if [[ -r /proc/drbd ]]; then cat /proc/drbd; if grep ' ds:' < /proc/drbd && mountpoint /mars && [[ -h /mars/uuid ]]; then marsadm view all; echo DOUBLE; else echo 'NO_MARS_HOST'; fi; else echo 'NO_DRBD_HOST'; fi"

cmd_table[kernel_reboot_when_necessary]="if { ${cmd_table[kernel_status]}; } | tee -a /dev/stderr | grep -q '^NEEDS_REBOOT$'; then if [[ -r /etc/lilo.conf ]] && grep rtrfix < /etc/lilo.conf; then lilo && sleep 3 && lilo -R rtrfix && sleep 3 && sync && echo coldreboot && coldreboot; else echo reboot; reboot; fi; fi"

cmd_table[mars_reload_when_necessary]="if { ${cmd_table[mars_module_status]}; } | tee -a /dev/stderr | grep -q '^MARS_NEEDS_RELOAD$'; then rmmod mars; modprobe mars; fi"

cmd_table[mars_switchover]="if [[ -d /mars ]]; then marsadm up all; marsadm primary all; fi; ${cmd_table[mars_status]}"
cmd_table[mars_failover]="if [[ -d /mars ]]; then marsadm pause-fetch all; marsadm attach all; marsadm primary --force all; fi; ${cmd_table[mars_status]}"

cmd_table[drbd_switchover]="if [[ -r /proc/drbd ]]; then drbdadm up all; drbdadm primary all; fi; ${cmd_table[drbd_status]}"
cmd_table[drbd_failover]="if [[ -r /proc/drbd ]]; then drbdadm disconnect all; drbdadm primary --force all; fi; ${cmd_table[drbd_status]}"

tmp_cm3_options="--timeout=3600 --vmhandler-timeout=3600"

tmp_mars_detect_others="export resources=\"\$(marsadm view-my-resources)\"; other_hosts=\"\"; for res in \$resources; do primary=\"\$(marsadm view-get-primary \$res)\"; if [[ \"\$primary\" != \"\$(hostname)\" ]] && ! [[ \"\$other_hosts\" =~ \$primary ]]; then other_hosts+=\" \$primary\"; fi; done"
tmp_mars_check_switchable="if ! [[ -d /proc/sys/mars ]]; then echo 'CANNOT_START_MARS_SWITCHOVER: kernel module not loaded'; exit -1; fi; if marsadm view-is-attach all | grep -q \"^0\$\"; then echo 'CANNOT_START_MARS_SWITCHOVER: some resource not attached'; exit -1; fi; if marsadm view-is-alive all | grep -v \"^---\" | grep -v \"^1\$\"; then echo 'CANNOT_START_MARS_SWITCHOVER: network is not alive'; exit -1; fi; if marsadm view-sync-rest all | grep -v \"^---\" | grep -v \"^0\$\"; then echo 'CANNOT_START_MARS_SWITCHOVER: some resource not synced'; exit -1; fi; if marsadm view-is-split-brain all | grep -v \"^---\" | grep -v \"^0\$\"; then echo 'CANNOT_START_MARS_SWITCHOVER: some resource is in split brain'; exit -1; fi; if marsadm view-is-consistent all | grep -v \"^---\" | grep -v \"^1\$\"; then echo 'CANNOT_START_MARS_SWITCHOVER: some resource is inconsistent'; exit -1; fi"
tmp_drbd_detect_others="export resources=\"\$(if [[ -d /etc/ovz/drbd.conf.d/ ]]; then  (cd /etc/ovz/drbd.conf.d/ && echo \$(ls ovz*.cfg ovz*.cfg.old | cut -d. -f1 | sort -u) ); else echo \$(cm3 --list-vms | cut -d. -f1); fi)\"; if grep -q ':Secondary/' < /proc/drbd; then other_hosts=\"\$(hostname | tr ab ba)\"; fi"
tmp_drbd_check_switchable="if grep \" cs:\" < /proc/drbd | grep -v \"cs:Connected .* ds:UpToDate/UpToDate\"; then echo 'CANNOT_START_DRBD_SWITCHOVER'; exit -1; fi"
tmp_cm3_stop_other="ssh $sshopt root@\$host \"$prefix_cmd cm3 $tmp_cm3_options --stop all; sleep 20; count=0; for i in \\\$(cm3 --list-vms --with-status | grep -i broken | cut -d: -f1 | cut -d. -f1); do echo \"RESTOPPING BROKEN \\\$i\"; (( count++ )); sleep 20; cm3 -us; sleep 10; cm3 $tmp_cm3_options --stop \\\$i; done\""
tmp_rebuild_ovz_tmp="for dev in /dev/vg*/ovz[0-9]*tmp; do if grep \"\$(echo \$dev | sed 's:^.*/::')\" < /proc/mounts; then echo \"Cannot rebuild \$dev\"; else echo \"Rebuild \$dev\"; if mkfs.xfs -f \$dev; then mount \$dev /mnt; chmod a+rwxt /mnt; umount /mnt; fi; fi; done"
#tmp_cm3_restart_local="for dummy in {0..2\}; do count=0; for i in \$(cm3 --list-vms --with-status | grep -i \"broken\|stopped\" | cut -d: -f1 | cut -d. -f1); do echo \"RESTARTING BROKEN \$i\"; (( count++ )); cm3 -us; sleep 10; cm3 $tmp_cm3_options --stop \$i; done; if (( count )); then sleep 10; cm3 $tmp_cm3_options --start all; sleep 10; fi; done"
tmp_cm3_restart_local="echo skip restart"
tmp_cm3_start_local="$tmp_rebuild_ovz_tmp; cm3 $tmp_cm3_options --start all; sleep 10; cm3 -us; $tmp_cm3_restart_local"
tmp_cm3_status_local="${cmd_table[mars_status]}; ${cmd_table[drbd_status]}; cm3 -us; cm3 -s | grep -q 'broken\|stopped' && exit -1"

tmp_mars_restart_cmd="drbdadm down all; /etc/init.d/drbd stop; sleep 3; /etc/init.d/drbd stop; sleep 3; rmmod drbd; sleep 1; modprobe mars"
# Problem: ssh evaluates its arguments once more. Solution: for symmetry reasons, use eval at the local side to get the same number of evaluations. Use enough backslashes to distinguish between the different numbers of evaluation levels.
tmp_mars_update_configs_resources_cmd="if which configure_InfongSpace.pl; then configure_InfongSpace.pl --update-infong \\\$res repltype=mars; elif which ui-config-modify; then ui-config-modify -c MARS_ENABLED=true; fi"
tmp_mars_make_resources_primary="echo RESOURCES \$resources; for res in \$resources; do echo marsadm create-resource \$res /dev/*/\$res; marsadm create-resource \\\$res /dev/*/\\\$res || exit -1; $tmp_mars_update_configs_resources_cmd; done"
tmp_mars_make_resources_secondary="echo RESOURCES \$resources; for res in \$resources; do echo marsadm join-resource \\\$res /dev/*/\\\$res; marsadm join-resource \\\$res /dev/*/\\\$res || exit -1; $tmp_mars_update_configs_resources_cmd; done"
tmp_update_configs_cmd="for i in /etc/ovz/drbd.conf.d/*.cfg; do mv \\\$i \\\$i.MARS; done; if [[ -r /etc/ovz/fstab.include ]]; then for file in /etc/ovz/fstab.include /etc/fstab; do sed --in-place=.MARS 's:\(/dev/drbd[0-9]\+\) \+/vz/\([0-9]\+\):/dev/mars/ovz\\2 /vz/\\2:' \\\$file; done; fi"
cmd_table[fix_mars_config]="eval \"$tmp_update_configs_cmd\""
tmp_restart_cm3_cmd="/etc/init.d/clustermanager stop; sleep 3; marsadm secondary all; /etc/init.d/clustermanager start; sleep 20"

tmp_mars_make_resources="if [[ -h /mars/uuid ]]; then $tmp_mars_restart_cmd; ssh $sshopt root@\$other_hosts \"$prefix_cmd $tmp_mars_restart_cmd\"; eval \"$tmp_mars_make_resources_primary\"; eval \"$tmp_update_configs_cmd\"; sleep 10; res=SCHEISSE; ssh $sshopt root@\$other_hosts \"$prefix_cmd $tmp_mars_make_resources_secondary; $tmp_update_configs_cmd\"; $tmp_restart_cm3_cmd; ssh $sshopt root@\$other_hosts \"$prefix_cmd $tmp_restart_cm3_cmd\"; fi"
tmp_mars_create_cluster="if ! [[ -h /mars/uuid ]]; then ssh $sshopt root@\$other_hosts \"mount /mars; marsadm create-cluster\"; marsadm join-cluster \$other_hosts; fi"
tmp_mars_migrate="mount /mars; if [[ \"\$other_hosts\" != \"\" ]] && [[ \"\$resources\" != \"\" ]] && [[ -r /proc/drbd ]] && grep ' ro:' < /proc/drbd && mountpoint /mars && ! grep 'ro:Primary/' < /proc/drbd && ! grep -o -i 'ds:[a-z/]\+' < /proc/drbd | grep -v 'UpToDate/UpToDate'; then echo \"---- MIGRATING \$(hostname) (\$other_hosts) [\$resources] ------\"; $tmp_mars_create_cluster; $tmp_mars_make_resources; fi"
#tmp_mars_migrate="echo WEGLASSEN"

cmd_table[cm3_get_resources]="if [[ -d  /sys/module/mars/ ]] ; then $tmp_mars_check_switchable; $tmp_mars_detect_others; elif [[ -r /proc/drbd ]]; then $tmp_drbd_check_switchable; $tmp_drbd_detect_others; else echo 'NO_CM3_RUNNING'; exit 0; fi; for res in \$resources; do echo \"\$res:\$(ls /dev/*/\$res | grep -v /mars | tail -1)\"; done"
cmd_table[cm3_switchover]="${cmd_table[cm3_get_resources]}; if [[ \"\$resources\" = \"\" ]]; then echo NO_RESOURCES_EXIST; exit 0; fi; echo \"other_hosts='\$other_hosts'\"; ${cmd_table[cm3_switchable_status]}; if (( !cm3_switchable )); then exit -1; fi; for host in \$other_hosts; do echo \"---- STOPPING \$host ------\"; $tmp_cm3_stop_other; sleep 10; done; $tmp_mars_migrate; echo \"---- STARTING \$(hostname) ------\"; sleep 10; $tmp_cm3_start_local; sleep 10; $tmp_cm3_status_local; ${cmd_table[bgp_status]}; exit 0"
cmd_table[repair_ovz_drbd]="/etc/init.d/drbd stop; /etc/init.d/clustermanager stop; /etc/init.d/drbd stop; rmmod mars; umount /mars; for i in /etc/ovz/drbd.conf.d/*.cfg.MARS /etc/ovz/fstab.include.MARS /etc/fstab.MARS; do mv \$i \${i/.MARS/}; done; /etc/init.d/drbd start; /etc/init.d/clustermanager start; mkfs.ext4 /dev/vg00/mars; mount /mars"

# The following functions may be overridden in the config file.
# When new functions are declared, their function names must follow
# the convention print_[a-z0-9_]+_status()
#
# Any new functions are automatically detected and included.
#
# Typically, they will grep in the output of previously defined remote commands
# and display some statistics about the contents.
#
# Important: these functions should not print anything when no data
# is available.

function print_ping_status
{
    local output="$(cat $status_dir/*.log |\
	grep -o " packets transmitted, [0-9]\+ received" |\
	awk '{ print $3; }' |\
	sort -n |\
	uniq -c |\
	awk '{ printf(" %s=%d", $2, $1); }')"
    if [[ "$output" != "" ]]; then
	echo "    PING STATUS:$output"
    fi
}

function print_ssh_status
{
    local msg_list="Host.key.verification.failed Permission.denied Connection.refused Connection.timed.out Could.not.resolve.hostname unknown.host"
    local output="$(cat $status_dir/*.log |\
	grep -o "\(${msg_list// /\\|}\)" |\
	sed 's/ /_/g' |\
	sort |\
	uniq -c |\
	awk '{ printf(" %s=%d", $2, $1); }')"
    if [[ "$output" != "" ]]; then
	echo "    SSH STATUS:$output"
    fi
}

function print_uptime_status
{
    local day_limits="0 1 7 30 365"
    local load_limits="0 1 3 10 30 100 300"
    local count=0
    local limit
    for limit in $day_limits; do
	eval local days_$limit=$(
	    cat $status_dir/*.log |\
		grep -o "up [0-9]\+ days," |\
		awk "{ if (\$2 >= $limit) { print \$2} }" |\
		wc -l)
	(( days_$limit && count++ ))
    done
    for limit in $load_limits; do
	eval local load_$limit=$(
	    cat $status_dir/*.log |\
		grep -o "load average: [0-9]\+" |\
		awk "{ if (\$3 >= $limit) { print \$3} }" |\
		wc -l)
	(( load_$limit && count++ ))
    done
    if (( count )); then
	echo -n "    UPTIME:"
	for limit in $day_limits; do
	    echo -n " >${limit}_days=$(eval echo "\${days_$limit}")"
	done
	echo ""
	echo -n "    LOADAVG:"
	for limit in $load_limits; do
	    echo -n " >${limit}=$(eval echo "\${load_$limit}")"
	done
	echo ""
    fi
}

function print_kernel_status
{
    local msg_list="KERNEL_IS_RECENT CANNOT_REBOOT[A-Z_]* NEEDS_REBOOT"
    local output="$(cat $status_dir/*.log |\
	grep -o "^\(${msg_list// /\\|}\)$" |\
	sort |\
	uniq -c |\
	awk '{ printf(" %s=%d", $2, $1); }')"
    if [[ "$output" != "" ]]; then
	echo "    KERNEL STATUS:$output"
    fi
}

function print_mars_status
{
    local msg_list="NO_MARS_HOST MARS_IS_RECENT MARS_CANNOT_RELOAD[A-Z_]* MARS_NEEDS_RELOAD"
    local output="$(cat $status_dir/*.log |\
	grep -o "^\(${msg_list// /\\|}\)$" |\
	sort |\
	uniq -c |\
	awk '{ printf(" %s=%d", $2, $1); }')"
    if [[ "$output" != "" ]]; then
	echo "    MARS STATUS:$output"
    fi

    local msg_list="ModuleNotLoaded UnResponsive NotJoined NotStarted EmergencyMode Replicating NotYetPrimary PausedSync Syncing PausedFetch PausedReplay NoPrimaryDesignated PrimaryUnreachable Replaying"
    local msg
    for msg in $msg_list; do
	eval "local $msg=0"
    done
    local count=0
    local var
    for var in $(cat $status_dir/*.log | grep -o "^\(${msg_list// /\\|}\)$"); do
	(( count++ ))
	eval "(( $var++ ))"
    done
    if (( count )); then
	echo -n "    MARS RESOURCES:"
	for msg in $msg_list; do
	    if (( $(eval echo \${$msg}) )); then
		echo -n " $msg=$(eval echo \${$msg})"
	    fi
	done
	echo ""
    fi
}

function print_drbd_status
{
    local output="$(cat $status_dir/*.log |\
	grep -i -o 'NO_DRBD_HOST\| cs:[a-z]\+\| ro:[a-z/]\+\| ds:[a-z/]\+' |\
	sed 's/^ *[a-z]\+://' |\
	sort |\
	uniq -c |\
	awk '{ printf(" %s=%d", $2, $1); }')"
    if [[ "$output" != "" ]]; then
	echo "    DRBD RESOURCES:$output"
    fi
}

function print_cm3_status
{
    local msg_list="NO_CM3_RUNNING NO_RESOURCES_EXIST CANNOT_START_DRBD_SWITCHOVER CANNOT_START_MARS_SWITCHOVER CM3_SWITCHOVER_POSSIBLE CM3_SWITCHOVER_NOT_POSSIBLE"
    local found=0
    local var;
    for var in $msg_list; do
	eval "local ${var//./_}=0";
    done
    local var
    for var in $(cat $status_dir/*.log | grep -o "^\(${msg_list// /\\|}\)" | sed 's/ /_/g'); do
	(( found++ ))
	eval "(( ${var//./_}++ ))"
    done
    if (( found )); then
	echo -n "    CM3 STATUS:"
	local msg
	for msg in $msg_list; do
	    if (( $(eval echo \${${msg//\./_}}) )); then
		echo -n " ${msg//\./_}=$(eval echo \${${msg//\./_}})"
	    fi
	done
	echo ""
    fi

    local key_list="started stopped active remote broken disabled"
    found=0
    local key
    for key in $key_list; do
	eval "local nr_$key=0"
    done
    for file in $status_dir/*.log; do
	# determine the last line, in case there are multiple invocations
	# of "cm3 -s" in the same logfile.
	line="$(grep -n "VM *.*STATE *NODE *STORAGE" < $file | tail -1 | cut -d: -f1)"
	if [[ "$line" != "" ]]; then
	    (( found++ ))
	    for key in $(tail -n +$line < $file | grep -o " \(${key_list// /\\|}\) .*" | awk '{ print $1; rest=$2; while (rest = gensub("[^,]*,?", "", "", rest)) { print $1; } }'); do
		eval "(( nr_$key++ ))"
	    done
	fi
    done
    if (( found )); then
	echo -n "    CM3 RESOURCES:"
	for key in $key_list; do
	    echo -n " $key=$(eval echo \${nr_$key})"
	done
	echo ""
    fi
}

function print_bgp_status
{
    local output="$(cat $status_dir/*.log |\
	grep '^\(BGP_[A-Z_]\+\)$' |\
	sort -r |\
	uniq -c |\
	awk '{ printf(" %s=%d", $2, $1); }')"
    if [[ "$output" != "" ]]; then
	echo "    BGP STATUS:$output"
    fi
}

# END of configuration variables and functions

param_vars="$(set | grep '^[_A-Za-z0-9]\+=' | cut -d= -f1 | grep -v "^tmp_")"

########################################################

# generic helper functions

function warn
{
    local txt="${1:-Unkown}"
    echo "WARNING: $txt" >> /dev/stderr
}

function fail
{
    local txt="${1:-Unkown failure}"
    echo "FAILURE: $txt" >> /dev/stderr
    rm -f /tmp/tmp_*.$$
    exit -1
}

function do_confirm
{
    local skip_this="$1"
    local active="${2:-$confirm}"
    local response

    (( !active )) && return 0

    [[ "$skip_this" != "" ]] && skip_this="S to skip, "
    echo -n "[CONFIRM: Press ${skip_this}Return to continue, ^C to abort] "
    read response
    ! [[ "$response" =~ ^[sS] ]]
    return $?
}

function remote
{
    local host="$1"
    local cmd="$2"
    local nofail="${3:-0}"

    (( verbose > 1 )) && echo "Executing on $host: '$cmd'" >> /dev/stderr
    [[ "${cmd## }" = "" ]] && return 0
    if ssh $sshopt root@$host "$cmd"; then
	return 0
    elif (( nofail )); then
	return $?
    else
	#fail "ssh to '$host' command '$cmd' failed with status $?"
	fail "ssh to '$host' command failed with status $?"
    fi
}

function remote_action
{
    local host="$1"
    local cmd="$2"

    if (( dry_run )); then
	echo "DRY_RUN REMOTE $host ACTION '$cmd'"
    elif (( confirm )); then
	echo "REMOTE $host ACTION '$cmd'"
	if do_confirm 1; then
	    remote "$host" "$cmd"
	else
	    echo "SKIPPING $host ACTION '$cmd'"
	fi
    else
	remote "$host" "$cmd"
    fi
}

function source_when_possible
{
    local file="$1"
    local type="$2"

    if [[ -r "$file" ]]; then
	echo "Sourcing $type file '$file'"
	. "$file" || fail "$type file $file is not parsable"
    elif (( verbose )); then
	echo "Skipping non-existent $type file '$file'"
    fi
}

for i; do
    if [[ "$i" =~ ^--verbose ]]; then
	verbose=1
    fi
done

for file in $additional_configs; do
    source_when_possible "$file" "config"
done
source_when_possible "$default_config" "config"

# Allow forceful override of any _known_ variable at the command line
for i; do
    if [[ "$i" =~ ^--[-_A-Za-z0-9]+$ ]]; then
	param="${i#--}"
	var="${param//-/_}"
        [[ "$(eval "echo \"\${$var-UNSET}\"")" = "UNSET" ]] && fail "Variable '$var' is unknown"
	eval "$var=1"
    elif [[ "$i" =~ ^--[-_A-Za-z0-9]+= ]]; then
	param="${i#--}"
	var="${param%%=*}"
	var="${var//-/_}"
	val="${param#*=}"
        [[ "$(eval "echo \"\${$var-UNSET}\"")" = "UNSET" ]] && fail "Variable '$var' is unknown"
	eval "$var=$val"
    elif [[ "$i" =~ ^-h$ ]]; then
	help=1
    elif [[ "$i" =~ ^-v$ ]]; then
	(( verbose++ ))
    elif [[ "$host_spec" = "" ]]; then
	host_spec="$i"
    elif [[ "$action" = "" ]]; then
	action="$i"
    else
	fail "bad parameter syntax '$i'"
    fi
done

for dir in $assoc_dirs; do
    if [[ -r "$dir/$assoc_file" ]]; then
	assoc_file="$dir/$assoc_file"
	break
    fi
done

function do_help
{
cat <<EOF
usage: $0 [options] <host_spec> <action>

---------------------

The following parameter variables can be either passed by the
environment, or used for hard overriding on the command line
via --variable=value syntax:

$(
    declare -A orig
    for i in $orig_vars; do
	orig[$i]=1
    done
    for i in $param_vars; do
	[[ "$i" =~ _vars$ ]] && continue
	if (( !orig[$i] )); then
	    if [[ "$(eval "echo \${$i}")" =~ ^[0-9]+$ ]]; then
		echo "$i=$(eval "echo \${$i}")"
	    else
		echo "$i=\"$(eval "echo \${$i}")\""
	    fi
	    doc_line="${doc[$i]}"
	    if [[ "$doc_line" != "" ]]; then
		echo -e "\t$doc_line"
	    fi
	fi
    done
)

---------------------

The following status functions are defined and are automatically called
upon $0 --status :

$(set | grep "^[a-z0-9_]\+ ()" | grep "^print_[a-z0-9_]\+_status")

---------------------

The following strings can be used for <host_spec>:
(see file $assoc_file)

$(cut -d: -f2 < $assoc_file | sort -u)

Hint: multiple specs may be separated by blanks, if you correctly
quote it to the shell. Example: $0 "host1 host7" "uptime"

Set operations can be performed by prefixing each spec or hostname with
"+" or "-" signs.

Example: $0 "+de.kae.bs -de.kae.bs;R08" "kernel_status"
will run on all hosts from complete datacenter "de.kae.bs" with the
exception of all hosts from Room 08.

Filtering: $0 --host-filter="store" "de.kae.bs" "kernel_status"
will only run on final target hostnames containing the substring "store".
You may also use bash regexes.

---------------------

The following pre-defined <action>s from cmd_table[] can be used (or,
give a full shell command in quotes):

$(
local i
for i in ${!cmd_table[*]}; do
    echo "$i"
done
)
EOF
}

if (( help )); then
    do_help
    exit 0
fi

function print_status
{
    local empty=0
    local failure=0
    local ok=0
    local working=0
    local file

    for file in $status_dir/*.log; do
	if ! [[ -s $file ]]; then
	    (( empty++ ))
	elif grep -q FAILURE $file; then
	    (( failure++ ))
	elif grep -q "^$txt_ok$" $file; then
	    (( ok++ ))
	else
	    (( working++ ))
	fi
    done
    echo "REMOTE SCRIPT STATUS: NotStarted=$empty Working=$working OK=$ok Fail=$failure"

    local func
    for func in $(set | grep "^[a-z0-9_]\+ ()" | grep -o "^print_[a-z0-9_]\+_status"); do
	$func
    done
}

if (( status )); then
    [[ -d "$status_dir" ]] || fail "Status directory '$status_dir' does not exist"
    sub_dir="$(ls $status_dir | grep "^run-" | sort | tail -1)"
    [[ -d "$status_dir/$sub_dir" ]] && export status_dir="$status_dir/$sub_dir"
    echo "Status from $status_dir:"
    print_status
    exit 0
fi

if (( clean )); then
    [[ -d "$status_dir" ]] || fail "Status directory '$status_dir' does not exist"
    echo "Are you sure to clean the status directory $status_dir/ including all its versioned subdirectories?"
    do_confirm 1 1
    rm -rf $status_dir
    exit 0
fi

# automatic versioning of status_dir 

export status_dir="$status_dir/run-$start_stamp"

########################################################

# compute host_list out of host_spec

function add_host
{
    local host="$1"
    local minus="$2"

    if (( minus )); then
	host_list="$(echo " $host_list " | sed "s/ $host / /g")"
    else
	host_list+=" $host"
    fi
}

function compute_host_list
{
    local host
    rm -f /tmp/tmp_*.$$
    local tmp1=/tmp/tmp_1.$$
    local tmp2=/tmp/tmp_2.$$

    [[ -r $assoc_file ]] || fail "cannot find assoc file '$assoc_file'"
    (( verbose )) && echo "Using assoc file '$assoc_file'"
    
    for host in $host_spec; do
	local minus=0
	if [[ "$host" =~ ^- ]]; then
	    host="${host/-/}"
	    minus=1
	else
	    host="${host/\+/}"
	fi
	host="${host//./\\.}"
	if grep -E ":$host\$" < $assoc_file > $tmp1; then
	    local i
	    for i in $(cut -d: -f1 < $tmp1); do
		add_host $i $minus
	    done
	elif grep -qE "^$host:" < $assoc_file; then
	    add_host $host $minus
	elif (( allow_unknown_hosts )); then
	    warn "host '$host' does not appear in $assoc_file"
	    add_host $host $minus
	else
	    fail "Keyword or hostname '$host' does not exist in $assoc_file"
	fi
    done
    rm -f /tmp/tmp_*.$$
    if [[ "$host_filter" != "" ]]; then
	local old_host_list="$host_list"
	host_list=""
	for host in $old_host_list; do
	    if [[ "$host" =~ $host_filter ]]; then
		host_list+=" $host"
	    fi
	done
    fi
    if (( skip > 0 )); then
	local old_host_list="$host_list"
	local count=0
	host_list=""
	for host in $old_host_list; do
	    (( ++count <= skip )) && continue
	    if [[ "$host" =~ $host_filter ]]; then
		host_list+=" $host"
	    fi
	done
    fi
    if (( max > 0 )); then
	local old_host_list="$host_list"
	local count=0
	host_list=""
	for host in $old_host_list; do
	    (( ++count > max )) && break
	    if [[ "$host" =~ $host_filter ]]; then
		host_list+=" $host"
	    fi
	done
    fi

    local host_count=$(echo ${host_list} | wc -w)
    if (( !host_count )); then
	fail "Resulting host list is empty - nothing can be done at all"
    fi
    if (( verbose )); then
	echo "USING FINAL host_list: ${host_list}"
    else
	echo "Will run on $host_count hosts"
    fi
}

function get_cmd
{
    if [[ "$cmd" = "" ]]; then
	if [[ "$action" = "ping" ]]; then
	    echo "Running a pure ping to $(echo "$host_list" | wc -w) hosts"
	    cmd="ping"
	elif [[ "${cmd_table[$action]}" != "" ]]; then
	    echo "Using predefined cmd_table[] action '$action'"
	    if ! [[ "$action" =~ _status ]]; then
		do_confirm 1 1
	    fi
	    cmd="$prefix_cmd${cmd_table[$action]}"
	elif [[ "$action" != "" ]]; then
	    echo ""
	    echo "Running action '$action' as a command on $(echo "$host_list" | wc -w) hosts"
	    do_confirm 1 1
	    cmd="$action"
	else
	    fail "No action given."
	fi
    else
	echo ""
	echo "Using given command '$cmd' on $(echo "$host_list" | wc -w) hosts"
	do_confirm 1 1
    fi
}

########################################################

# main program

function main
{
    mkdir -p $status_dir || fail "connot create status directory '$status_dir'"

    script_start=$(date +%s)

    if (( confirm )); then
	echo "CONFIRM mode: everything is running SEQUENTIALLY"
    else
	echo "START forking sub-processes"
    fi
    local host
    for host in $host_list; do
	if (( confirm )); then
	    if remote_action $host "$cmd" 2>&1; then
		eval echo "$txt_ok"
	    else
		eval echo "$txt_fail"
	    fi 2>&1 | tee $status_dir/$host.log
	else
	    if (( dry_run )); then
		echo "DRY_RUN REMOTE $host ACTION '$cmd'"
		eval echo "$txt_ok"
	    elif [[ "$cmd" = "ping" ]]; then
		ping -c 1 -w 10 $host
		eval echo "$txt_ok"
	    elif remote $host "$cmd" 2>&1 ; then
		eval echo "$txt_ok"
	    else
		eval echo "$txt_fail"
	    fi > $status_dir/$host.log 2>&1 &
	    while (( $(jobs | wc -l) > max_jobs_parallelism )); do
		sleep 1
	    done
	fi
    done
    (( !confirm )) && echo "DONE  forking sub-processes"
    if (( do_wait )); then
	echo "Waiting for termination of sub-processes"
	local duration=1
	while (( $( pstree $$ | wc -l ) > 2 )); do
	    print_status
	    sleep $duration
	    (( duration < 10 && duration++ ))
	done
	wait
    fi

    script_end=$(date +%s)
    echo "ESTIMATED script duration: $(( script_end - script_start )) seconds"
    print_status
}

compute_host_list
get_cmd
main
exit 0