#!/bin/bash
#
# This file is part of MARS project: http://schoebel.github.io/mars/
#
# Copyright (C) 2015 Thomas Schoebel-Theuer
# Copyright (C) 2015 1&1 Internet AG
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

############################################################

# TST autumn 2015 lab prototype
# for mass switchover and other generic mass commands

# Environment-specific actions are encoded into variables.
# Change them (e.g. in /etc/mass-actions/mass-actions.conf) for
# adaptation to any other operating environment.
#
# In addition, you will need an association file host-assoc.txt
# containing 2 fields separated by colon: first the hostname, second
# an arbitrary key value with an arbitrary meaning. It is wise to
# use locations, room numbers, rack numbers, etc for that field.
# What exactly is up to you. Multiple keys may be assigned to the same
# host.
#
# Please feel free to adapt this to your needs.

set -o pipefail
shopt -s nullglob
export LC_ALL=C

# Timestamp of this invocation in the form YYYY-MM-DD_HH.MM.SS; it is used
# further down as the name suffix of the versioned status sub-directory.
# Dots are used instead of colons as the time separator.
start_stamp="$(date "+%F_%H.%M.%S")"
export start_stamp

# doc[<variable>] holds the help text displayed for <variable> by --help.
declare -A doc

# Snapshot of all variable names existing _before_ the configuration
# variables are defined. The help display uses it to show only variables
# which were introduced afterwards.
orig_vars="$(set | grep '^[_A-Za-z0-9]\+=' | cut -d= -f1)"

# START defaults for configuration variables

default_config="${default_config:-./mass-actions.conf}"
doc[default_config]="Default config file. Here you can override variables or add additional commands to the array cmd_table[]."
# doc[] must be an associative array before the first keyed assignment.
# Re-declaring it is a no-op when it already is one.
declare -A doc

additional_configs="${additional_configs:-/etc/mass-actions/*.conf $HOME/.mass-actions/*.conf}"
doc[additional_configs]="Blank-separated list of wildcarded additional config files. Here you can override variables or add additional commands to the array cmd_table[]."

status_dir="${status_dir:-./status-dir}"
doc[status_dir]="Output directory where progress logfiles of remotely issued ssh commands are created. You may grep in it."

# The rest is hardcoded here in case the config file does not exist

dry_run=${dry_run:-0}
doc[dry_run]="When enabled, remote ssh actions are only displayed instead of really executed."

verbose=${verbose:-0}
doc[verbose]="Increase speakyness."

confirm=${confirm:-0}
doc[confirm]="Each remote ssh command must be individually confirmed before it is actually executed. As a side effect, commands are running sequentially instead of parallel."

do_wait=${do_wait:-1}
doc[do_wait]="When enabled, wait for the termination of all forked sub-processes and periodically print the status while waiting."

allow_unknown_hosts=${allow_unknown_hosts:-0}
doc[allow_unknown_hosts]="When enabled, hostnames not appearing in the association file only produce a warning instead of a failure."

help=${help:-0}
doc[help]="Print the help text and exit."

status=${status:-0}
doc[status]="Print the status of the latest run found in status_dir and exit."

clean=${clean:-0}
doc[clean]="Remove status_dir including all its versioned subdirectories (after confirmation) and exit."

sshopt="${sshopt:--4 -A -T -o StrictHostKeyChecking=no -o ForwardX11=no -o KbdInteractiveAuthentication=no -o VerifyHostKeyDNS=no -o ConnectTimeout=60 -o TCPKeepAlive=yes}"
doc[sshopt]="Options passed to every ssh invocation."

max_jobs_parallelism="${max_jobs_parallelism:-3000}"
doc[max_jobs_parallelism]="Forking of further background jobs is delayed while more than this number of jobs exists."

host_spec="${host_spec:-}"
doc[host_spec]="Blank-separated list of hostnames and/or keys from the association file, each optionally prefixed with + or -. Usually given as first positional argument."

action="${action:-}"
doc[action]="Keyword from cmd_table[], or 'ping', or a full shell command. Usually given as second positional argument."

cmd="${cmd:-}"
doc[cmd]="Remote shell command. When non-empty, it is used as is and the action is not looked up."

prefix_cmd="${prefix_cmd:-set -o pipefail; shopt -s nullglob; }"
doc[prefix_cmd]="Shell code prepended to commands taken from cmd_table[]."

host_list="${host_list:-}"
doc[host_list]="Blank-separated initial list of target hosts. Hosts resulting from host_spec are added to / removed from it."

host_filter="${host_filter:-}"
doc[host_filter]="Bash regex. When non-empty, only matching hostnames remain in the final host list."

skip=${skip:-0}
doc[skip]="When greater than 0, this number of hosts is skipped at the start of the final host list."

max=${max:-0}
doc[max]="When greater than 0, the final host list is truncated to this number of hosts."

assoc_file="${assoc_file:-host-assoc.txt}"
doc[assoc_file]="Name of the association file, containing lines of the form hostname:key."

assoc_dirs="${assoc_dirs:-. $HOME/.mass-actions /var/cache/mass-actions /etc/mass-actions}"
doc[assoc_dirs]="Blank-separated list of directories searched for assoc_file. The first readable one is used."

txt_ok="${txt_ok:-CMD OK}"
doc[txt_ok]="Text written to the logfile after a command has succeeded. It is evaluated by the shell before printing."

txt_fail="${txt_fail:-CMD FAIL \$?}"
doc[txt_fail]="Text written to the logfile after a command has failed. It is evaluated by the shell before printing."

# Command table for defining shorthand actions.
# Replace or extend this for your needs.
#
# Hint: use /etc/mass-actions/mass-actions.conf (or put it at another place)
# for overriding these example commands.
#
# All available action keywords can be displayed by "$0 --help".
# Variables starting with tmp_ are suppressed in the display and may
# be used for internal structuring / better readability of complex commands.

# Every value below is a shell snippet which is executed on the _remote_
# side. Therefore anything to be expanded remotely is escaped (\$, \"),
# while unescaped $... references are expanded locally, right here at
# definition time (used for composing snippets out of other snippets).
# Several snippets print marker lines (e.g. NEEDS_REBOOT); the
# print_*_status functions grep for such markers in the logfiles.

declare -A cmd_table

cmd_table[test]="uptime"
cmd_table[mars_status]="if [[ -d /mars ]]; then marsadm view-replstate all; else echo 'NO_MARS_HOST'; fi"
cmd_table[drbd_status]="if [[ -r /proc/drbd ]]; then cat /proc/drbd; else echo 'NO_DRBD_HOST'; fi"
cmd_table[cm3_status]="cm3 -us || cm3 -s"
# Compares the newest installed kernel image against the running one and
# prints exactly one of the KERNEL_IS_RECENT / CANNOT_REBOOT_* / NEEDS_REBOOT markers.
cmd_table[kernel_status]="uptime; ${cmd_table[cm3_status]}; ${cmd_table[mars_status]}; ${cmd_table[drbd_status]}; available=\"\$(ls -t /boot/vmlinuz-* | head -1 | cut -d- -f2-)\"; echo AVAILABLE_KERNEL=\$available; running=\"\$(cat /proc/version | awk '{print \$3; }')\"; echo RUNNING_KERNEL=\$running; if [[ \"\$running\" = \"\$available\" ]]; then echo KERNEL_IS_RECENT; elif [[ -r /proc/drbd ]] && grep ':Primary/' < /proc/drbd; then echo CANNOT_REBOOT_DRBD_PRIMARY; elif [[ -d /mars ]] && marsadm view-is-primary all | grep '^1\$'; then echo CANNOT_REBOOT_MARS_PRIMARY; elif [[ -x /usr/lib/1und1/scripts/is_node_in_mode_active.sh ]] && /usr/lib/1und1/scripts/is_node_in_mode_active.sh; then echo CANNOT_REBOOT_NODE_ACTIVE; else echo NEEDS_REBOOT; fi"
# Sets the remote variable cm3_switchable and prints CM3_SWITCHOVER_POSSIBLE
# or CM3_SWITCHOVER_NOT_POSSIBLE; retries up to 4 times with 7s pauses.
cmd_table[cm3_switchable_status]="if [[ -d /etc/ovz ]]; then cm3_switchable=1; else cm3_switchable=0; for dummy in {0..3}; do cm3 -us; slots_needed=\"\$(cm3 -s | grep \" \(remote\\|stopped\|broken\) \" | wc -l)\"; slots_available=\"\$(cm3 -s | grep idle | wc -l)\"; if (( slots_needed <= slots_available )); then cm3_switchable=1; break; fi; sleep 7; echo CM3_REPEAT; done; if (( cm3_switchable )); then echo CM3_SWITCHOVER_POSSIBLE; else echo CM3_SWITCHOVER_NOT_POSSIBLE; fi; fi"
# Compares the installed mars module version against the loaded one.
# The NO_MARS_HOST marker must be printed alone on its line, because
# print_mars_status only counts lines matching it exactly.
cmd_table[mars_module_status]="uptime; mars_available=\"\$(modinfo mars | grep '^version' | awk '{ print \$2; }')\"; echo \"AVAILABLE_MARS=\$mars_available\"; mars_running=\"\$(cat /sys/module/mars/version | awk '{ print \$1; }')\"; echo \"RUNNING_MARS=\$mars_running\"; if [[ \"\$mars_running\" = \"\" ]]; then echo 'NO_MARS_HOST'; elif [[ \"\$mars_running\" = \"\$mars_available\" ]]; then echo MARS_IS_RECENT; elif marsadm view-is-primary all | grep '^1\$'; then echo MARS_CANNOT_RELOAD; else echo MARS_NEEDS_RELOAD; fi"
cmd_table[bgp_status]="if mountpoint /kunden/homepages/; then if ping -c 1 -w 10 8.8.8.8; then echo BGP_OK; else echo BGP_FAIL; fi; else echo BGP_UNUSED; fi"
cmd_table[detect_double]="if [[ -r /proc/drbd ]]; then cat /proc/drbd; if grep ' ds:' < /proc/drbd && mountpoint /mars && [[ -h /mars/uuid ]]; then marsadm view all; echo DOUBLE; else echo 'NO_MARS_HOST'; fi; else echo 'NO_DRBD_HOST'; fi"
# The status output is duplicated to stderr via tee, so it still shows up
# in the log although grep -q consumes stdout.
cmd_table[kernel_reboot_when_necessary]="if { ${cmd_table[kernel_status]}; } | tee -a /dev/stderr | grep -q '^NEEDS_REBOOT$'; then if [[ -r /etc/lilo.conf ]] && grep rtrfix < /etc/lilo.conf; then lilo && sleep 3 && lilo -R rtrfix && sleep 3 && sync && echo coldreboot && coldreboot; else echo reboot; reboot; fi; fi"
cmd_table[mars_reload_when_necessary]="if { ${cmd_table[mars_module_status]}; } | tee -a /dev/stderr | grep -q '^MARS_NEEDS_RELOAD$'; then rmmod mars; modprobe mars; fi"
cmd_table[mars_switchover]="if [[ -d /mars ]]; then marsadm up all; marsadm primary all; fi; ${cmd_table[mars_status]}"
cmd_table[mars_failover]="if [[ -d /mars ]]; then marsadm pause-fetch all; marsadm attach all; marsadm primary --force all; fi; ${cmd_table[mars_status]}"
cmd_table[drbd_switchover]="if [[ -r /proc/drbd ]]; then drbdadm up all; drbdadm primary all; fi; ${cmd_table[drbd_status]}"
cmd_table[drbd_failover]="if [[ -r /proc/drbd ]]; then drbdadm disconnect all; drbdadm primary --force all; fi; ${cmd_table[drbd_status]}"

tmp_cm3_options="--timeout=3600 --vmhandler-timeout=3600"
# Sets the remote variables "resources" and "other_hosts" (the primaries of
# all local resources, except the local host itself).
tmp_mars_detect_others="export resources=\"\$(marsadm view-my-resources)\"; other_hosts=\"\"; for res in \$resources; do primary=\"\$(marsadm view-get-primary \$res)\"; if [[ \"\$primary\" != \"\$(hostname)\" ]] && ! [[ \"\$other_hosts\" =~ \$primary ]]; then other_hosts+=\" \$primary\"; fi; done"
# Terminates the remote command with a CANNOT_START_MARS_SWITCHOVER message
# as soon as one of the precondition checks does not hold.
tmp_mars_check_switchable="if ! [[ -d /proc/sys/mars ]]; then echo 'CANNOT_START_MARS_SWITCHOVER: kernel module not loaded'; exit -1; fi; if marsadm view-is-attach all | grep -q \"^0\$\"; then echo 'CANNOT_START_MARS_SWITCHOVER: some resource not attached'; exit -1; fi; if marsadm view-is-alive all | grep -v \"^---\" | grep -v \"^1\$\"; then echo 'CANNOT_START_MARS_SWITCHOVER: network is not alive'; exit -1; fi; if marsadm view-sync-rest all | grep -v \"^---\" | grep -v \"^0\$\"; then echo 'CANNOT_START_MARS_SWITCHOVER: some resource not synced'; exit -1; fi; if marsadm view-is-split-brain all | grep -v \"^---\" | grep -v \"^0\$\"; then echo 'CANNOT_START_MARS_SWITCHOVER: some resource is in split brain'; exit -1; fi; if marsadm view-is-consistent all | grep -v \"^---\" | grep -v \"^1\$\"; then echo 'CANNOT_START_MARS_SWITCHOVER: some resource is inconsistent'; exit -1; fi"
# DRBD counterpart of tmp_mars_detect_others. The peer hostname is derived
# by swapping the letters a and b in the local hostname.
tmp_drbd_detect_others="export resources=\"\$(if [[ -d /etc/ovz/drbd.conf.d/ ]]; then (cd /etc/ovz/drbd.conf.d/ && echo \$(ls ovz*.cfg ovz*.cfg.old | cut -d. -f1 | sort -u) ); else echo \$(cm3 --list-vms | cut -d. -f1); fi)\"; if grep -q ':Secondary/' < /proc/drbd; then other_hosts=\"\$(hostname | tr ab ba)\"; fi"
tmp_drbd_check_switchable="if grep \" cs:\" < /proc/drbd | grep -v \"cs:Connected .* ds:UpToDate/UpToDate\"; then echo 'CANNOT_START_DRBD_SWITCHOVER'; exit -1; fi"
# Nested ssh from the target host to \$host: things to be expanded on the
# second hop carry one more level of escaping (\\\$).
tmp_cm3_stop_other="ssh $sshopt root@\$host \"$prefix_cmd cm3 $tmp_cm3_options --stop all; sleep 20; count=0; for i in \\\$(cm3 --list-vms --with-status | grep -i broken | cut -d: -f1 | cut -d. -f1); do echo \"RESTOPPING BROKEN \\\$i\"; (( count++ )); sleep 20; cm3 -us; sleep 10; cm3 $tmp_cm3_options --stop \\\$i; done\""
# Re-creates the filesystem on every matching tmp volume which is not
# listed in /proc/mounts.
tmp_rebuild_ovz_tmp="for dev in /dev/vg*/ovz[0-9]*tmp; do if grep \"\$(echo \$dev | sed 's:^.*/::')\" < /proc/mounts; then echo \"Cannot rebuild \$dev\"; else echo \"Rebuild \$dev\"; if mkfs.xfs -f \$dev; then mount \$dev /mnt; chmod a+rwxt /mnt; umount /mnt; fi; fi; done"
#tmp_cm3_restart_local="for dummy in {0..2\}; do count=0; for i in \$(cm3 --list-vms --with-status | grep -i \"broken\|stopped\" | cut -d: -f1 | cut -d. -f1); do echo \"RESTARTING BROKEN \$i\"; (( count++ )); cm3 -us; sleep 10; cm3 $tmp_cm3_options --stop \$i; done; if (( count )); then sleep 10; cm3 $tmp_cm3_options --start all; sleep 10; fi; done"
tmp_cm3_restart_local="echo skip restart"
tmp_cm3_start_local="$tmp_rebuild_ovz_tmp; cm3 $tmp_cm3_options --start all; sleep 10; cm3 -us; $tmp_cm3_restart_local"
tmp_cm3_status_local="${cmd_table[mars_status]}; ${cmd_table[drbd_status]}; cm3 -us; cm3 -s | grep -q 'broken\|stopped' && exit -1"
tmp_mars_restart_cmd="drbdadm down all; /etc/init.d/drbd stop; sleep 3; /etc/init.d/drbd stop; sleep 3; rmmod drbd; sleep 1; modprobe mars"

# Problem: ssh evaluates its arguments once more. Solution: for symmetry reasons, use eval at the local side to get the same number of evaluations. Use enough backslashes to distinguish between the different numbers of evaluation levels.
# Snippets below which are run via  eval \"...\"  or via a nested ssh are
# evaluated one more time than ordinary snippets. Anything which must be
# expanded only at that last evaluation (e.g. the loop variable of a loop
# defined inside the snippet) is written as \\\$name, while \$name is
# already expanded one level earlier.
tmp_mars_update_configs_resources_cmd="if which configure_InfongSpace.pl; then configure_InfongSpace.pl --update-infong \\\$res repltype=mars; elif which ui-config-modify; then ui-config-modify -c MARS_ENABLED=true; fi"
# The echoed progress line uses the same escaping level as the real command,
# so it shows the resource of the current loop iteration.
tmp_mars_make_resources_primary="echo RESOURCES \$resources; for res in \$resources; do echo marsadm create-resource \\\$res /dev/*/\\\$res; marsadm create-resource \\\$res /dev/*/\\\$res || exit -1; $tmp_mars_update_configs_resources_cmd; done"
tmp_mars_make_resources_secondary="echo RESOURCES \$resources; for res in \$resources; do echo marsadm join-resource \\\$res /dev/*/\\\$res; marsadm join-resource \\\$res /dev/*/\\\$res || exit -1; $tmp_mars_update_configs_resources_cmd; done"
# Renames the DRBD config files to *.MARS and rewrites the /dev/drbdN
# entries of the fstab files to /dev/mars/ovzN (backups get suffix .MARS).
tmp_update_configs_cmd="for i in /etc/ovz/drbd.conf.d/*.cfg; do mv \\\$i \\\$i.MARS; done; if [[ -r /etc/ovz/fstab.include ]]; then for file in /etc/ovz/fstab.include /etc/fstab; do sed --in-place=.MARS 's:\(/dev/drbd[0-9]\+\) \+/vz/\([0-9]\+\):/dev/mars/ovz\\2 /vz/\\2:' \\\$file; done; fi"
cmd_table[fix_mars_config]="eval \"$tmp_update_configs_cmd\""
tmp_restart_cm3_cmd="/etc/init.d/clustermanager stop; sleep 3; marsadm secondary all; /etc/init.d/clustermanager start; sleep 20"
# NOTE(review): the assignment res=SCHEISSE right before the nested ssh
# looks like a deliberate poison value for making a wrongly early expansion
# of \$res visible in the output - confirm before changing it.
tmp_mars_make_resources="if [[ -h /mars/uuid ]]; then $tmp_mars_restart_cmd; ssh $sshopt root@\$other_hosts \"$prefix_cmd $tmp_mars_restart_cmd\"; eval \"$tmp_mars_make_resources_primary\"; eval \"$tmp_update_configs_cmd\"; sleep 10; res=SCHEISSE; ssh $sshopt root@\$other_hosts \"$prefix_cmd $tmp_mars_make_resources_secondary; $tmp_update_configs_cmd\"; $tmp_restart_cm3_cmd; ssh $sshopt root@\$other_hosts \"$prefix_cmd $tmp_restart_cm3_cmd\"; fi"
tmp_mars_create_cluster="if ! [[ -h /mars/uuid ]]; then ssh $sshopt root@\$other_hosts \"mount /mars; marsadm create-cluster\"; marsadm join-cluster \$other_hosts; fi"
# The migration is only started when a peer and resources are known, DRBD is
# present but has no local primary, and all disk states are UpToDate/UpToDate.
tmp_mars_migrate="mount /mars; if [[ \"\$other_hosts\" != \"\" ]] && [[ \"\$resources\" != \"\" ]] && [[ -r /proc/drbd ]] && grep ' ro:' < /proc/drbd && mountpoint /mars && ! grep 'ro:Primary/' < /proc/drbd && ! grep -o -i 'ds:[a-z/]\+' < /proc/drbd | grep -v 'UpToDate/UpToDate'; then echo \"---- MIGRATING \$(hostname) (\$other_hosts) [\$resources] ------\"; $tmp_mars_create_cluster; $tmp_mars_make_resources; fi"
#tmp_mars_migrate="echo WEGLASSEN"

# Determines the remote variables "resources" and "other_hosts" via the
# MARS or DRBD specific snippets and prints one "resource:device" line each.
cmd_table[cm3_get_resources]="if [[ -d /sys/module/mars/ ]] ; then $tmp_mars_check_switchable; $tmp_mars_detect_others; elif [[ -r /proc/drbd ]]; then $tmp_drbd_check_switchable; $tmp_drbd_detect_others; else echo 'NO_CM3_RUNNING'; exit 0; fi; for res in \$resources; do echo \"\$res:\$(ls /dev/*/\$res | grep -v /mars | tail -1)\"; done"
# Full switchover sequence: check, stop the VMs at the other hosts, optionally
# migrate DRBD to MARS, start everything locally, and report the status.
cmd_table[cm3_switchover]="${cmd_table[cm3_get_resources]}; if [[ \"\$resources\" = \"\" ]]; then echo NO_RESOURCES_EXIST; exit 0; fi; echo \"other_hosts='\$other_hosts'\"; ${cmd_table[cm3_switchable_status]}; if (( !cm3_switchable )); then exit -1; fi; for host in \$other_hosts; do echo \"---- STOPPING \$host ------\"; $tmp_cm3_stop_other; sleep 10; done; $tmp_mars_migrate; echo \"---- STARTING \$(hostname) ------\"; sleep 10; $tmp_cm3_start_local; sleep 10; $tmp_cm3_status_local; ${cmd_table[bgp_status]}; exit 0"
# Reverts the config file renames of the migration (*.MARS), restarts DRBD
# and the clustermanager, and re-creates the filesystem on /dev/vg00/mars.
cmd_table[repair_ovz_drbd]="/etc/init.d/drbd stop; /etc/init.d/clustermanager stop; /etc/init.d/drbd stop; rmmod mars; umount /mars; for i in /etc/ovz/drbd.conf.d/*.cfg.MARS /etc/ovz/fstab.include.MARS /etc/fstab.MARS; do mv \$i \${i/.MARS/}; done; /etc/init.d/drbd start; /etc/init.d/clustermanager start; mkfs.ext4 /dev/vg00/mars; mount /mars"

# The following functions may be overridden in the config file.
# When new functions are declared, their function names must follow
# the convention print_[a-z0-9_]+_status()
#
# Any new functions are automatically detected and included.
#
# Typically, they will grep in the output of previously defined remote commands
# and display some statistics about the contents.
#
# Important: these functions should not print anything when no data
# is available.
# Print the contents of all logfiles of the current status directory.
# Nothing is printed when no logfile exists: when nullglob is in effect,
# a plain "cat $status_dir/*.log" would degrade to "cat" without arguments
# and thus block on stdin.
function cat_status_logs {
    local -a logs=( "$status_dir"/*.log )
    if (( ${#logs[@]} )); then
        cat -- "${logs[@]}"
    fi
}

# Histogram over the number of received ping packets.
function print_ping_status {
    local output="$(cat_status_logs |
        grep -o " packets transmitted, [0-9]\+ received" |
        awk '{ print $3; }' |
        sort -n |
        uniq -c |
        awk '{ printf(" %s=%d", $2, $1); }')"
    if [[ "$output" != "" ]]; then
        echo " PING STATUS:$output"
    fi
}

# Histogram over well-known ssh error messages.
function print_ssh_status {
    local msg_list="Host.key.verification.failed Permission.denied Connection.refused Connection.timed.out Could.not.resolve.hostname unknown.host"
    local output="$(cat_status_logs |
        grep -o "\(${msg_list// /\\|}\)" |
        sed 's/ /_/g' |
        sort |
        uniq -c |
        awk '{ printf(" %s=%d", $2, $1); }')"
    if [[ "$output" != "" ]]; then
        echo " SSH STATUS:$output"
    fi
}

# Cumulative statistics parsed from the output of "uptime": the number of
# hosts being up for at least N days, and having a load average of at
# least N. Nothing is printed when all counters are zero.
function print_uptime_status {
    local day_limits="0 1 7 30 365"
    local load_limits="0 1 3 10 30 100 300"
    local -A days=() load=()
    local count=0
    local limit
    for limit in $day_limits; do
        days[$limit]=$(cat_status_logs |
            grep -o "up [0-9]\+ days," |
            awk -v limit="$limit" '$2 >= limit { print $2 }' |
            wc -l)
        (( days[$limit] && count++ ))
    done
    for limit in $load_limits; do
        load[$limit]=$(cat_status_logs |
            grep -o "load average: [0-9]\+" |
            awk -v limit="$limit" '$3 >= limit { print $3 }' |
            wc -l)
        (( load[$limit] && count++ ))
    done
    if (( count )); then
        echo -n " UPTIME:"
        for limit in $day_limits; do
            echo -n " >${limit}_days=$(( ${days[$limit]} ))"
        done
        echo ""
        echo -n " LOADAVG:"
        for limit in $load_limits; do
            echo -n " >${limit}=$(( ${load[$limit]} ))"
        done
        echo ""
    fi
}

# Histogram over the marker lines printed by cmd_table[kernel_status].
function print_kernel_status {
    local msg_list="KERNEL_IS_RECENT CANNOT_REBOOT[A-Z_]* NEEDS_REBOOT"
    local output="$(cat_status_logs |
        grep -o "^\(${msg_list// /\\|}\)$" |
        sort |
        uniq -c |
        awk '{ printf(" %s=%d", $2, $1); }')"
    if [[ "$output" != "" ]]; then
        echo " KERNEL STATUS:$output"
    fi
}

# Histogram over the marker lines printed by cmd_table[mars_module_status],
# followed by counters for well-known state keywords standing alone on a line.
function print_mars_status {
    local msg_list="NO_MARS_HOST MARS_IS_RECENT MARS_CANNOT_RELOAD[A-Z_]* MARS_NEEDS_RELOAD"
    local output="$(cat_status_logs |
        grep -o "^\(${msg_list// /\\|}\)$" |
        sort |
        uniq -c |
        awk '{ printf(" %s=%d", $2, $1); }')"
    if [[ "$output" != "" ]]; then
        echo " MARS STATUS:$output"
    fi
    msg_list="ModuleNotLoaded UnResponsive NotJoined NotStarted EmergencyMode Replicating NotYetPrimary PausedSync Syncing PausedFetch PausedReplay NoPrimaryDesignated PrimaryUnreachable Replaying"
    local -A seen=()
    local count=0
    local msg
    for msg in $(cat_status_logs | grep -o "^\(${msg_list// /\\|}\)$"); do
        (( count++ ))
        (( seen[$msg]++ ))
    done
    if (( count )); then
        echo -n " MARS RESOURCES:"
        # iterate over msg_list for getting a stable output order
        for msg in $msg_list; do
            if (( ${seen[$msg]:-0} )); then
                echo -n " $msg=${seen[$msg]}"
            fi
        done
        echo ""
    fi
}

# Histogram over the cs: / ro: / ds: fields of /proc/drbd output.
function print_drbd_status {
    local output="$(cat_status_logs |
        grep -i -o 'NO_DRBD_HOST\| cs:[a-z]\+\| ro:[a-z/]\+\| ds:[a-z/]\+' |
        sed 's/^ *[a-z]\+://' |
        sort |
        uniq -c |
        awk '{ printf(" %s=%d", $2, $1); }')"
    if [[ "$output" != "" ]]; then
        echo " DRBD RESOURCES:$output"
    fi
}

# Counters for the cm3 related marker lines, followed by counters of the
# state keywords found in the last "cm3 -s" table of each logfile.
function print_cm3_status {
    local msg_list="NO_CM3_RUNNING NO_RESOURCES_EXIST CANNOT_START_DRBD_SWITCHOVER CANNOT_START_MARS_SWITCHOVER CM3_SWITCHOVER_POSSIBLE CM3_SWITCHOVER_NOT_POSSIBLE"
    local -A seen=()
    local found=0
    local msg
    for msg in $(cat_status_logs | grep -o "^\(${msg_list// /\\|}\)" | sed 's/ /_/g'); do
        (( found++ ))
        (( seen[$msg]++ ))
    done
    if (( found )); then
        echo -n " CM3 STATUS:"
        for msg in $msg_list; do
            if (( ${seen[$msg]:-0} )); then
                echo -n " $msg=${seen[$msg]}"
            fi
        done
        echo ""
    fi

    local key_list="started stopped active remote broken disabled"
    local -A nr=()
    local key
    local file
    local line
    found=0
    for key in $key_list; do
        nr[$key]=0
    done
    for file in "$status_dir"/*.log; do
        # determine the last line, in case there are multiple invocations
        # of "cm3 -s" in the same logfile.
        line="$(grep -n "VM *.*STATE *NODE *STORAGE" < "$file" | tail -1 | cut -d: -f1)"
        if [[ "$line" != "" ]]; then
            (( found++ ))
            # The awk part prints the keyword once, plus once more for each
            # further comma-separated item of the following field.
            for key in $(tail -n +$line < "$file" | grep -o " \(${key_list// /\\|}\) .*" | awk '{ print $1; rest=$2; while (rest = gensub("[^,]*,?", "", "", rest)) { print $1; } }'); do
                (( nr[$key]++ ))
            done
        fi
    done
    if (( found )); then
        echo -n " CM3 RESOURCES:"
        for key in $key_list; do
            echo -n " $key=${nr[$key]}"
        done
        echo ""
    fi
}

# Histogram over the BGP_* marker lines printed by cmd_table[bgp_status].
function print_bgp_status {
    local output="$(cat_status_logs |
        grep '^\(BGP_[A-Z_]\+\)$' |
        sort -r |
        uniq -c |
        awk '{ printf(" %s=%d", $2, $1); }')"
    if [[ "$output" != "" ]]; then
        echo " BGP STATUS:$output"
    fi
}

# END of configuration variables and functions

# All variable names known at this point, except the tmp_* ones.
# Used by the help display.
param_vars="$(set | grep '^[_A-Za-z0-9]\+=' | cut -d= -f1 | grep -v "^tmp_")"

########################################################

# generic helper functions

# Print a warning to stderr.
function warn {
    local txt="${1:-Unknown}"
    echo "WARNING: $txt" >&2
}

# Print a failure message to stderr, remove leftover temporary files of this
# process, and terminate the (sub)shell with status 255.
function fail {
    local txt="${1:-Unknown failure}"
    echo "FAILURE: $txt" >&2
    rm -f /tmp/tmp_*.$$
    exit 255
}

# Ask the user for confirmation.
#   $1  when non-empty, "S to skip" is offered in the prompt
#   $2  when 0, no question is asked at all (default: $confirm)
# Returns 0 for continuing, non-zero when the answer starts with s or S.
function do_confirm {
    local skip_this="$1"
    local active="${2:-$confirm}"
    local response

    (( !active )) && return 0
    [[ "$skip_this" != "" ]] && skip_this="S to skip, "
    echo -n "[CONFIRM: Press ${skip_this}Return to continue, ^C to abort] "
    read -r response
    ! [[ "$response" =~ ^[sS] ]]
}

# Unconditionally ask for confirmation; terminate the whole script
# (without doing anything) when the user decides to skip.
function confirm_or_exit {
    if ! do_confirm 1 1; then
        echo "Skipped on user request - nothing has been executed."
        exit 0
    fi
}

# Succeeds when $1 is a syntactically valid name of an already set variable.
function is_known_var {
    local name="$1"
    [[ "$name" =~ ^[_A-Za-z][_A-Za-z0-9]*$ ]] && [[ -n "${!name+set}" ]]
}

# Execute a command on a remote host via ssh as root.
#   $1  hostname
#   $2  command (nothing is done when empty)
#   $3  nofail: when non-zero, a failure is reported via the return status
#       (the exit status of ssh); otherwise fail() is called.
function remote {
    local host="$1"
    local cmd="$2"
    local nofail="${3:-0}"
    local rc=0

    (( verbose > 1 )) && echo "Executing on $host: '$cmd'" >&2
    [[ "${cmd## }" = "" ]] && return 0

    # $sshopt is deliberately unquoted: it is a blank-separated option list.
    # The status is captured immediately, before any other command can
    # overwrite $?.
    ssh $sshopt root@"$host" "$cmd" || rc=$?
    if (( rc == 0 )); then
        return 0
    elif (( nofail )); then
        return $rc
    else
        #fail "ssh to '$host' command '$cmd' failed with status $rc"
        fail "ssh to '$host' command failed with status $rc"
    fi
}

# Like remote(), but obeying the dry_run and confirm modes.
function remote_action {
    local host="$1"
    local cmd="$2"

    if (( dry_run )); then
        echo "DRY_RUN REMOTE $host ACTION '$cmd'"
    elif (( confirm )); then
        echo "REMOTE $host ACTION '$cmd'"
        if do_confirm 1; then
            remote "$host" "$cmd"
        else
            echo "SKIPPING $host ACTION '$cmd'"
        fi
    else
        remote "$host" "$cmd"
    fi
}

# Source a file when it is readable.
#   $1  filename
#   $2  description of the file type, used in messages
function source_when_possible {
    local file="$1"
    local type="$2"

    if [[ -r "$file" ]]; then
        echo "Sourcing $type file '$file'"
        . "$file" || fail "$type file $file is not parsable"
    elif (( verbose )); then
        echo "Skipping non-existent $type file '$file'"
    fi
}

# Pre-scan for --verbose, so it is already effective while the config files
# are sourced.
for i in "$@"; do
    if [[ "$i" =~ ^--verbose ]]; then
        verbose=1
    fi
done

# $additional_configs is deliberately unquoted: blank-separated glob patterns.
for file in $additional_configs; do
    source_when_possible "$file" "config"
done
source_when_possible "$default_config" "config"

# Allow forceful override of any _known_ variable at the command line
for i in "$@"; do
    if [[ "$i" =~ ^--[-_A-Za-z0-9]+$ ]]; then
        param="${i#--}"
        var="${param//-/_}"
        is_known_var "$var" || fail "Variable '$var' is unknown"
        printf -v "$var" '%s' 1
    elif [[ "$i" =~ ^--[-_A-Za-z0-9]+= ]]; then
        param="${i#--}"
        var="${param%%=*}"
        var="${var//-/_}"
        val="${param#*=}"
        is_known_var "$var" || fail "Variable '$var' is unknown"
        # The value is assigned literally. It is not evaluated by the shell
        # once more, so it may contain blanks and shell metacharacters.
        printf -v "$var" '%s' "$val"
    elif [[ "$i" =~ ^-h$ ]]; then
        help=1
    elif [[ "$i" =~ ^-v$ ]]; then
        (( verbose++ ))
    elif [[ "$host_spec" = "" ]]; then
        host_spec="$i"
    elif [[ "$action" = "" ]]; then
        action="$i"
    else
        fail "bad parameter syntax '$i'"
    fi
done

# Locate the association file: the first readable candidate wins.
for dir in $assoc_dirs; do
    if [[ -r "$dir/$assoc_file" ]]; then
        assoc_file="$dir/$assoc_file"
        break
    fi
done

# Print the help text: all parameter variables with their current values and
# documentation, the status functions, the keys from the association file,
# and the action keywords from cmd_table[].
function do_help {
    cat <<EOF
Usage: $0 [--variable=value ...] <host_spec> <action>

---------------------
The following parameter variables can be either passed by the environment,
or used for hard overriding on the command line via --variable=value syntax:

$(
    declare -A orig
    local i
    local value
    local doc_line
    for i in $orig_vars; do
        orig[$i]=1
    done
    for i in $param_vars; do
        [[ "$i" =~ _vars$ ]] && continue
        if (( !orig[$i] )); then
            value="${!i}"
            if [[ "$value" =~ ^[0-9]+$ ]]; then
                echo "$i=$value"
            else
                echo "$i=\"$value\""
            fi
            doc_line="${doc[$i]}"
            if [[ "$doc_line" != "" ]]; then
                echo -e "\t$doc_line"
            fi
        fi
    done
)

---------------------
The following status functions are defined and are automatically called
upon $0 --status :

$(set | grep "^[a-z0-9_]\+ ()" | grep "^print_[a-z0-9_]\+_status")

---------------------
The following strings can be used for <host_spec>:
(see file $assoc_file)

$([[ -r "$assoc_file" ]] && cut -d: -f2 < "$assoc_file" | sort -u)

Hint: multiple specs may be separated by blanks, if you correctly
quote it to the shell.

Example: $0 "host1 host7" "uptime"

Set operations can be performed by prefixing each spec or hostname
with "+" or "-" signs.

Example: $0 "+de.kae.bs -de.kae.bs;R08" "kernel_status"
 will run on all hosts from complete datacenter "de.kae.bs" with the
 exception of all hosts from Room 08.

Filtering: $0 --host-filter="store" "de.kae.bs" "kernel_status"
 will only run on final target hostnames containing the substring "store".
 You may also use bash regexes.

---------------------
The following pre-defined <action>s from cmd_table[] can be used
(or, give a full shell command in quotes):

$(
    local i
    for i in ${!cmd_table[*]}; do
        echo "$i"
    done
)
EOF
}

if (( help )); then
    do_help
    exit 0
fi

# Print a summary over all logfiles in $status_dir, then call all functions
# following the naming convention print_*_status.
function print_status {
    local empty=0
    local failure=0
    local ok=0
    local working=0
    local file
    for file in "$status_dir"/*.log; do
        if ! [[ -s "$file" ]]; then
            (( empty++ ))
        elif grep -q FAILURE "$file"; then
            (( failure++ ))
        elif grep -q "^$txt_ok$" "$file"; then
            (( ok++ ))
        else
            (( working++ ))
        fi
    done
    echo "REMOTE SCRIPT STATUS: NotStarted=$empty Working=$working OK=$ok Fail=$failure"
    local func
    for func in $(set | grep "^[a-z0-9_]\+ ()" | grep -o "^print_[a-z0-9_]\+_status"); do
        $func
    done
}

if (( status )); then
    [[ -d "$status_dir" ]] || fail "Status directory '$status_dir' does not exist"
    # use the latest versioned subdirectory, if there is any
    sub_dir="$(ls "$status_dir" | grep "^run-" | sort | tail -1)"
    [[ -d "$status_dir/$sub_dir" ]] && export status_dir="$status_dir/$sub_dir"
    echo "Status from $status_dir:"
    print_status
    exit 0
fi

if (( clean )); then
    [[ -d "$status_dir" ]] || fail "Status directory '$status_dir' does not exist"
    echo "Are you sure to clean the status directory $status_dir/ including all its versioned subdirectories?"
    # Only delete when the user has not chosen to skip.
    if do_confirm 1 1; then
        rm -rf -- "$status_dir"
    else
        echo "Skipped on user request - nothing has been removed."
    fi
    exit 0
fi

# automatic versioning of status_dir
export status_dir="$status_dir/run-$start_stamp"

########################################################

# compute host_list out of host_spec

# Add a host to the global host_list, or remove it.
#   $1  literal hostname
#   $2  when non-zero, all occurrences of the host are removed instead
function add_host {
    local host="$1"
    local minus="$2"

    if (( minus )); then
        local kept=""
        local other
        for other in $host_list; do
            [[ "$other" == "$host" ]] || kept+=" $other"
        done
        host_list="$kept"
    else
        host_list+=" $host"
    fi
}

# Translate host_spec into the global host_list by means of the association
# file, then apply host_filter, skip and max. Fails when nothing remains.
function compute_host_list {
    local host
    local pattern
    local matches
    local minus
    local i

    [[ -r $assoc_file ]] || fail "cannot find assoc file '$assoc_file'"
    (( verbose )) && echo "Using assoc file '$assoc_file'"

    for host in $host_spec; do
        minus=0
        if [[ "$host" =~ ^- ]]; then
            host="${host#-}"
            minus=1
        else
            host="${host#+}"
        fi
        # Dots are escaped only for the grep pattern. The host list itself
        # must get the literal name, because it is passed to ssh later.
        pattern="${host//./\\.}"
        if matches="$(grep -E ":$pattern\$" < "$assoc_file")"; then
            # the spec is a key: take all hosts associated with it
            for i in $(cut -d: -f1 <<< "$matches"); do
                add_host "$i" $minus
            done
        elif grep -qE "^$pattern:" < "$assoc_file"; then
            add_host "$host" $minus
        elif (( allow_unknown_hosts )); then
            warn "host '$host' does not appear in $assoc_file"
            add_host "$host" $minus
        else
            fail "Keyword or hostname '$host' does not exist in $assoc_file"
        fi
    done

    local old_host_list
    local count
    if [[ "$host_filter" != "" ]]; then
        old_host_list="$host_list"
        host_list=""
        for host in $old_host_list; do
            if [[ "$host" =~ $host_filter ]]; then
                host_list+=" $host"
            fi
        done
    fi
    if (( skip > 0 )); then
        old_host_list="$host_list"
        count=0
        host_list=""
        for host in $old_host_list; do
            (( ++count <= skip )) && continue
            host_list+=" $host"
        done
    fi
    if (( max > 0 )); then
        old_host_list="$host_list"
        count=0
        host_list=""
        for host in $old_host_list; do
            (( ++count > max )) && break
            host_list+=" $host"
        done
    fi

    local host_count=$(echo ${host_list} | wc -w)
    if (( !host_count )); then
        fail "Resulting host list is empty - nothing can be done at all"
    fi
    if (( verbose )); then
        echo "USING FINAL host_list: ${host_list}"
    else
        echo "Will run on $host_count hosts"
    fi
}

# Determine the global cmd out of action, unless cmd is already given.
# Anything except "ping" and predefined *_status actions needs confirmation.
function get_cmd {
    if [[ "$cmd" = "" ]]; then
        if [[ "$action" = "ping" ]]; then
            echo "Running a pure ping to $(echo "$host_list" | wc -w) hosts"
            cmd="ping"
        elif [[ "${cmd_table[$action]}" != "" ]]; then
            echo "Using predefined cmd_table[] action '$action'"
            if ! [[ "$action" =~ _status ]]; then
                confirm_or_exit
            fi
            cmd="$prefix_cmd${cmd_table[$action]}"
        elif [[ "$action" != "" ]]; then
            echo ""
            echo "Running action '$action' as a command on $(echo "$host_list" | wc -w) hosts"
            confirm_or_exit
            cmd="$action"
        else
            fail "No action given."
        fi
    else
        echo ""
        echo "Using given command '$cmd' on $(echo "$host_list" | wc -w) hosts"
        confirm_or_exit
    fi
}

########################################################

# main program

# Run $cmd on all hosts from host_list, writing one logfile per host into
# $status_dir. In confirm mode, the hosts are processed sequentially;
# otherwise one background job per host is forked.
function main {
    mkdir -p "$status_dir" || fail "cannot create status directory '$status_dir'"

    script_start=$(date +%s)
    if (( confirm )); then
        echo "CONFIRM mode: everything is running SEQUENTIALLY"
    else
        echo "START forking sub-processes"
    fi

    local host
    for host in $host_list; do
        if (( confirm )); then
            if remote_action "$host" "$cmd" 2>&1; then
                eval echo "$txt_ok"
            else
                eval echo "$txt_fail"
            fi 2>&1 | tee "$status_dir/$host.log"
        else
            if (( dry_run )); then
                echo "DRY_RUN REMOTE $host ACTION '$cmd'"
                eval echo "$txt_ok"
            elif [[ "$cmd" = "ping" ]]; then
                ping -c 1 -w 10 "$host"
                eval echo "$txt_ok"
            elif remote "$host" "$cmd" 2>&1 ; then
                eval echo "$txt_ok"
            else
                eval echo "$txt_fail"
            fi > "$status_dir/$host.log" 2>&1 &
            # throttle the forking when too many jobs exist
            while (( $(jobs | wc -l) > max_jobs_parallelism )); do
                sleep 1
            done
        fi
    done
    (( !confirm )) && echo "DONE forking sub-processes"

    if (( do_wait )); then
        echo "Waiting for termination of sub-processes"
        local duration=1
        # poll with increasing intervals, up to 10s
        while (( $( pstree $$ | wc -l ) > 2 )); do
            print_status
            sleep $duration
            (( duration < 10 && duration++ ))
        done
        wait
    fi

    script_end=$(date +%s)
    echo "ESTIMATED script duration: $(( script_end - script_start )) seconds"

    print_status
}

compute_host_list
get_cmd
main

exit 0