From eaba9cf1bfef58b7d5b9090b589e2bbb1bf19d39 Mon Sep 17 00:00:00 2001 From: Thomas Schoebel-Theuer Date: Tue, 24 Jul 2018 08:59:19 +0200 Subject: [PATCH] doc: update help --- docu/football-verbose.help | 313 ++++++++++++++++++++++++++++++++++++- docu/football.help | 44 ++++++ docu/screener-verbose.help | 3 +- 3 files changed, 351 insertions(+), 9 deletions(-) diff --git a/docu/football-verbose.help b/docu/football-verbose.help index d582a42f..734aa782 100644 --- a/docu/football-verbose.help +++ b/docu/football-verbose.help @@ -12,6 +12,8 @@ Actions for resource migration: Run the sequence migrate_prepare ; migrate_wait ; migrate_finish; migrate_cleanup. +Dto for testing of phases: + ./football.sh migrate_prepare [] Allocate LVM space at the targets and start MARS replication. @@ -30,6 +32,8 @@ Actions for inplace FS shrinking: ./football.sh shrink Run the sequence shrink_prepare ; shrink_finish ; shrink_cleanup. +Dto for testing of phases: + ./football.sh shrink_prepare [] Allocate temporary LVM space (when possible) and create initial raw FS copy. @@ -61,6 +65,17 @@ Combined actions: Actions for (manual) repair in emergency situations: + ./football.sh manual_handover + This is useful in place of going to the machines and starting + handover on their command line. You dont need to log in. + All hooks (e.g. for downtime / reporting / etc) are automatically + called. + Notice: it will only work when there is already a replica + at , and when further constraints such as + clustermanager constraints will allow it. + For a full Football game between different clusters, use + "migrate" instead. + ./football.sh manual_migrate_config [] Transfer only the cluster config, without changing the MARS replicas. This does no resource stopping / restarting. @@ -125,6 +140,24 @@ General features: some sysadmins says "screener.sh continue $resource" or attaches to the sessions and presses the RETURN key. 
+Configuration: + + You can place shell variable definitions for overriding any + tunables into the following locations: + + football_includes=/usr/lib/mars/plugins /etc/mars/plugins /home/schoebel/mars/football-master.git/plugins /home/schoebel/.mars/plugins ./plugins + + football_confs=/usr/lib/mars/confs /etc/mars/confs /home/schoebel/mars/football-master.git/confs /home/schoebel/.mars/confs ./confs + + football_creds=/usr/lib/mars/creds /etc/mars/creds /home/schoebel/mars/football-master.git/creds /home/schoebel/mars/football-master.git /home/schoebel/.mars/creds ./creds + + Filenames should match the following patterns: + + football-*.preconf Here you may change paths and enable_* variables. + football-*.conf Intended for main parameters. + football-*.postconf For late overrides after sourcing modules. + football-*.reconf Modify runtime parameters during waits. + ## football_includes # List of directories where football-*.sh and football-*.conf # files can be found. @@ -140,6 +173,12 @@ General features: # List of directories where various credential files can be found. football_creds="${football_creds:-/usr/lib/mars/creds /etc/mars/creds $script_dir/creds $script_dir $HOME/.mars/creds ./creds}" + ## trap_signals + # List of signal names which should be trapped. + # Traps are important for housekeeping, e.g. automatic + # removal of locks. + trap_signals="${trap_signals:-SIGINT}" + ## dry_run # When set, actions are only simulated. dry_run=${dry_run:-0} @@ -176,9 +215,9 @@ General features: football_backup_dir="${football_backup_dir:-$football_logdir/backups}" ## screener - # When enabled, handover execution to the screener. + # When enabled, delegate execution to the screener. # Very useful for running Football in masses. 
- screener="${screener:-0}" + screener="${screener:-1}" ## min_space # When testing / debugging with extremely small LVs, it may happen @@ -192,10 +231,23 @@ General features: # Use this for repeated refreshes of the dentry cache after some time. cache_repeat_lapse="${cache_repeat_lapse:-120}" # Minutes + ## remote_ping + # Before using ssh, ping the target. + # This is only useful in special cases. + remote_ping="${remote_ping:-0}" + + ## ping_opts + # Options for ping checks. + ping_opts="${ping_opts:--W 1 -c 1}" + ## ssh_opt # Useful for customization to your ssh environment. ssh_opt="${ssh_opt:--4 -A -o StrictHostKeyChecking=no -o ForwardX11=no -o KbdInteractiveAuthentication=no -o VerifyHostKeyDNS=no}" + ## ssh_auth + # Useful for extra -i options. + ssh_auth="${ssh_auth:-}" + ## rsync_opt # The rsync options in general. # IMPORTANT: some intermediate progress report is absolutely needed, @@ -299,6 +351,25 @@ General features: # This may be used to establish a uniform default running location. post_hand="${post_hand:-}" + ## tmp_suffix + # Only for experts. + tmp_suffix="${tmp_suffix:--tmp}" + + ## shrink_suffix_old + # Suffix for backup LVs. These are kept for some time until + # *_cleanup operations will remove them. + shrink_suffix_old="${shrink_suffix_old:--preshrink}" + + ## start_regex + # At which $operation the hook football_start + # should be called + start_regex="${start_regex:-^(migrate_prepare|migrate|migrate+|shrink_prepare|shrink)}" + + ## finished_regex + # At which $operation the hook football_finished + # should be called + finished_regex="${finished_regex:-^(migrate_finish|migrate|migrate+|shrink_finish|shrink)}" + ## lock_break_timeout # When remote ssh commands are failing, remote locks may sustain forever. # Avoid deadlocks by breaking remote locks after this timeout has elapsed. @@ -317,6 +388,123 @@ General features: # lead to waits instead of aborts. 
startup_when_locked="${startup_when_locked:-1}" + ## resource_pre_check + # Useful for debugging of container problems. + # Normally not needed. + resource_pre_check="${resource_pre_check:-0}" + + ## condition_check_interval + # How often conditions should be re-evaluated. + condition_check_interval="${condition_check_interval:-180}" # Seconds + + ## limit_syncs + # Limit the number of actually running syncs by waiting + # until less than this number of syncs are running at any + # target host. + limit_syncs="${limit_syncs:-4}" + + ## limit_shrinks + # Limit the number of actually running shrinks by waiting + # until less than this number of shrinks are running at any + # target host. + limit_shrinks="${limit_shrinks:-1}" + + ## count_shrinks_by_tmp_mount + # Only count the temporary mounts. + # Otherwise, LVs are counted. The latter may yield false positives + # because LVs may be created in advance (e.g. at another cluster member) + count_shrinks_by_tmp_mount="${count_shrinks_by_tmp_mount:-1}" + + ## limit_mars_logfile + # Dont handover when too much logfile data is missing at the + # new primary site. + limit_mars_logfile="${limit_mars_logfile:-1024}" # MiB + + ## optimize_dentry_cache + # Don't umount the temporary shrink space unnecessarily. + # Try to shutdown the VM / container without umounting. + # Important for high speed. + optimize_dentry_cache="${optimize_dentry_cache:-1}" + + ## mkfs_cmd + # Tunable for creation of new filesystems. + mkfs_cmd="${mkfs_cmd:-mkfs.xfs -s size=4096 -d agcount=1024}" + + ## mount_opts + # Options for temporary mounts. + # Not used for ordinary clustermanager operations. + mount_opts="${mount_opts:--o rw,nosuid,noatime,attr2,inode64,usrquota}" + + ## reuse_mount + # Assume that already existing temporary mounts are the correct ones. + # This will speed up interrupted and repeated runs by factors. + reuse_mount="${reuse_mount:-1}" + + ## reuse_lv + # Assume that temporary LVs are reusable. 
+ reuse_lv="${reuse_lv:-1}" + + ## reuse_lv_check + # When set, this command is executed for checking whether + # the LV can be reused. + reuse_lv_check="${reuse_lv_check:-xfs_db -c sb -c print -r}" + + ## do_quota + # Transfer xfs quota information. + # 0 = off + # 1 = global xfs quota transfer + # 2 = additionally local one + do_quota="${do_quota:-2}" + + ## xfs_dump_dir + # Temporary space for keeping xfs quota dumps. + xfs_dump_dir="${xfs_dump_dir:-$football_backup_dir/xfs-quota-$start_stamp}" + + ## xfs_quota_enable + # Command for re-enabling the quota system after shrink. + xfs_quota_enable="${xfs_quota_enable:-xfs_quota -x -c enable}" + + ## xfs_dump and xfs_restore + # Commands for transfer of xfs quota information. + xfs_dump="${xfs_dump:-xfs_quota -x -c dump}" + xfs_restore="${xfs_restore:-xfs_quota -x -c restore}" + + ## fs_resize_cmd + # Command for online filesystem expansion. + fs_resize_cmd="${fs_resize_cmd:-xfs_growfs -d}" + + ## migrate_two_phase + # This is useful when the new hardware has a better replication network, + # e.g. 10GBit uplink instead of 1GBit. + # Instead of starting two or more syncs in parallel on the old hardware, + # run the syncs in two phases: + # 1. migrate data to the new primary only. + # 1b. handover to new primary. + # 2. now start migration of data to the new secondaries, over the better + # network attachment of the new hardware. + migrate_two_phase="${migrate_two_phase:-0}" + + ## migrate_always_all + # By default, migrate+shrink creates only 1 replica during the initial + # migration. + # When setting this, all replicas are created, which improves resilience, + # but worsens network performance. + migrate_always_all="${migrate_always_all:-0}" + + ## migrate_early_cleanup + # Early cleanup of old replicas when using migrate_always_all or + # migrate_two_phase. + # Only reasonable when combined with migrate+shrink. 
+ # This is slightly less safe, but saves time when you want to + # decommission old hardware as fast as possible. + # Early cleanup of the old replicas will only be done when + # at least 2 replicas are available at the new (target) side. + # These two new replicas can be created either by + # a) migrate_always_all=1 or + # b) migrate_two_phase=1 or automatically selected (or not) via + # c) auto_two_phase=1 + migrate_early_cleanup="${migrate_early_cleanup:-1}" + ## user_name # Normally automatically derived from ssh agent or from $LOGNAME. # Please override this only when really necessary. @@ -343,6 +531,34 @@ PLUGIN football-1and1config # instance (clustermw & siblings). enable_1and1config="${enable_1and1config:-$(if [[ "$0" =~ tetris ]]; then echo 1; else echo 0; fi)}" + ## runstack_host + # To be provided in a *.conf or *.preconf file. + runstack_host="${runstack_host:-}" + + ## runstack_cmd + # Command to be provided in a *.conf file. + runstack_cmd="${runstack_cmd:-}" + + ## runstack_ping + # Only call runstack when the container is pingable. + runstack_ping="${runstack_ping:-1}" + + ## dastool_host + # To be provided in a *.conf or *.preconf file. + dastool_host="${dastool_host:-}" + + ## dastool_cmd + # Command to be provided in a *.conf file. + dastool_cmd="${dastool_cmd:-}" + + ## update_host + # To be provided in a *.conf or *.preconf file. + update_host="${update_host:-}" + + ## update_cmd + # Command to be provided in a *.conf file. + update_cmd="${update_cmd:-}" + PLUGIN football-cm3 @@ -363,6 +579,15 @@ Specific actions for plugin football-cm3: Call through to the clustertool via REST. Useful for manual inspection and repair. +Specific features with plugin football-cm3: + + - Parameter syntax "cluster123" instead of "icpu456 icpu457" + This is an alternate specification syntax, which is + automatically replaced with the real machine names. 
+ It tries to minimize datacenter cross-traffic by + taking the new $target_primary at the same datacenter + location where the container is currently running. + ## enable_cm3 # ShaHoLin-specifc plugin for working with the infong platform # (istore, icpu, infong) via 1&1-specific clustermanager cm3 @@ -387,6 +612,12 @@ Specific actions for plugin football-cm3: # not respond. check_ping_rounds="${check_ping_rounds:-5}" + ## additional_runstack + # Do an additional runstack after startup of the new container. + # In turn, this will only do something when source and target are + # different. + additional_runstack="${additional_runstack:-1}" + ## workaround_firewall # Documentation of technical debt for later generations: # This is needed since July 2017. In the many years before, no firewalling @@ -441,9 +672,15 @@ Specific actions for plugin football-cm3: # The script will fail when some of these is encountered. forbidden_bz_ids="${forbidden_bz_ids:-}" + ## auto_two_phase + # When this is set, override the global migrate_two_phase parameter + # at runtime by ShaHoLin-specific checks + auto_two_phase="${auto_two_phase:-1}" + ## clustertool_host # URL prefix of the internal configuation database REST interface. - clustertool_host="${clustertool_host:-http://clustermw:3042}" + # Set this via *.preconf config files. + clustertool_host="${clustertool_host:-}" ## clustertool_user # Username for clustertool access. @@ -525,30 +762,90 @@ Specific actions for plugin football-cm3: # ShaHoLin-internal monitis_downtime_duration="${monitis_downtime_duration:-20}" # Minutes + ## shaholin_customer_report_cmd + # Action script when the hardware has improved. + shaholin_customer_report_cmd="${shaholin_customer_report_cmd:-}" + + ## shaholin_src_cpus and shaholin_dst_cpus + shaholin_src_cpus="${shaholin_src_cpus:-4}" + shaholin_dst_cpus="${shaholin_dst_cpus:-32}" + ## shaholin_finished_log # ShaHoLin-specific logfile, reporting _only_ successful completion # of an action. 
shaholin_finished_log="${shaholin_finished_log:-$football_logdir/shaholin-finished.log}" + ## shaholin_action + # OPTIONAL: specific action script with parameters. + shaholin_action="${shaholin_action:-}" + + ## auto_handover + # Load-balancing across locations. + # Works only together with the new syntax "cluster123". + # Depending on the number of syncs currently running, this + # will internally add --pre-hand and --post_hand options + # dynamically at runtime. This will spread much of the sync + # traffic to per-datacenter local behaviour. + # Notice: this may produce more total customer downtime when + # running a high parallelism degree. + # Thus it tries to reduce unnecessary handovers to other locations. + auto_handover="${auto_handover:-1}" + + +PLUGIN football-ticket + + Generic plugin for creating and updating tickets, + e.g. Jira tickets. + + You will need to hook in some external scripts which are + then creating / updating the tickets. + + Comment texts may be provided with following conventions: + + comment.$ticket_state.txt + comment.$ticket_phase.$ticket_state.txt + + Directories where comments may reside: + + football_creds=/usr/lib/mars/creds /etc/mars/creds /home/schoebel/mars/football-master.git/creds /home/schoebel/mars/football-master.git /home/schoebel/.mars/creds ./creds + football_confs=/usr/lib/mars/confs /etc/mars/confs /home/schoebel/mars/football-master.git/confs /home/schoebel/.mars/confs ./confs + football_includes=/usr/lib/mars/plugins /etc/mars/plugins /home/schoebel/mars/football-master.git/plugins /home/schoebel/.mars/plugins ./plugins + + ## enable_ticket + enable_ticket="${enable_ticket:-$(if [[ "$0" =~ tetris ]]; then echo 1; else echo 0; fi)}" + ## ticket - # OPTIONAL: the meaning is ShaHoLin specific. - # This can be used for updating JIRA tickets. + # OPTIONAL: the meaning is installation specific. + # This can be used for identifying JIRA tickets. 
# Can be set on the command line like "./tetris.sh $args --ticket=TECCM-4711 ticket="${ticket:-}" ## ticket_get_cmd # Optional: when set, this script can be used for retrieving ticket IDs # in place of commandline option --ticket= + # Retrieval should be unique by resource names. + # You may use any defined bash variable by escaping them like + # $res . + # Example: ticket_get_cmd="my-ticket-getter-script.pl "$res"" ticket_get_cmd="${ticket_get_cmd:-}" + ## ticket_create_cmd + # Optional: when set, this script can be used for creating new tickets. + # It will be called when $ticket_get_cmd does not retrieve anything. + # Example: ticket_create_cmd="my-ticket-create-script.pl "$res" "$target_primary"" + # Afterwards, the new ticket needs to be retrievable via $ticket_get_cmd. + ticket_create_cmd="${ticket_create_cmd:-}" + ## ticket_update_cmd # This can be used for calling an external command which updates # the ticket(s) given by the $ticket parameter. + # Example: ticket_update_cmd="my-script.pl "$ticket" "$res" "$ticket_phase" "$ticket_state"" ticket_update_cmd="${ticket_update_cmd:-}" - ## shaholin_action - # OPTIONAL: specific action script with parameters. - shaholin_action="${shaholin_action:-}" + ## ticket_require_comment + # Only update a ticket when a comment file exists in one of the + # directories $football_creds $football_confs $football_includes + ticket_require_comment="${ticket_require_comment:-1}" PLUGIN football-basic diff --git a/docu/football.help b/docu/football.help index 2f3e9257..1d74d0c0 100644 --- a/docu/football.help +++ b/docu/football.help @@ -11,6 +11,8 @@ Actions for resource migration: Run the sequence migrate_prepare ; migrate_wait ; migrate_finish; migrate_cleanup. +Dto for testing of phases: + ./football.sh migrate_prepare [] Allocate LVM space at the targets and start MARS replication. @@ -29,6 +31,8 @@ Actions for inplace FS shrinking: ./football.sh shrink Run the sequence shrink_prepare ; shrink_finish ; shrink_cleanup. 
+Dto for testing of phases: + ./football.sh shrink_prepare [] Allocate temporary LVM space (when possible) and create initial raw FS copy. @@ -60,6 +64,17 @@ Combined actions: Actions for (manual) repair in emergency situations: + ./football.sh manual_handover + This is useful in place of going to the machines and starting + handover on their command line. You don't need to log in. + All hooks (e.g. for downtime / reporting / etc) are automatically + called. + Notice: it will only work when there is already a replica + at , and when further constraints such as + clustermanager constraints will allow it. + For a full Football game between different clusters, use + "migrate" instead. + ./football.sh manual_migrate_config [] Transfer only the cluster config, without changing the MARS replicas. This does no resource stopping / restarting. @@ -150,6 +165,35 @@ Specific actions for plugin football-cm3: Call through to the clustertool via REST. Useful for manual inspection and repair. +Specific features with plugin football-cm3: + + - Parameter syntax "cluster123" instead of "icpu456 icpu457" + This is an alternate specification syntax, which is + automatically replaced with the real machine names. + It tries to minimize datacenter cross-traffic by + taking the new $target_primary at the same datacenter + location where the container is currently running. + + +PLUGIN football-ticket + + Generic plugin for creating and updating tickets, + e.g. Jira tickets. + + You will need to hook in some external scripts which are + then creating / updating the tickets. 
+ + Comment texts may be provided with following conventions: + + comment.$ticket_state.txt + comment.$ticket_phase.$ticket_state.txt + + Directories where comments may reside: + + football_creds=/usr/lib/mars/creds /etc/mars/creds /home/schoebel/mars/football-master.git/creds /home/schoebel/mars/football-master.git /home/schoebel/.mars/creds ./creds + football_confs=/usr/lib/mars/confs /etc/mars/confs /home/schoebel/mars/football-master.git/confs /home/schoebel/.mars/confs ./confs + football_includes=/usr/lib/mars/plugins /etc/mars/plugins /home/schoebel/mars/football-master.git/plugins /home/schoebel/.mars/plugins ./plugins + PLUGIN football-basic diff --git a/docu/screener-verbose.help b/docu/screener-verbose.help index dccdcc0c..ee847af2 100644 --- a/docu/screener-verbose.help +++ b/docu/screener-verbose.help @@ -280,10 +280,11 @@ Options: # Useful for basic debugging of setup problems etc. use_screenlog="${use_screenlog:-0}" - ## waiting_txt and delay_txt + ## waiting_txt and delayed_txt and condition_txt # RTFS Don't use this, unless you know what you are doing. waiting_txt="${waiting_txt:-SCREENER_waiting_WAIT}" delayed_txt="${delayed_txt:-SCREENER_delayed_WAIT}" + condition_txt="${condition_txt:-SCREENER_condition_WAIT}" ## critical_status # This is the "magic" exit code indicating _criticality_