Merge branch 'mars0.1.y' into mars0.1a.y

This commit is contained in:
Thomas Schoebel-Theuer 2018-09-28 07:46:51 +02:00
commit 7a8f6c2baf
11 changed files with 1253 additions and 42 deletions

View File

@ -293,6 +293,15 @@ Hint: branch 0.1a will get a merge from here, and then get the
(except Football related ones) will then go to 0.1b.
Finally, when 0.1a is stable, I will close this branch.
mars0.1stable62
* Minor fix: race between join-resource and log-rotate.
* Minor fix: report split brain logfile amount only when
actually detectable.
* Minor improvement: shift annoying error message over
to Orphan state detection.
* Football: update to Football-2.0-RC12
* doc: some updates.
mars0.1stable61
* Minor fix: in very rare cases where some symlinks are missing,
don't abort in try_to_avoid_splitbrain().

Binary file not shown.

View File

@ -12,7 +12,7 @@ Actions for resource migration:
Run the sequence
migrate_prepare ; migrate_wait ; migrate_finish; migrate_cleanup.
Dto for testing of phases:
Dto for testing (do not rely on it):
./football.sh migrate_prepare <resource> <target_primary> [<target_secondary>]
Allocate LVM space at the targets and start MARS replication.
@ -32,7 +32,7 @@ Actions for inplace FS shrinking:
./football.sh shrink <resource> <percent>
Run the sequence shrink_prepare ; shrink_finish ; shrink_cleanup.
Dto for testing of phases:
Dto for testing (do not rely on it):
./football.sh shrink_prepare <resource> [<percent>]
Allocate temporary LVM space (when possible) and create initial
@ -114,6 +114,10 @@ Actions for (manual) repair in emergency situations:
Manually lock or unlock an item at all of the given hosts, in
an atomic fashion. In most cases, use "ALL" for the item.
Only for testing / development (no stable interfaces):
./football.sh manual_call_hook <name> <args>
Global maintenance:
./football.sh lv_cleanup <resource>
@ -253,7 +257,7 @@ Configuration:
# IMPORTANT: some intermediate progress report is absolutely needed,
# because otherwise a false-positive TIMEOUT may be assumed when
# no output is generated for several hours.
rsync_opt="${rsync_opt:- -aSH --info=progress2,STATS}"
rsync_opt="${rsync_opt:- -aH --inplace --info=progress2,STATS}"
## rsync_opt_prepare
# Additional rsync options for preparation and updating
@ -338,6 +342,16 @@ Configuration:
# of a failed command.
serious_status="${serious_status:-198}"
## interrupted_status
# This is the "magic" exit code indicating a manual interruption
# (e.g. keypress Ctl-c)
interrupted_status="${interrupted_status:-190}"
## illegal_status
# This is the "magic" exit code indicating an illegal command
# (e.g. syntax error, illegal arguments, etc)
illegal_status="${illegal_status:-191}"
## pre_hand or --pre-hand=
# Set this to do an ordinary handover to a new start position
# (in the source cluster) before doing anything else.
@ -370,6 +384,10 @@ Configuration:
# should be called
finished_regex="${finished_regex:-^(migrate_finish|migrate|migrate+|shrink_finish|shrink)}"
## call_finished
# Whether to call the hook football_failed at failures.
call_finished="${call_finished:-1}"
## lock_break_timeout
# When remote ssh commands are failing, remote locks may sustain forever.
# Avoid deadlocks by breaking remote locks after this timeout has elapsed.
@ -393,10 +411,24 @@ Configuration:
# Normally not needed.
resource_pre_check="${resource_pre_check:-0}"
## enable_background_reporting
# Progress reporting to screener.
# Runs in the background, in parallel to foreground processes
# like rsync or tar.
enable_background_reporting="${enable_background_reporting:-1}"
## condition_check_interval
# How often conditions should be re-evaluated.
condition_check_interval="${condition_check_interval:-180}" # Seconds
## lease_time
# Any intents (e.g. for creation of new resources) are recorded.
# This is needed for race avoidance, when multiple resources
# are migrated in _parallel_ to the _same_ target.
# This might lead to livelocks when there would be no lease time
# after which the intents are regarded as "invalid".
lease_time="${lease_time:-3600}" # seconds
## limit_syncs
# Limit the number of actually running syncs by waiting
# until less than this number of syncs are running at any
@ -420,6 +452,16 @@ Configuration:
# new primary site.
limit_mars_logfile="${limit_mars_logfile:-1024}" # MiB
## shrink_min_ram_gb
# When set, check that the target machines for shrinking
# have enough RAM.
# Rationale: even incremental rsync needs the Dentry cache of the
# kernel. When there is not enough RAM, and when there are some millions
# of inodes, the customer downtime may rise to some hours or even some days
# instead of some minutes (only when the dentry+inode cache does not
# fit into kernel memory <<<=== this is the crucial point)
shrink_min_ram_gb="${shrink_min_ram_gb:-0}" # GiB
## optimize_dentry_cache
# Don't umount the temporary shrink space unnecessarily.
# Try to shutdown the VM / container without umounting.
@ -469,6 +511,23 @@ Configuration:
xfs_dump="${xfs_dump:-xfs_quota -x -c dump}"
xfs_restore="${xfs_restore:-xfs_quota -x -c restore}"
## shortcut_tar_percent
# Percentage when a shrink space should no longer be considered
# as "initial" (or empty).
shortcut_tar_percent="${shortcut_tar_percent:-5}"
## max_rsync_downtime
# When set, check the _expected_ duration of customer downtime.
# if it takes longer than this limit, abort without causing
# customer downtime.
# Afterward, sysadmins need to decide what to do:
# For example, move the resource to faster hardware with more RAM, or similar.
max_rsync_downtime="${max_rsync_downtime:-0}" # seconds
## merge_shrink_secondaries
# This is only needed when targets are not yet pre-merged.
merge_shrink_secondaries="${merge_shrink_secondaries:-0}"
## fs_resize_cmd
# Command for online filesystem expansion.
fs_resize_cmd="${fs_resize_cmd:-xfs_growfs -d}"
@ -601,11 +660,18 @@ Specific features with plugin football-cm3:
# and thus must be pingable over network.
skip_resource_ping="${skip_resource_ping:-0}"
## date_lock
# Don't enter critical sections at certain days of the week,
# and/or during certain hours.
# This is a regex matching against "date +%u_%H"
date_lock="${date_lock:-}"
## business_hours
# When set, critical sections are only entered during certain
# days of the week, and/or during certain hours.
# This is a regex matching against "date +%u_%H".
# Example regex: [1-5]_(0[8-9]|1[0-8])
# This means Monday to Friday from 8 to 18 o'clock.
business_hours="${business_hours:-}"
## cm3_stop_safeguard_cmd
# Workaround for a bug.
# Sometimes a systemd unit does not go away.
cm3_stop_safeguard_cmd="${cm3_stop_safeguard_cmd:-{ sleep 2; try=0; while (( try++ < 10 )) && systemctl show $res.scope | grep ActiveState | grep =active; do systemctl stop $res.scope; sleep 6; done; if mountpoint /vol/$res; then umount /vol/$res; fi; }}"
## check_ping_rounds
# Number of pings to try before a container is assumed to
@ -653,7 +719,9 @@ Specific features with plugin football-cm3:
# a BigCluster consisting of several thousands of machines.
# When a future version of mars0.1b.y (or 0.2.y) will allow this,
# this can be disabled.
do_split_cluster="${do_split_cluster:-1}"
# do_split_cluster >= 2 means that the resulting MARS clusters should
# not exceed this number of members, when possible.
do_split_cluster="${do_split_cluster:-2}"
## forbidden_hosts
# Regex for excluding hostnames from any Football actions.
@ -760,7 +828,26 @@ Specific features with plugin football-cm3:
## monitis_downtime_duration
# ShaHoLin-internal
monitis_downtime_duration="${monitis_downtime_duration:-20}" # Minutes
monitis_downtime_duration="${monitis_downtime_duration:-60}" # Minutes
## orwell_downtime_script
# ShaHoLin-internal
orwell_downtime_script="${orwell_downtime_script:-}"
## orwell_tz
# Deal with differences in clock timezones.
orwell_tz="${orwell_tz:-Europe/Berlin}"
## orwell_downtime_duration
# ShaHoLin-internal
orwell_downtime_duration="${orwell_downtime_duration:-20}" # Minutes
## orwell_workaround_sleep
# Workaround for a race condition in Orwell.
# Try to ensure that another check has been executed before
# the downtime is removed.
# 0 = don't remove the downtime at all.
orwell_workaround_sleep="${orwell_workaround_sleep:-300}" # Seconds
## shaholin_customer_report_cmd
# Action script when the hardware has improved.
@ -770,14 +857,46 @@ Specific features with plugin football-cm3:
shaholin_src_cpus="${shaholin_src_cpus:-4}"
shaholin_dst_cpus="${shaholin_dst_cpus:-32}"
## ip_renumber_cmd
# Cross-call with another independent project.
ip_renumber_cmd="${ip_renumber_cmd:-}"
## shaholin_finished_log
# ShaHoLin-specific logfile, reporting _only_ successful completion
# of an action.
shaholin_finished_log="${shaholin_finished_log:-$football_logdir/shaholin-finished.log}"
## shaholin_action
## update_cmd
# OPTIONAL: specific action script with parameters.
shaholin_action="${shaholin_action:-}"
update_cmd="${update_cmd:-}"
## update_host
# To be provided in a *.conf or *.preconf file.
update_host="${update_host:-}"
## parse_ticket
# Regex for identifying tickets from script outputs or arguments
parse_ticket="${parse_ticket:-TECCM-[0-9]+}"
## prefer_parsed_ticket
# Workaround bugs from getting inconsistent ticket IDs from different sources.
prefer_parsed_ticket="${prefer_parsed_ticket:-0}"
## translate_db_state
# Whether to use the following mapping definitions.
translate_db_state="${translate_db_state:-0}"
## db_state_*
# Map logical names to the ones in the database.
db_state_init="${db_state_init:-}"
db_state_prepare="${db_state_prepare:-}"
db_state_finish="${db_state_finish:-}"
db_state_cleanup="${db_state_cleanup:-}"
db_state_done="${db_state_done:-}"
## use_type_for_ticket
# Internal ticketing convention.
use_type_for_ticket="${use_type_for_ticket:-1}"
## auto_handover
# Load-balancing across locations.
@ -791,6 +910,11 @@ Specific features with plugin football-cm3:
# Thus it tries to reduce unnecessary handovers to other locations.
auto_handover="${auto_handover:-1}"
## preferred_location
# When set, override any other pre-handover to this location.
# Useful for maintenance of a whole datacenter.
preferred_location="${preferred_location:-}"
PLUGIN football-ticket
@ -847,6 +971,32 @@ PLUGIN football-ticket
# directories $football_creds $football_confs $football_includes
ticket_require_comment="${ticket_require_comment:-1}"
## ticket_for_migrate
# Optional 1&1-specific: separate ticket for migrate.
# Useful when migrate+shrink need to post into separate tickets.
ticket_for_migrate="${ticket_for_migrate:-}"
## ticket_for_shrink
# Optional 1&1-specific: separate ticket for shrink.
# Useful when migrate+shrink need to post into separate tickets.
ticket_for_shrink="${ticket_for_shrink:-}"
## ticket_prefer_cached
# Workaround a bug in ticket ID retrieval:
# Trust my own cached values more than trust the "inconsistent read".
ticket_prefer_cached="${ticket_prefer_cached:-1}"
## ticket_code
# List of operation:res:shard
ticket_code="${ticket_code:-}"
## get_ticket_code
get_ticket_code="${get_ticket_code:-}"
## max_start_ticket
# Maximum number of instances to start per call
max_start_ticket="${max_start_ticket:-1}"
PLUGIN football-basic
@ -1006,8 +1156,13 @@ PLUGIN football-waiting
# By setting this, you can delay the cleanup operations for some time.
# This way, you are keeping the old LV contents as a kind of "backup"
# for some limited time.
# HINT: dont set to wait_before_cleanuplarge values, because it can
# seriously slow down Football.
#
# HINT1: don't set wait_before_cleanup to very large values, because it can
# seriously slow down Football.
#
# HINT2: the waiting time starts when the last MARS replica was created.
# Only when the syncing times are _smaller_ than this value,
# an _additional_ delay will be produced.
enable_cleanup_delayed="${enable_cleanup_delayed:-0}"
wait_before_cleanup="${wait_before_cleanup:-180}" # Minutes

View File

@ -11,7 +11,7 @@ Actions for resource migration:
Run the sequence
migrate_prepare ; migrate_wait ; migrate_finish; migrate_cleanup.
Dto for testing of phases:
Dto for testing (do not rely on it):
./football.sh migrate_prepare <resource> <target_primary> [<target_secondary>]
Allocate LVM space at the targets and start MARS replication.
@ -31,7 +31,7 @@ Actions for inplace FS shrinking:
./football.sh shrink <resource> <percent>
Run the sequence shrink_prepare ; shrink_finish ; shrink_cleanup.
Dto for testing of phases:
Dto for testing (do not rely on it):
./football.sh shrink_prepare <resource> [<percent>]
Allocate temporary LVM space (when possible) and create initial
@ -113,6 +113,10 @@ Actions for (manual) repair in emergency situations:
Manually lock or unlock an item at all of the given hosts, in
an atomic fashion. In most cases, use "ALL" for the item.
Only for testing / development (no stable interfaces):
./football.sh manual_call_hook <name> <args>
Global maintenance:
./football.sh lv_cleanup <resource>

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@ -13,11 +13,15 @@ Synopsis:
./screener.sh --help [--verbose]
./screener.sh list-running
./screener.sh list-waiting
./screener.sh list-interrupted
./screener.sh list-illegal
./screener.sh list-timeouted
./screener.sh list-failed
./screener.sh list-critical
./screener.sh list-serious
./screener.sh list-done
./screener.sh list
./screener.sh list-archive
./screener.sh list-screens
./screener.sh run <file.csv> [<condition_list>]
./screener.sh start <screen_id> <cmd> <args...>
@ -162,6 +166,9 @@ Cleanup / bookkeeping:
./screener.sh clear-critical <screen_id>
./screener.sh clear-serious <screen_id>
./screener.sh clear-interrupted <screen_id>
./screener.sh clear-illegal <screen_id>
./screener.sh clear-timeouted <screen_id>
./screener.sh clear-failed <screen_id>
Mark the status as "done" and move the logfile away.
@ -227,7 +234,8 @@ Options:
## session_timeout
# Detect hanging sessions when they don't produce any output anymore
# for a longer time. Hanging sessions are then marked as failed or critical.
# for a longer time. Hanging sessions are then marked as either
# 'timeout' or 'critical'.
session_timeout="${session_timeout:-$(( 3600 * 3 ))}" # seconds
## screener_logdir or logdir
@ -250,6 +258,11 @@ Options:
# from $screener_logdir/*/ when this period is exceeded.
screener_log_purge_period="${screener_log_purge_period:-30}" # Days
## screener_log_purge_archive
# When set, the logfiles will be moved to $screener_logdir/archive/
# Otherwise they will be deleted.
screener_log_purge_archive="${screener_log_purge_archive:-1}"
## dry_run
# Don't actually start screen sessions when set.
dry_run="${dry_run:-0}"
@ -296,6 +309,21 @@ Options:
# of a failed command.
serious_status="${serious_status:-198}"
## interrupted_status
# This is the "magic" exit code indicating a manual interruption
# (e.g. keypress Ctl-c)
interrupted_status="${interrupted_status:-190}"
## illegal_status
# This is the "magic" exit code indicating an illegal command
# (e.g. syntax error, illegal arguments, etc)
illegal_status="${illegal_status:-191}"
## timeouted_status
# This is the "magic" internal code indicating a
# hanging session (see $session_timeout).
timeouted_status="${timeouted_status:-195}"
## less_cmd
# Used at $0 less $id
less_cmd="${less_cmd:-less -r}"
@ -326,6 +354,11 @@ Options:
export user_name="${user_name:-$(ssh-add -l | grep -o '[^ ]+@[^ ]+' | sort -u | tail -1)}"
export user_name="${user_name:-$LOGNAME}"
## screener_break_timeout
# Avoid deadlocks by breaking a screener lock after this timeout has elapsed.
# NOTICE: this type of lock is only intended for short-term locking.
screener_break_timeout="${screener_break_timeout:-30}" # seconds
## tmp_dir and tmp_stub
# Where temporary files are residing
tmp_dir="${tmp_dir:-/tmp}"

View File

@ -12,11 +12,15 @@ Synopsis:
./screener.sh --help [--verbose]
./screener.sh list-running
./screener.sh list-waiting
./screener.sh list-interrupted
./screener.sh list-illegal
./screener.sh list-timeouted
./screener.sh list-failed
./screener.sh list-critical
./screener.sh list-serious
./screener.sh list-done
./screener.sh list
./screener.sh list-archive
./screener.sh list-screens
./screener.sh run <file.csv> [<condition_list>]
./screener.sh start <screen_id> <cmd> <args...>
@ -161,6 +165,9 @@ Cleanup / bookkeeping:
./screener.sh clear-critical <screen_id>
./screener.sh clear-serious <screen_id>
./screener.sh clear-interrupted <screen_id>
./screener.sh clear-illegal <screen_id>
./screener.sh clear-timeouted <screen_id>
./screener.sh clear-failed <screen_id>
Mark the status as "done" and move the logfile away.

@ -1 +1 @@
Subproject commit 5364ccf7e376e509ff8decb207cd0a8a96d29e22
Subproject commit ac9cdb926c9355f3fedc50b5ae4b4cc06276165b

View File

@ -364,7 +364,6 @@ const char *rot_keys[] = {
"err-replay-stop",
// from _check_logging_status()
"inf-replay-tolerance",
"err-replay-size",
NULL,
};
@ -3504,8 +3503,6 @@ int _check_logging_status(struct mars_rotate *rot, int *log_nr, long long *oldpo
*newpos = rot->aio_info.current_size;
if (unlikely(rot->aio_info.current_size < *oldpos_start)) {
MARS_ERR_TO(rot->log_say, "oops, bad replay position attempted at logfile '%s' (file length %lld should never be smaller than requested position %lld, is your filesystem corrupted?) => please repair this by hand\n", rot->aio_dent->d_path, rot->aio_info.current_size, *oldpos_start);
make_rot_msg(rot, "err-replay-size", "oops, bad replay position attempted at logfile '%s' (file length %lld should never be smaller than requested position %lld, is your filesystem corrupted?) => please repair this by hand", rot->aio_dent->d_path, rot->aio_info.current_size, *oldpos_start);
status = -EBADF;
goto done;
}

View File

@ -1485,14 +1485,22 @@ sub detect_splitbrain {
# dynamic programming
return $detected_splits{$res} if defined($detected_splits{$res});
my $basedir = "$mars/resource-$res";
my $retry = 2;
my $ok = 1;
my @list = glob("$mars/resource-$res/replay-*");
my @hosts = map { $_ =~ s:.*/replay-::; $_ } @list;
AGAIN:
foreach my $host1 (@hosts) {
foreach my $host2 (@hosts) {
next if $host1 ge $host2;
my ($point, $split, $size1, $size2) = get_common_ancestor($basedir, $host1, $host2);
if ($split) {
# Workaround races.
if ($retry > 0) {
$retry--;
sleep(2);
next AGAIN;
}
$ok = 0;
if ($do_report) {
my $age = "";
@ -1508,8 +1516,10 @@ sub detect_splitbrain {
$age = " age ~" . seconds2human(mars_time() - $stamp) if $stamp;
}
lwarn "SPLIT BRAIN of resource '$res' after logfile '$point'$age\n";
lwarn " hostA = '$host1' logfile_amount='$size1' (" . number2human($size1) . ")\n";
lwarn " hostB = '$host2' logfile_amount='$size2' (" . number2human($size2) . ")\n";
if ($point) {
lwarn " hostA = '$host1' logfile_amount='$size1' (" . number2human($size1) . ")\n";
lwarn " hostB = '$host2' logfile_amount='$size2' (" . number2human($size2) . ")\n";
}
} else {
return $ok;
}
@ -2516,7 +2526,7 @@ sub create_res {
mkdir("$resdir/actual-$host");
my $todo = "$resdir/todo-$host";
mkdir($todo);
set_link("1", "$todo/attach");
set_link("0", "$todo/attach");
set_link("1", "$todo/connect");
set_link("1", "$todo/sync");
set_link("1", "$todo/allow-replay");
@ -2549,8 +2559,17 @@ sub create_res {
set_link("0", "$resdir/syncstatus-$host");
finish_links();
rsync_cmd($primary, "--max-size=1 --update $file $primary:$mars/resource-$res/", 1);
# Re-read the primary replaylink because it might have log-rotated in the meantime
wait_cluster($cmd, $res, $primary);
my $replay = get_link("$resdir/replay-$primary");
if ($replay =~ m/^log-([0-9]+)-/) {
$replay_nr = $1;
_set_replaylink($resdir, $replay_nr, $primary, "");
}
lprint "successfully joined resource '$res'\n";
}
set_link("1", "$todo/attach");
finish_links();
_systemd_trigger($cmd);
}
@ -4130,12 +4149,21 @@ sub eval_fn {
my $peer = parse_macro($arg1, $env);
$peer = $$env{"host"} unless $peer;
my $replay = get_link($$env{"resdir"} . "/replay-$peer", 1);
$replay =~ m/^(log-[^,]+),/;
$replay =~ m/^(log-[^,]+),([0-9]*)/;
my $logfile = $$env{"resdir"} . "/" . $1;
if (-r $logfile) {
return 0;
my $logpos = $2;
if (! -r $logfile) {
return 1;
}
return 1;
my @stat = stat($logfile);
if (!@stat) {
return 1;
}
my $size= $stat[7];
if ($size < $logpos) {
return 1;
}
return 0;
}
if (/^is[-_]?(almost[-_]?)?consistent$/) {
my $almost = $1;