mirror of
https://github.com/schoebel/mars
synced 2025-01-20 06:20:52 +00:00
10335 lines
319 KiB
Perl
Executable File
10335 lines
319 KiB
Perl
Executable File
#!/usr/bin/perl -w
|
|
#
|
|
# MARS Long Distance Replication Software
|
|
#
|
|
# This file is part of MARS project: http://schoebel.github.io/mars/
|
|
#
|
|
# Copyright (C) 2010-2014 Thomas Schoebel-Theuer
|
|
# Copyright (C) 2011-2014 1&1 Internet AG
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License along
|
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
use strict;
|
|
use English;
|
|
use warnings;
|
|
|
|
umask 0077;
|
|
|
|
##################################################################
|
|
|
|
# global defaults
|
|
|
|
my $parallel = -999;
|
|
my $single_step = 0;
|
|
my $inject_phase = 0;
|
|
my $compat_deletions = 1;
|
|
my $compat_alivelinks = 1;
|
|
my $threshold = 10 * 1024 * 1024;
|
|
my $window = 60;
|
|
my $keep_backups = 24 * 7;
|
|
my $verbose = 0;
|
|
my $max_deletions = 512;
|
|
my $thresh_logfiles = 10;
|
|
my $thresh_logsize = 5; # GB
|
|
my $dry_run = 0;
|
|
|
|
# All paths should be overridable from outside
|
|
my $etc_marsadm = $ENV{ETC_MARSADM} ?
|
|
$ENV{ETC_MARSADM} :
|
|
"/etc/marsadm";
|
|
|
|
my @MARS_PATH = $ENV{MARS_PATH} ?
|
|
split(/:/, $ENV{MARS_PATH}) :
|
|
(
|
|
".",
|
|
defined($ENV{HOME}) ? "$ENV{HOME}/.marsadm" : "",
|
|
$etc_marsadm,
|
|
"/usr/lib/marsadm",
|
|
"/usr/local/lib/marsadm",
|
|
);
|
|
|
|
##################################################################
|
|
|
|
# messaging
|
|
|
|
my %skip_res;
|
|
my $error_count = 0;
|
|
my $notify = "";
|
|
my $child_prefix = "";
|
|
my $logger = "/usr/bin/logger";
|
|
|
|
sub llog {
|
|
my ($text) = @_;
|
|
if ($notify) {
|
|
$text =~ s/"/\\"/g;
|
|
system("$logger -t marsadm \"$notify $text\"");
|
|
}
|
|
}
|
|
|
|
sub lprint {
|
|
my ($text) = @_;
|
|
$OUTPUT_AUTOFLUSH = 1;
|
|
print $child_prefix . $text;
|
|
llog($text);
|
|
}
|
|
|
|
sub lprint_stderr {
|
|
my ($text) = @_;
|
|
$OUTPUT_AUTOFLUSH = 1;
|
|
print STDERR $child_prefix . $text;
|
|
llog($text);
|
|
}
|
|
|
|
sub lskip {
|
|
my ($res, $text) = @_;
|
|
# Already set %skip_res here, so it doesn't count as error
|
|
$skip_res{$res} = 1;
|
|
lprint_stderr "SKIPPING: $text";
|
|
# trigger an exception
|
|
die "\n";
|
|
}
|
|
|
|
sub ldie {
|
|
my ($text) = @_;
|
|
if ($verbose > 2) {
|
|
my $i = 1;
|
|
for (;;) {
|
|
my ($package, $filename, $line, $subroutine, $hasargs, $wantarray, $evaltext, $is_require, $hints, $bitmask, $hinthash) = caller($i++);
|
|
last unless defined($subroutine);
|
|
lprint_stderr "$line:$subroutine\n";
|
|
}
|
|
}
|
|
$error_count++;
|
|
lprint_stderr "DYING: $text";
|
|
llog("DYING: $text");
|
|
die "\n";
|
|
}
|
|
|
|
sub lwarn {
|
|
my ($text) = @_;
|
|
lprint_stderr "WARNING: $text";
|
|
llog("WARNING: $text");
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# basic helpers
|
|
|
|
sub lamport_glob {
|
|
my $expr = shift;
|
|
my @result =();
|
|
my @raw_glob = glob($expr);
|
|
foreach my $path (@raw_glob) {
|
|
my $val = readlink($path);
|
|
next if (defined($val) && $val eq ".deleted");
|
|
push @result, $path;
|
|
}
|
|
return @result;
|
|
}
|
|
|
|
sub safe_creat {
|
|
my $path = shift;
|
|
if (-l $path) {
|
|
my $val = readlink($path);
|
|
unlink($path) if $val eq ".deleted";
|
|
}
|
|
my $fh = undef;
|
|
use IO::Handle;
|
|
use Fcntl;
|
|
my $status = sysopen($fh, $path, O_CREAT | O_EXCL | O_NOFOLLOW | O_RDONLY);
|
|
close($fh);
|
|
return (defined($status) && $status);
|
|
}
|
|
|
|
sub safe_touch {
|
|
my ($path, $stamp) = @_;
|
|
# Workaround for non-implemented undef parameter at utime()
|
|
# [should be implemented beyond perl 5.8.0 according to "man perlfunc",
|
|
# but seems to not work as documented.]
|
|
$stamp = time() unless defined($stamp);
|
|
use IO::Handle;
|
|
if (-l $path) {
|
|
my $val = readlink($path);
|
|
return 0 if $val eq ".deleted";
|
|
# Perl doesn't seem to support AT_SYMLINK_NOFOLLOW
|
|
my $opt = $stamp ? "-d \"\@$stamp\"" : "";
|
|
my $status = system("touch -h $opt \"$path\"");
|
|
# additional systemd-trigger when relevant
|
|
if ($path =~ m:/(primary|systemd):p) {
|
|
my $trig_path = "$PREMATCH/systemd-trigger";
|
|
unless ($trig_path =~ m:/userspace/:) {
|
|
utime($stamp, $stamp, $trig_path) || safe_creat($trig_path);
|
|
}
|
|
}
|
|
return $status;
|
|
}
|
|
my $status = utime($stamp, $stamp, $path);
|
|
return $status;
|
|
}
|
|
|
|
sub link_exists {
|
|
my $path = shift;
|
|
return 0 unless -l $path;
|
|
my $val = readlink($path);
|
|
return 0 unless defined($val);
|
|
return 0 if $val eq ".deleted";
|
|
return 1;
|
|
}
|
|
|
|
sub file_exists {
|
|
my $path = shift;
|
|
if (-l $path) {
|
|
my $val = readlink($path);
|
|
return 0 if $val eq ".deleted";
|
|
}
|
|
return 1 if -f $path;
|
|
return 0;
|
|
}
|
|
|
|
sub any_exists {
|
|
my $path = shift;
|
|
return 1 if link_exists($path);
|
|
return 1 if file_exists($path);
|
|
return 0;
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# global variables
|
|
|
|
my $Id = '$Id$ ';
|
|
my $user_version = 0.1;
|
|
my $marsadm_version = 2.9; # some rough hint at newer features
|
|
my $mars = "/mars";
|
|
my $host = `uname -n` or ldie "cannot determine my network node name\n";
|
|
chomp $host;
|
|
check_id($host);
|
|
my $real_host = $host;
|
|
my $backup_dir = "$mars/backups-" . time();
|
|
my $force = 0;
|
|
my $ignore_sync = 0;
|
|
my $cron_mode = 0;
|
|
my $timeout = 600;
|
|
my $phase_nr = 0;
|
|
my $ssh_port = 22;
|
|
my $ssh_opts = "-A -o StrictHostKeyChecking=no -o ConnectTimeout=5";
|
|
my $ssh_probe = "uname -a";
|
|
my $rsync_opts = "-av --exclude='.deleted-*'";
|
|
my $systemd_enabled = 1;
|
|
my $kernel_version = 0;
|
|
my $kernel_features_version = -1;
|
|
my $kernel_strategy_version = -1;
|
|
my $kernel_flags_version = ~0x0;
|
|
|
|
my %known_ips;
|
|
|
|
##################################################################
|
|
|
|
# general helpers
|
|
|
|
my $allow_fail_action = undef;
|
|
|
|
# Failure compensation.
|
|
# This is called when do_one_res() fails
|
|
sub fail_action {
|
|
my ($cmd, $res) = @_;
|
|
lprint "FAILURE cmd='$cmd' res='$res'\n";
|
|
$error_count++;
|
|
return unless $allow_fail_action;
|
|
lprint "===== BEGIN FAILURE COMPENSATION cmd='$cmd' res='$res'\n";
|
|
# prevent recursive ldie
|
|
eval {
|
|
&$allow_fail_action(@_);
|
|
my $sub_status = $?;
|
|
lprint "COMPENSATION sub_status='$sub_status'\n";
|
|
1;
|
|
} or
|
|
(
|
|
lwarn "DOUBLE FAILURE detected.\n"
|
|
);
|
|
lprint "===== END FAILURE COMPENSATION cmd='$cmd' res='$res'\n";
|
|
}
|
|
|
|
sub _device_name {
|
|
my ($res, $peer) = @_;
|
|
$peer = $host unless defined($peer);
|
|
my $name = get_link("$mars/resource-$res/device-$peer", 1);
|
|
$name = $res if (!$name || $name eq "(none)");
|
|
return $name;
|
|
}
|
|
|
|
sub device_name {
|
|
my $name = _device_name(@_);
|
|
$name = "/dev/mars/$name" if (defined($name) && $name);
|
|
return $name;
|
|
}
|
|
|
|
sub device_exists {
|
|
my ($res, $peer) = @_;
|
|
$peer = $host unless defined($peer);
|
|
return 0 if $peer eq "(none)";
|
|
if ($peer eq $real_host) {
|
|
# Silent fallback to local detection for old kernel module versions
|
|
my $buildtag = get_alive_link("buildtag", $peer, 1);
|
|
if (!$buildtag) {
|
|
# VERY old MARS modules dont report their version
|
|
$buildtag = `cut -d' ' -f1 < /proc/sys/mars/version`;
|
|
# Sometimes "never touch a running system" is a BAD strategy...
|
|
lwarn "Please upgrade your EXTREMELY OLD module version '$buildtag'\n" if $buildtag;
|
|
}
|
|
if (defined($buildtag) && $buildtag =~ m/([0-9]+)($|\()/ && $1 <= 97) {
|
|
my $name = device_name($res, $peer);
|
|
my $val = (-b $name) ? 1 : 0;
|
|
return $val;
|
|
}
|
|
}
|
|
my $lnk = "$mars/resource-$res/actual-$peer/if-on";
|
|
my $val = get_link($lnk, 2);
|
|
# backwards compatibility to old kernels
|
|
my $lnk_old = "$mars/resource-$res/actual-$peer/device-$peer";
|
|
my $val_old = get_link($lnk_old, 2);
|
|
if (defined($val_old) && $val_old ne "" &&
|
|
(!defined($val) || $val eq "" ||
|
|
get_link_stamp($lnk_old) > get_link_stamp($lnk))) {
|
|
$val = $val_old;
|
|
}
|
|
if (!defined($val) || $val eq "") {
|
|
# Fallback to local device
|
|
my $name = device_name($res, $peer);
|
|
if ($peer eq $real_host) {
|
|
lwarn "Unexpected fallback to local device detection $name for $peer\n";
|
|
$val = (-b $name) ? 1 : 0;
|
|
} else {
|
|
lwarn "Cannot determine device presence for $peer\n";
|
|
}
|
|
}
|
|
return $val;
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# ssh helpers
|
|
# deprecated, to disappear in a future release
|
|
|
|
my %ssh_ips;
|
|
my %ssh_ports;
|
|
|
|
sub make_ssh_cmd {
|
|
my ($peer, $no_login) = @_;
|
|
return "" unless $ssh_port;
|
|
my $ssh = "ssh $ssh_opts";
|
|
if (!defined($ssh_ports{$peer})) {
|
|
my $port;
|
|
my $real_peer;
|
|
my $peer_ip = get_link("$mars/ips/ip-$peer", 2);
|
|
# workaround firewall hell at some installations...
|
|
RETRY:
|
|
for my $this_peer ($peer, $peer_ip) {
|
|
next unless defined($this_peer);
|
|
next unless $this_peer;
|
|
# check whether machine is reachable
|
|
if (system("ping -c1 $this_peer")) {
|
|
lwarn "cannot ping '$this_peer'\n";
|
|
next;
|
|
}
|
|
# first try given parameters, then port 22, then ssh_config defaults
|
|
my %seen = ();
|
|
foreach my $try_port ($ssh_port, 22, 0) {
|
|
next if defined($seen{$try_port});
|
|
$seen{$try_port} = 1;
|
|
if ($try_port && system("if which nc; then nc -v -4 -w 5 \"$this_peer\" $try_port < /dev/null > /dev/null; fi")) {
|
|
lwarn "SSH port '$try_port' for '$this_peer' is not reachable\n";
|
|
next;
|
|
}
|
|
my $opt_port = $try_port ? "-p $try_port" : "";
|
|
if (!system("$ssh $opt_port root\@$this_peer \"$ssh_probe\"")) {
|
|
($real_peer, $port) = ($this_peer, $try_port);
|
|
last RETRY;
|
|
}
|
|
lwarn "SSH to '$this_peer' does not work on port '$try_port'\n";
|
|
}
|
|
}
|
|
ldie "SSH to '$peer' does not work\n" unless defined($port);
|
|
$ssh_ips{$peer} = $real_peer;
|
|
$ssh_ports{$peer} = $port;
|
|
}
|
|
my $real_peer = $ssh_ips{$peer};
|
|
$real_peer = $peer if !defined($real_peer);
|
|
my $port = $ssh_ports{$peer};
|
|
$ssh .= " -p $port" if $port > 0;
|
|
return ($real_peer, $port, $ssh) if defined($no_login);
|
|
return "$ssh root\@$real_peer"
|
|
}
|
|
|
|
sub ssh_cmd {
|
|
my ($peer, $cmd, $no_fail) = @_;
|
|
return 1 unless $ssh_port;
|
|
if ($peer ne $real_host) {
|
|
$cmd =~ s/'/\'/g;
|
|
$cmd = make_ssh_cmd($peer) . " '$cmd'";
|
|
} else {
|
|
$cmd =~ s/'/\'/g;
|
|
$cmd = "bash -c '$cmd'";
|
|
}
|
|
ldie "empty command\n" unless $cmd;
|
|
my $status = system($cmd);
|
|
if ($status and !defined($no_fail)) {
|
|
ldie "SSH to '$peer' command '$cmd' failed\n";
|
|
}
|
|
return $status;
|
|
}
|
|
|
|
sub rsync_cmd {
|
|
my ($peer, $cmd, $no_fail) = @_;
|
|
return 1 unless $ssh_port;
|
|
my ($real_peer, $port, $ssh) = make_ssh_cmd($peer, 1);
|
|
$cmd =~ s/(?<![-a-zA-Z0-9_])$peer(?![-a-zA-Z0-9_])/$real_peer/g if (defined($real_peer) && $real_peer && $real_peer ne $peer);
|
|
my $rsync_cmd = "rsync -e '$ssh' $rsync_opts $cmd";
|
|
if ($dry_run) {
|
|
lprint "DRY_RUN: would execute the following command:\n$rsync_cmd\n";
|
|
$rsync_cmd .= " --dry-run";
|
|
}
|
|
finish_links();
|
|
# Repeat the action because some symlinks might be updated (or even vanish)
|
|
# when the peer is actively running. There seems to exist no rsync option
|
|
# for ignoring all of these errors.
|
|
my $cycle = 0;
|
|
my $status = 0;
|
|
do {
|
|
$status = system($rsync_cmd);
|
|
} while ($status != 0 && $status != 24 && $cycle++ < 3);
|
|
ldie "Cannot get remote symlink tree via rsync, status=$status\n" if (!defined($no_fail) && $status && $status != 24);
|
|
return $status;
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# external hook scripts
|
|
|
|
sub _try_exec {
|
|
my ($path, $cmd, $args, $abort) = @_;
|
|
return unless -x $path;
|
|
lprint "RUNNING HOOK $path $cmd $args\n" if $verbose;
|
|
unless ($dry_run) {
|
|
my $status = system("$path $cmd $args");
|
|
lprint "HOOK status=$status\n" if $verbose;
|
|
if ($status) {
|
|
lwarn "HOOK $path $cmd $args\nFAILURE $status\n";
|
|
ldie if $abort;
|
|
}
|
|
}
|
|
}
|
|
|
|
sub _try_exec_dir {
|
|
my ($subdir, $cmd, $args, $abort) = @_;
|
|
return unless -d $subdir;
|
|
foreach my $sub_path (lamport_glob("$subdir/*")) {
|
|
next if $sub_path =~ m:~$:;
|
|
_try_exec($sub_path, $cmd, $args, $abort);
|
|
}
|
|
}
|
|
|
|
sub call_hook {
|
|
my $abort = shift;
|
|
my $type = shift;
|
|
my $cmd = shift;
|
|
my $args = join(" ", @_);
|
|
|
|
my $pattern = "hook-$type";
|
|
foreach my $dir (@MARS_PATH) {
|
|
-d $dir || next;
|
|
_try_exec("$dir/$pattern-$cmd", $cmd, $args, $abort);
|
|
_try_exec("$dir/$pattern", $cmd, $args, $abort);
|
|
_try_exec_dir("$dir/$pattern-$cmd.d", $cmd, $args, $abort);
|
|
_try_exec_dir("$dir/$pattern.d", $cmd, $args, $abort);
|
|
}
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# syntactic scanning / parsing
|
|
|
|
my $match_comment = qr'#[^\n]*|//\h[^\n]*|/\*(?:[^*]|\*[^/])*\*/|\\\n\s*';
|
|
my $match_nobrace = qr'(?:[^{}\\]|\\.)*'s;
|
|
my $match_inner = $match_nobrace;
|
|
my $match_brace = qr"\{$match_inner\}"s;
|
|
for (my $i = 0; $i < 20; $i++) {
|
|
$match_inner = qr"$match_nobrace(?:$match_brace$match_nobrace)*"s;
|
|
$match_brace = qr"\{$match_inner\}"s;
|
|
}
|
|
my $match_fn_head = qr"\%([^\\\s{}()\[\]]*)(?=\{)"s;
|
|
my $match_fn = qr"$match_fn_head(?:\{($match_inner)\})"s;
|
|
|
|
# keep this in sync with mars.h definition of MREF_FLAGS
|
|
my %STATUS_FLAGS =
|
|
(
|
|
"UPTODATE" => 0x00000001,
|
|
"READING" => 0x00000002,
|
|
"WRITING" => 0x00000004,
|
|
"WRITE" => 0x00000008,
|
|
"MAY_WRITE" => 0x00000010,
|
|
"SKIP_SYNC" => 0x00000020,
|
|
"NODATA" => 0x00000040,
|
|
);
|
|
|
|
my %FEATURES_FLAGS =
|
|
(
|
|
"CHKSUM_MD5_OLD" => 0x00010000,
|
|
"CHKSUM_MD5" => 0x00020000,
|
|
"CHKSUM_CRC32C" => 0x00040000,
|
|
"CHKSUM_CRC32" => 0x00080000,
|
|
"CHKSUM_SHA1" => 0x00100000,
|
|
|
|
"COMPRESS_LZO" => 0x01000000,
|
|
"COMPRESS_LZ4" => 0x02000000,
|
|
"COMPRESS_ZLIB" => 0x04000000,
|
|
);
|
|
|
|
my %FLAGS_FEATURES;
|
|
|
|
my $chksum_features = 0x0;
|
|
my $compress_features = 0x0;
|
|
|
|
sub featuresflags2txt {
|
|
my ($flags, $skip_unknown) = @_;
|
|
$flags = 0x0 unless (defined($flags) && $flags ne "");
|
|
$flags = hex($flags) if $flags =~ m/^0x/;
|
|
my $txt = "";
|
|
for (my $mask = 0x1; $mask < (1 << 32); $mask <<= 1) {
|
|
my $bit = $flags & $mask;
|
|
next unless $bit;
|
|
if (defined($FLAGS_FEATURES{$bit})) {
|
|
$txt .= "|" if $txt;
|
|
$txt .= $FLAGS_FEATURES{$bit};
|
|
} elsif (!defined($skip_unknown)) {
|
|
$txt .= "|" if $txt;
|
|
$txt .= sprintf("0x%x", $bit);
|
|
}
|
|
}
|
|
return $txt;
|
|
}
|
|
|
|
sub txt2featuresflags {
|
|
my ($cmd, $txt) = @_;
|
|
my $flags = 0x0;
|
|
foreach my $this_txt (split(/\s*[|]\s*/, $txt)) {
|
|
chomp $this_txt;
|
|
$this_txt =~ s/^\s+//;
|
|
ldie "Digest names must start with 'CHKSUM_'" if ($cmd =~ m/digest/ && $this_txt !~ m/^CHKSUM/);
|
|
if (defined($FEATURES_FLAGS{$this_txt})) {
|
|
$flags |= $FEATURES_FLAGS{$this_txt};
|
|
} elsif ($this_txt =~ m/^0x[0-9a-f]+/) {
|
|
$flags |= hex($this_txt);
|
|
} elsif ($this_txt =~ m/^0[0-9]+/) {
|
|
$flags |= oct($this_txt);
|
|
} elsif ($this_txt =~ m/^[0-9]+$/) {
|
|
$flags |= $this_txt;
|
|
} else {
|
|
ldie "Unknown symbolic feature flag '$this_txt'\n";
|
|
}
|
|
}
|
|
return sprintf("0x%08x", $flags);
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# Resource lists and their peers
|
|
|
|
# 2-dimensional caches for cartesian product
|
|
|
|
my %total_resources;
|
|
my %member_resources;
|
|
my %guest_resources;
|
|
my %any_resources;
|
|
|
|
my %total_peers;
|
|
my %member_peers;
|
|
my %guest_peers;
|
|
my %any_peers;
|
|
|
|
my $cache_dir = "$mars/cache-$real_host";
|
|
|
|
sub __read_cache {
|
|
my ($filename, $hash) = @_;
|
|
open(my $fh, "<", $filename) or return 0;
|
|
while (my $line = <$fh>) {
|
|
chomp $line;
|
|
my ($key1, $key2, $val) = split(" ", $line, 3);
|
|
$$hash{$key1}{$key2} = $val;
|
|
}
|
|
close($fh);
|
|
lprint "READ CACHE $filename\n" if $verbose >= 2;
|
|
return 1;
|
|
}
|
|
|
|
sub _read_cache {
|
|
return 0 if $kernel_strategy_version < 4;
|
|
my $inval_path = "$cache_dir/invalid";
|
|
if (-l $inval_path) {
|
|
_reset_resources();
|
|
return 0;
|
|
}
|
|
my $ok =
|
|
__read_cache("$cache_dir/total_resources.cache", \%total_resources) &&
|
|
__read_cache("$cache_dir/member_resources.cache", \%member_resources) &&
|
|
__read_cache("$cache_dir/guest_resources.cache", \%guest_resources) &&
|
|
__read_cache("$cache_dir/any_resources.cache", \%any_resources) &&
|
|
__read_cache("$cache_dir/total_peers.cache", \%total_peers) &&
|
|
__read_cache("$cache_dir/member_peers.cache", \%member_peers) &&
|
|
__read_cache("$cache_dir/guest_peers.cache", \%guest_peers) &&
|
|
__read_cache("$cache_dir/any_peers.cache", \%any_peers);
|
|
if (!$ok) {
|
|
_reset_resources();
|
|
}
|
|
return $ok;
|
|
}
|
|
|
|
sub __write_cache {
|
|
my ($filename, $hash) = @_;
|
|
my $tmpname = "$filename.tmp.$$";
|
|
local $OFS = " ";
|
|
local $ORS = "\n";
|
|
open(my $fh, ">", $tmpname) or return 0;
|
|
foreach my $key1 (keys(%$hash)) {
|
|
my $sub_hash = $$hash{$key1};
|
|
foreach my $key2 (keys(%$sub_hash)) {
|
|
my $val = $$sub_hash{$key2};
|
|
if (!print $fh ($key1, $key2, $val)) {
|
|
unlink($tmpname);
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
if (!close($fh)) {
|
|
unlink($tmpname);
|
|
return 0;
|
|
}
|
|
rename($tmpname, $filename);
|
|
return 1;
|
|
}
|
|
|
|
sub _write_cache {
|
|
mkdir($cache_dir);
|
|
my $ok =
|
|
__write_cache("$cache_dir/total_resources.cache", \%total_resources) &&
|
|
__write_cache("$cache_dir/member_resources.cache", \%member_resources) &&
|
|
__write_cache("$cache_dir/guest_resources.cache", \%guest_resources) &&
|
|
__write_cache("$cache_dir/any_resources.cache", \%any_resources) &&
|
|
__write_cache("$cache_dir/total_peers.cache", \%total_peers) &&
|
|
__write_cache("$cache_dir/member_peers.cache", \%member_peers) &&
|
|
__write_cache("$cache_dir/guest_peers.cache", \%guest_peers) &&
|
|
__write_cache("$cache_dir/any_peers.cache", \%any_peers);
|
|
if (!$ok) {
|
|
_reset_resources();
|
|
}
|
|
return $ok;
|
|
}
|
|
|
|
sub _scan_caches {
|
|
return if %total_peers;
|
|
return if _read_cache();
|
|
# Reset all 2-dimensional hashes
|
|
%total_resources = ();
|
|
%member_resources = ();
|
|
%guest_resources = ();
|
|
%any_resources = ();
|
|
%total_peers = ();
|
|
%member_peers = ();
|
|
%guest_peers = ();
|
|
%any_peers = ();
|
|
my $ips_glob = "$mars/ips/ip-*";
|
|
my $resource_glob = "$mars/resource-*";
|
|
my $active_glob = "$mars/resource-*/{device,data,replay}-*";
|
|
my $probe_dir = "$mars/probe-$real_host";
|
|
if (-d $probe_dir) {
|
|
$ips_glob .= " $probe_dir$ips_glob";
|
|
$resource_glob .= " $probe_dir$resource_glob";
|
|
$active_glob .= " $probe_dir$active_glob";
|
|
}
|
|
# Add all known hosts to %total_peers but _not_ to %any_peers.
|
|
# Reason: some hosts might not be member/guest of any resource
|
|
foreach my $path (lamport_glob($ips_glob)) {
|
|
$path =~ m:/ip-(.*):;
|
|
my $this_peer = $1;
|
|
$total_peers{$this_peer} = {};
|
|
}
|
|
# Add all known resources to %total_resources but _not_ to %any_resources.
|
|
# Reason: some resources might exist but have no members / guests.
|
|
foreach my $path (lamport_glob($resource_glob)) {
|
|
$path =~ m:/resource-(.*):;
|
|
my $this_res = $1;
|
|
$total_resources{$this_res} = {};
|
|
}
|
|
# Now we look at all relevant combinations between resources and hosts
|
|
my @total_paths = lamport_glob($active_glob);
|
|
my %paths;
|
|
foreach my $path (@total_paths) {
|
|
# %paths hash: make $probe_dir variants globally unique
|
|
$path =~ s/^$probe_dir//;
|
|
$paths{$path} = 1;
|
|
}
|
|
foreach my $path (@total_paths) {
|
|
next unless $path =~ m:/resource-([^/]+?)/[a-z]+-(.+):;
|
|
my $this_res = $1;
|
|
my $this_peer = $2;
|
|
# dynamic programming
|
|
next if $total_resources{$this_res}{$this_peer};
|
|
# remember result combinations
|
|
$total_resources{$this_res}{$this_peer} = 1;
|
|
$total_peers{$this_peer}{$this_res} = 1;
|
|
my $is_any = $paths{"$mars/resource-$this_res/device-$this_peer"};
|
|
if ($is_any) {
|
|
$any_resources{$this_res}{$this_peer} = 1;
|
|
$any_peers{$this_peer}{$this_res} = 1;
|
|
}
|
|
my $is_member =
|
|
$paths{"$mars/resource-$this_res/data-$this_peer"} ||
|
|
$paths{"$mars/resource-$this_res/replay-$this_peer"};
|
|
if ($is_member) {
|
|
$member_resources{$this_res}{$this_peer} = 1;
|
|
$member_peers{$this_peer}{$this_res} = 1;
|
|
next;
|
|
}
|
|
my $is_guest =
|
|
$is_any &&
|
|
get_link("$mars/resource-$this_res/actual-$this_peer/prosumer-on", 2) ||
|
|
get_link("$mars/resource-$this_res/todo-$this_peer/exports", 2) =~ m:(^|\+)$this_peer($|\+):;
|
|
if ($is_guest) {
|
|
$guest_resources{$this_res}{$this_peer} = 1;
|
|
$guest_peers{$this_peer}{$this_res} = 1;
|
|
next;
|
|
}
|
|
# Notice: _candidates_ for guests are over here.
|
|
# They can be determined by set_minus(%any_peers,%member_peers)
|
|
}
|
|
if ($verbose) {
|
|
lprint "====== found " .
|
|
scalar(keys(%total_peers)) . " total and " .
|
|
scalar(keys(%member_peers)) . " participating and " .
|
|
scalar(keys(%guest_peers)) . " guest " .
|
|
"peers\n";
|
|
lprint "====== found" .
|
|
scalar(keys(%total_resources)) . " total and " .
|
|
scalar(keys(%member_resources)) . " participating and " .
|
|
scalar(keys(%guest_resources)) . " guest " .
|
|
"resources\n";
|
|
}
|
|
if (!_write_cache()) {
|
|
lwarn "cannot write peer cache\n";
|
|
}
|
|
}
|
|
|
|
sub _reset_resources {
|
|
system("rm -rf $cache_dir/*.cache");
|
|
%total_peers = ();
|
|
}
|
|
|
|
sub is_member {
|
|
my ($res, $peer) = @_;
|
|
_scan_caches() unless %total_peers;
|
|
return $member_resources{$res}{$peer};
|
|
}
|
|
|
|
sub is_guest {
|
|
my ($res, $peer) = @_;
|
|
_scan_caches() unless %total_peers;
|
|
return $guest_resources{$res}{$peer};
|
|
}
|
|
|
|
sub is_any {
|
|
my ($res, $peer) = @_;
|
|
_scan_caches() unless %total_peers;
|
|
return $any_resources{$res}{$peer};
|
|
}
|
|
|
|
|
|
sub alphanum_cmp {
|
|
my ($aa, $bb) = ($a, $b);
|
|
$aa =~ s/([0-9]+)/sprintf("%012d",$1)/eg;
|
|
$bb =~ s/([0-9]+)/sprintf("%012d",$1)/eg;
|
|
return $aa cmp $bb;
|
|
}
|
|
|
|
sub reverse_cmp {
|
|
my ($aa, $bb) = ($b, $a);
|
|
$aa =~ s/([0-9]+)/sprintf("%012d",$1)/eg;
|
|
$bb =~ s/([0-9]+)/sprintf("%012d",$1)/eg;
|
|
return $aa cmp $bb;
|
|
}
|
|
|
|
sub get_total_resources {
|
|
my $peer = shift;
|
|
_scan_caches() unless %total_peers;
|
|
if ($peer) {
|
|
my $projection = $total_peers{$peer};
|
|
return sort alphanum_cmp keys(%$projection);
|
|
} else {
|
|
return sort alphanum_cmp keys(%total_resources);
|
|
}
|
|
}
|
|
|
|
sub get_member_resources {
|
|
my $peer = shift;
|
|
_scan_caches() unless %total_peers;
|
|
if ($peer) {
|
|
my $projection = $member_peers{$peer};
|
|
return sort alphanum_cmp keys(%$projection);
|
|
} else {
|
|
return sort alphanum_cmp keys(%member_resources);
|
|
}
|
|
}
|
|
|
|
sub get_guest_resources {
|
|
my $peer = shift;
|
|
_scan_caches() unless %total_peers;
|
|
if ($peer) {
|
|
my $projection = $guest_peers{$peer};
|
|
return sort alphanum_cmp keys(%$projection);
|
|
} else {
|
|
return sort alphanum_cmp keys(%guest_resources);
|
|
}
|
|
}
|
|
|
|
sub get_any_resources {
|
|
my $peer = shift;
|
|
_scan_caches() unless %total_peers;
|
|
if ($peer) {
|
|
my $projection = $any_peers{$peer};
|
|
return sort alphanum_cmp keys(%$projection);
|
|
} else {
|
|
return sort alphanum_cmp keys(%any_resources);
|
|
}
|
|
}
|
|
|
|
sub get_total_peers {
|
|
my $res = shift;
|
|
_scan_caches() unless %total_peers;
|
|
if ($res) {
|
|
my $projection = $total_resources{$res};
|
|
return sort alphanum_cmp keys(%$projection);
|
|
} else {
|
|
return sort alphanum_cmp keys(%total_peers);
|
|
}
|
|
}
|
|
|
|
sub get_member_peers {
|
|
my $res = shift;
|
|
_scan_caches() unless %total_peers;
|
|
if ($res) {
|
|
my $projection = $member_resources{$res};
|
|
return sort alphanum_cmp keys(%$projection);
|
|
} else {
|
|
return sort alphanum_cmp keys(%member_peers);
|
|
}
|
|
}
|
|
|
|
sub get_guest_peers {
|
|
my $res = shift;
|
|
_scan_caches() unless %total_peers;
|
|
if ($res) {
|
|
my $projection = $guest_resources{$res};
|
|
return sort alphanum_cmp keys(%$projection);
|
|
} else {
|
|
return sort alphanum_cmp keys(%guest_peers);
|
|
}
|
|
}
|
|
|
|
sub get_any_peers {
|
|
my $res = shift;
|
|
_scan_caches() unless %total_peers;
|
|
if ($res) {
|
|
my $projection = $any_resources{$res};
|
|
return sort alphanum_cmp keys(%$projection);
|
|
} else {
|
|
return sort alphanum_cmp keys(%any_peers);
|
|
}
|
|
}
|
|
|
|
sub key_intersect {
|
|
my ($hash1, $hash2) = @_;
|
|
my %h1 = %$hash1;
|
|
my %h2 = %$hash2;
|
|
my %result = %h1;
|
|
foreach my $key (keys(%h2)) {
|
|
delete $result{$key};
|
|
}
|
|
return %result;
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# HACK for 1&1 specific cm3 detection, to disappear (hopefully)
|
|
|
|
my $systemd_recursive_call = defined($ENV{SYSTEMD_RECURSIVE_CALL}) ?
|
|
$ENV{SYSTEMD_RECURSIVE_CALL} :
|
|
"clustermanagerd|nodeagent\\.real";
|
|
my $cm3_checked = 0;
|
|
my $cm3_recursive = 0;
|
|
|
|
sub is_called_recursive {
|
|
if (!$cm3_checked) {
|
|
$cm3_checked = 1;
|
|
my $ppid = getppid();
|
|
if ($ppid == 1) {
|
|
return 1;
|
|
}
|
|
my $tree = `/usr/bin/pstree $ppid`;
|
|
my $recursive_call = ($tree && $tree =~ m{$systemd_recursive_call});
|
|
$cm3_recursive = $recursive_call;
|
|
}
|
|
return $cm3_recursive;
|
|
}
|
|
|
|
sub _systemd_enabled {
|
|
if (!$cm3_checked && is_called_recursive()) {
|
|
$systemd_enabled = 0;
|
|
}
|
|
return $systemd_enabled;
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# dynamic systemd control
|
|
|
|
my $systemd_subdir = defined($ENV{SYSTEMD_SUBDIR}) ? $ENV{SYSTEMD_SUBDIR} : "systemd-templates";
|
|
|
|
my $generated_scripts_subdir = defined($ENV{SYSTEMD_SCRIPTS_SUBDIR}) ?
|
|
$ENV{SYSTEMD_SCRIPTS_SUBDIR} :
|
|
"systemd-generated-scripts";
|
|
|
|
my $predefined_unit_path = "/etc/systemd/system,/run/systemd/system,/usr/lib/systemd/system";
|
|
|
|
my $systemd_system_dirs =
|
|
# prefer the "offical" systemd path as documented in "man systemd.unit"
|
|
defined($ENV{SYSTEMD_UNIT_PATH}) ?
|
|
join(",", split(":", $ENV{SYSTEMD_UNIT_PATH})) .
|
|
(
|
|
# ending in ":" means to append the predefined path (as documented)
|
|
$ENV{SYSTEMD_UNIT_PATH} =~ m/:$/ ?
|
|
$predefined_unit_path :
|
|
""
|
|
) :
|
|
# allow mars-specific overrides
|
|
defined($ENV{SYSTEMD_SYSTEM_DIRS}) ?
|
|
$ENV{SYSTEMD_SYSTEM_DIRS} :
|
|
$predefined_unit_path;
|
|
|
|
my $systemd_target_dir = defined($ENV{SYSTEMD_TARGET_DIR}) ? $ENV{SYSTEMD_TARGET_DIR} : "/run/systemd/system";
|
|
|
|
my $systemd_suffixes = defined($ENV{SYSTEMD_SUFFIXES}) ?
|
|
$ENV{SYSTEMD_SUFFIXES} :
|
|
"service,socket,device,mount,automount,swap,target,path,timer,slice,scope,script";
|
|
|
|
# see man systemd.special
|
|
my $systemd_predefined = defined($ENV{SYSTEMD_PREDEFINED}) ?
|
|
$ENV{SYSTEMD_PREDEFINED} :
|
|
"basic.target,bluetooth.target,cryptsetup-pre.target,cryptsetup.target,ctrl-alt-del.target," .
|
|
"default.target,emergency.target,exit.target,final.target,getty.target,graphical.target," .
|
|
"halt.target,hibernate.target,hybrid-sleep.target,initrd-fs.target,initrd-root-device.target," .
|
|
"initrd-root-fs.target,kbrequest.target,kexec.target,local-fs-pre.target,local-fs.target," .
|
|
"machines.target multi-user.target,network-online.target,network-pre.target,network.target," .
|
|
"nss-lookup.target,nss-user-lookup.target,paths.target,poweroff.target,printer.target," .
|
|
"reboot.target,remote-cryptsetup.target,remote-fs-pre.target,remote-fs.target,rescue.target," .
|
|
"rpcbind.target,runlevel2.target,runlevel3.target,runlevel4.target,runlevel5.target," .
|
|
"shutdown.target,sigpwr.target,sleep.target,slices.target,smartcard.target,sockets.target," .
|
|
"sound.target,suspend.target,swap.target,sysinit.target,syslog.socket,system-update.target," .
|
|
"time-sync.target,timers.target,umount.target," .
|
|
"system.slice,user.slice,machine.slice," .
|
|
"dbus.service,dbus.socket,display-manager.service,system-update-cleanup.service";
|
|
|
|
my %predefined_unit;
|
|
foreach my $name (split(",", $systemd_predefined)) {
|
|
$predefined_unit{$name} = 1;
|
|
}
|
|
|
|
my $systemctl = defined($ENV{SYSTEMCTL}) ? $ENV{SYSTEMCTL} : "systemctl";
|
|
my $systemd_escape = defined($ENV{SYSTEMD_ESCAPE}) ? $ENV{SYSTEMD_ESCAPE} : "@";
|
|
my $systemd_incape = defined($ENV{SYSTEMD_INCAPE}) ? $ENV{SYSTEMD_INCAPE} : "\\^";
|
|
my $systemd_dependencies = defined($ENV{SYSTEMD_DEPENDENCIES}) ?
|
|
$ENV{SYSTEMD_DEPENDENCIES} :
|
|
"Unit|Service|Slice|Sockets|Requires|Requisite|Wants|BindsTo|PartOf|Conflicts|Before|After|OnFailure|PropagatesReloadTo|ReloadPropagatedFrom|JoinsNamespaceOf|RequiresMountsFor|Alias|WantedBy|RequiredBy|Also|DefaultInstance|# ALSO";
|
|
my $systemd_lock_file = defined($ENV{SYSTEMD_LOCK_FILE}) ? $ENV{SYSTEMD_LOCK_FILE} : "/tmp/systemd.lock";
|
|
|
|
my %recursive_locks;
|
|
my $lock_fh;
|
|
|
|
sub systemd_lock {
|
|
my ($suffix, $try_lock) = @_;
|
|
my $lock_file = $systemd_lock_file;
|
|
$lock_file .= "." . $suffix if defined($suffix) && $suffix;
|
|
my $lock_status = $recursive_locks{$lock_file}++;
|
|
if ($lock_status) {
|
|
return 0;
|
|
}
|
|
lprint "TRYING '$lock_file'\n" if $verbose > 1;
|
|
use IO::Handle;
|
|
use Fcntl;
|
|
my $max_time = $timeout > 0 ? $timeout : 30;
|
|
my $count = 0;
|
|
retry:
|
|
for (;;) {
|
|
my $test_pid = 1;
|
|
if (open(my $IN, "<", $lock_file)) {
|
|
my $pid_exists = 0;
|
|
my @pid_list = ();
|
|
while ($test_pid = <$IN>) {
|
|
chomp $test_pid;
|
|
next unless $test_pid;
|
|
if (-d "/proc/$test_pid") {
|
|
$pid_exists++;
|
|
} else {
|
|
push @pid_list, $test_pid;
|
|
}
|
|
}
|
|
if (!$pid_exists) {
|
|
# race prevention: wait until situation is stable
|
|
if ($count-- >= -3) {
|
|
sleep(1);
|
|
goto retry;
|
|
}
|
|
unlink($lock_file);
|
|
lwarn "breaking lock $lock_file, pids {" .
|
|
join(",", @pid_list) .
|
|
"} are no longer alive.\n";
|
|
$count = 0;
|
|
goto retry;
|
|
}
|
|
my $mtime = get_stamp($IN);
|
|
close($IN);
|
|
# Check for timeout
|
|
if ($count > $max_time ||
|
|
(defined($mtime) && $mtime && $mtime + $max_time < time())) {
|
|
lwarn "breaking lock $lock_file after $max_time seconds\n";
|
|
unlink($lock_file);
|
|
$count = 0;
|
|
}
|
|
}
|
|
$lock_fh = undef;
|
|
my $status = sysopen($lock_fh, $lock_file, O_CREAT|O_EXCL|O_TRUNC|O_WRONLY);
|
|
last if defined($status) && $status;
|
|
if (defined($try_lock) && $try_lock && !$force) {
|
|
lprint "FAILED '$lock_file'\n" if $verbose > 1;
|
|
return 1;
|
|
}
|
|
$count++;
|
|
sleep(1);
|
|
}
|
|
print $lock_fh "$$\n";
|
|
$lock_fh->flush();
|
|
lprint "LOCK '$lock_file'\n" if $verbose > 1;
|
|
return 0;
|
|
}
|
|
|
|
sub systemd_unlock {
|
|
my ($suffix) = @_;
|
|
close($lock_fh) if defined($lock_fh);
|
|
$lock_fh = undef;
|
|
my $lock_file = $systemd_lock_file;
|
|
$lock_file .= "." . $suffix if defined($suffix) && $suffix;
|
|
if (--$recursive_locks{$lock_file} > 0) {
|
|
return;
|
|
}
|
|
unlink($lock_file);
|
|
lprint "UNLOCK '$lock_file'\n" if $verbose > 1;
|
|
}
|
|
|
|
my %template_names;
|
|
my %template_files;
|
|
my $nr_templates = 0;
|
|
my $template_max_stamp = 0;
|
|
|
|
sub get_template_files {
|
|
if (!%template_files) {
|
|
foreach my $dir (@MARS_PATH) {
|
|
my $subdir = "$dir/$systemd_subdir";
|
|
$subdir = $dir unless -d $subdir;
|
|
next unless -d $subdir;
|
|
lprint "==== scanning template directory '$subdir'\n" if $verbose;
|
|
foreach my $template_file (lamport_glob("$subdir/*.{$systemd_suffixes}")) {
|
|
my $template_name = $template_file;
|
|
$template_name =~ s:^.*/::;
|
|
next unless $template_name;
|
|
# Only the first hit will win when the same template is in multiple dirs.
|
|
next if defined($template_files{$template_name});
|
|
$template_names{$template_file} = $template_name;
|
|
$template_files{$template_name} = $template_file;
|
|
lprint "== found template '$template_name' at '$template_file'\n" if $verbose > 1;
|
|
my $mtime = get_stamp($template_file);
|
|
$template_max_stamp = $mtime if $mtime > $template_max_stamp;
|
|
$nr_templates++;
|
|
}
|
|
}
|
|
}
|
|
lprint "==== found $nr_templates templates\n" if $verbose;
|
|
return sort alphanum_cmp keys(%template_names);
|
|
}
|
|
|
|
my $stable_pid;
|
|
|
|
sub get_instance_files {
|
|
my ($dir) = @_;
|
|
$stable_pid = $$ unless $stable_pid;
|
|
my $glob = "$dir/{*.{$systemd_suffixes},.pre.$stable_pid.*.{$systemd_suffixes}.tmp}";
|
|
my %instance_files;
|
|
foreach my $instance_file (lamport_glob($glob)) {
|
|
my ($mtime, $text) = _get_file($instance_file);
|
|
next unless $text =~ m/^\#\#\# GENERATED FROM: (.+)$/m;
|
|
my $template_file = $1;
|
|
my $instance_name = $instance_file;
|
|
$instance_name =~ s:^.*/::;
|
|
$instance_files{$instance_name} = [$instance_file, $mtime, $template_file];
|
|
}
|
|
return %instance_files;
|
|
}
|
|
|
|
my %systemd_names;
|
|
my %systemd_files;
|
|
|
|
sub get_systemd_files {
|
|
if (!%systemd_names) {
|
|
foreach my $systemd_file (lamport_glob("{$systemd_system_dirs}/*.{$systemd_suffixes}")) {
|
|
next if $systemd_file =~ m:$systemd_target_dir:;
|
|
next if $systemd_file =~ m:$etc_marsadm/$generated_scripts_subdir:;
|
|
my $systemd_name = $systemd_file;
|
|
$systemd_name =~ s:^.*/::;
|
|
$systemd_names{$systemd_file} = $systemd_name;
|
|
$systemd_files{$systemd_name} = $systemd_file;
|
|
}
|
|
}
|
|
return sort alphanum_cmp keys(%systemd_names);
|
|
}
|
|
|
|
sub systemctl {
|
|
my ($args, $verb) = @_;
|
|
$verb = $verbose unless defined($verb);
|
|
my $cmd = "$systemctl $args";
|
|
systemd_lock();
|
|
lprint "executing: '$cmd'\n" if $verb > 1;
|
|
my $status;
|
|
eval {
|
|
$status = system($cmd);
|
|
};
|
|
systemd_unlock();
|
|
return $status;
|
|
}
|
|
|
|
my %failed;
|
|
|
|
sub _systemd_escape {
|
|
my ($txt) = @_;
|
|
my $replac = `systemd-escape --path "$txt"`;
|
|
chomp $replac;
|
|
return $replac;
|
|
}
|
|
|
|
sub subst_systemd_vars {
|
|
my ($env, $text) = @_;
|
|
my $parsed = "";
|
|
while ($text =~ m/[$systemd_escape]([A-Za-z_][-A-Za-z0-9_]*)?[{]($match_inner)[}]/ps) {
|
|
my $name = $1 || "";
|
|
my $body = $2;
|
|
$parsed .= $PREMATCH;
|
|
my $rest = $POSTMATCH;
|
|
my $this_escape = 0;
|
|
my $replac;
|
|
$_ = $name;
|
|
PRE_SWITCH: {
|
|
if (/^escvar$/) {
|
|
$name = "";
|
|
$this_escape = 1;
|
|
last PRE_SWITCH;
|
|
}
|
|
if (/^esc$/) {
|
|
$name = "verbatim";
|
|
$this_escape = 1;
|
|
last PRE_SWITCH;
|
|
}
|
|
}
|
|
$_ = $name;
|
|
SWITCH: {
|
|
if (/^eval$/) {
|
|
$replac = parse_macro($body, $env);
|
|
last SWITCH;
|
|
}
|
|
if (/^$/) {
|
|
my $varname = parse_macro($body, $env);
|
|
$replac = $$env{$varname};
|
|
if (!defined($replac)) {
|
|
lwarn "variable '$varname' is undefined\n" unless defined($failed{$varname});
|
|
$failed{$varname} = 1;
|
|
$replac = "UNDEFINED($varname)";
|
|
}
|
|
lprint " subst '$systemd_escape\{$varname\}' => '$replac'\n" if $verbose > 9;
|
|
last SWITCH;
|
|
}
|
|
if (/^verbatim$/) {
|
|
$replac = $body;
|
|
last SWITCH;
|
|
}
|
|
lwarn "systemd function '$name' is undefined\n";
|
|
$replac = $body;
|
|
}
|
|
if ($this_escape) {
|
|
my $orig = $replac;
|
|
$replac = _systemd_escape($replac);
|
|
lprint " escape '$orig' => '$replac'\n" if $verbose > 9;
|
|
}
|
|
$parsed .= $replac;
|
|
$text = $rest;
|
|
}
|
|
return ($env, $parsed . $text);
|
|
}
|
|
|
|
sub match_systemd_vars {
|
|
my ($env, $pattern, $text) = @_;
|
|
($env, $pattern) = subst_systemd_vars($env, $pattern);
|
|
($env, $text) = subst_systemd_vars($env, $text);
|
|
my @names;
|
|
my $regex = "";
|
|
while ($pattern =~ m/[$systemd_incape][{]([A-Za-z_][A-Za-z0-9_]*)[}]/ps) {
|
|
my $name = $1;
|
|
my $pre = $PREMATCH;
|
|
my $post = $POSTMATCH;
|
|
push @names, $name;
|
|
$regex .= $pre . "(.+)";
|
|
$pattern = $post;
|
|
}
|
|
$regex .= $pattern;
|
|
$regex =~ s/\\/\\\\/g;
|
|
my $nr = 1;
|
|
if ($text =~ m/^$regex$/p) {
|
|
foreach my $name (@names) {
|
|
my $val;
|
|
eval "\$val = \$$nr;";
|
|
$$env{$name} = $val;
|
|
$nr++;
|
|
}
|
|
return ($env, $text);
|
|
}
|
|
return (undef, $text);
|
|
}
|
|
|
|
sub _make_var_name {
|
|
my ($file_name) = @_;
|
|
chomp $file_name;
|
|
$file_name =~ s:^.*/::;
|
|
my $abs_dir = $systemd_target_dir;
|
|
if ($file_name =~ m/\.script$/) {
|
|
$abs_dir = "$etc_marsadm/$generated_scripts_subdir";
|
|
}
|
|
mkdir $abs_dir;
|
|
$stable_pid = $$ unless $stable_pid;
|
|
my $res_file = "$abs_dir/$file_name";
|
|
my $pre_file = "$abs_dir/.pre.$stable_pid.$file_name.tmp";
|
|
my $tmp_file = "$abs_dir/.tmp.$stable_pid.$file_name.tmp";
|
|
return ($res_file, $pre_file, $tmp_file);
|
|
}
|
|
|
|
sub _get_file {
|
|
my ($filename) = @_;
|
|
my $IN;
|
|
local $/; # slurp
|
|
if (!open($IN, "<", $filename)) {
|
|
lwarn "cannot open file '$filename' ($!)\n";
|
|
return (0, "");
|
|
}
|
|
my $mtime = get_stamp($IN);
|
|
my $text = <$IN>;
|
|
close($IN);
|
|
return ($mtime, $text);
|
|
}
|
|
|
|
my %referenced_units;
|
|
my %shortcut_units;
|
|
|
|
sub _scan_refs {
|
|
my ($text) = @_;
|
|
while ($text =~ s/^[#][#][#]\s*REF:\s*(.*)//m) {
|
|
my $next_unit = $1;
|
|
# Remember the encountered name
|
|
$referenced_units{$next_unit} = 1;
|
|
$text = $POSTMATCH;
|
|
}
|
|
}
|
|
|
|
sub _instantiate_systemd_unit {
|
|
my ($env, $template_file, $out_name) = @_;
|
|
my ($res_file, $pre_file, $tmp_file) = _make_var_name($out_name);
|
|
lprint "==== Translate systemd template '$template_file' => '$res_file'\n" if $verbose;
|
|
my ($mtime, $text) = _get_file($template_file);
|
|
if (!$text) {
|
|
lwarn "cannot get template '$template_file'\n";
|
|
return (0, "");
|
|
}
|
|
# Check timestamps
|
|
my $old_text;
|
|
my $old_mtime;
|
|
my $IN;
|
|
if (open($IN, "<", $res_file)) {
|
|
$old_mtime = get_stamp($IN);
|
|
local $/; # slurp
|
|
my $old_text = <$IN>;
|
|
close($IN);
|
|
if ($old_mtime && $old_mtime == $mtime) {
|
|
lprint "== systemd unit '$res_file' has unchanged mtime=$mtime\n" if $verbose;
|
|
$shortcut_units{$res_file} = 1;
|
|
_scan_refs($old_text);
|
|
return (1, $res_file);
|
|
}
|
|
lprint "== systemd unit '$res_file' mtime $old_mtime => $mtime\n" if $verbose;
|
|
}
|
|
my %this_references;
|
|
my $header;
|
|
$header = "### GENERATED FROM: $template_file\n";
|
|
$header .= "### GENERATED NAME: $out_name\n";
|
|
$header .= "### GENERATED TO: $res_file\n";
|
|
$header .= "### TEMPLATE MTIME: $mtime\n";
|
|
$header .= "###\n";
|
|
($env, $text) = subst_systemd_vars($env, $text);
|
|
my $scan = $text;
|
|
while ($scan =~ m/^\s*($systemd_dependencies)\s*=\s*(.*?)$/mp) {
|
|
my $next_unit_list = $2;
|
|
$scan = $POSTMATCH;
|
|
while ($next_unit_list =~ m/[^\s]+/) {
|
|
my $next_unit = $MATCH;
|
|
$next_unit_list = $POSTMATCH;
|
|
# some units like mount units may be specified as paths.
|
|
$next_unit = _systemd_escape($next_unit) if $next_unit =~ m:/:;
|
|
next if $this_references{$next_unit};
|
|
$this_references{$next_unit} = 1;
|
|
lprint "-- '$template_file' found reference to '$next_unit'\n" if $verbose > 2;
|
|
# Remember the encountered name
|
|
$referenced_units{$next_unit} = 1;
|
|
$header .= "### REF: $next_unit\n";
|
|
}
|
|
}
|
|
$header .= "###\n";
|
|
if ($text =~ m/^([#][!].+)/p) {
|
|
my $hash_bang = $1;
|
|
$text = $hash_bang . "\n" . $header . $POSTMATCH;
|
|
} else {
|
|
$text = $header . $text;
|
|
}
|
|
# new $text is finished, write when necessary.
|
|
if ($old_text && $old_text eq $text) {
|
|
lprint "== systemd unit '$res_file' has not changed\n" if $verbose;
|
|
$shortcut_units{$res_file} = 1;
|
|
_scan_refs($text);
|
|
return (1, $res_file);
|
|
}
|
|
if (!open(OUT, ">", $tmp_file)) {
|
|
lwarn "cannot create '$tmp_file'\n";
|
|
unlink($tmp_file);
|
|
return (0, "");
|
|
}
|
|
unless (print OUT $text) {
|
|
lwarn "cannot write '$tmp_file'\n";
|
|
close(OUT);
|
|
unlink($tmp_file);
|
|
return (0, "");
|
|
}
|
|
unless (close(OUT)) {
|
|
lwarn "cannot close '$tmp_file'\n";
|
|
unlink($tmp_file);
|
|
return (0, "");
|
|
}
|
|
# I would like to use 0400 instead, but this leads to masses of warnings like
|
|
# Configuration file /run/systemd/system/daemon-reload.service is marked world-inaccessible.
|
|
# This has no effect as configuration data is accessible via APIs without restrictions.
|
|
# Proceeding anyway.
|
|
my $perm = 0444;
|
|
if ($res_file =~ m/\.script$/) {
|
|
$perm = 0544;
|
|
}
|
|
chmod($perm, $tmp_file);
|
|
utime($mtime, $mtime, $tmp_file);
|
|
unless (rename($tmp_file, $pre_file)) {
|
|
lwarn "cannot rename '$tmp_file' to '$pre_file'\n";
|
|
unlink($tmp_file);
|
|
return (0, "");
|
|
}
|
|
return (1, $res_file);
|
|
}
|
|
|
|
my %generated_units;
|
|
|
|
sub make_systemd_unit {
|
|
my ($cmd, $res, $target, $force_generate) = @_;
|
|
return 0 if $predefined_unit{$target};
|
|
# dynamic programming
|
|
$res = "" unless defined($res);
|
|
my $gen_key = "$cmd.$res.$target";
|
|
if (defined($generated_units{$gen_key})) {
|
|
lprint "systemd unit '$gen_key' already generated\n" if $verbose;
|
|
return $generated_units{$gen_key};
|
|
}
|
|
my @res_list;
|
|
if ($res) {
|
|
@res_list = ($res);
|
|
} else {
|
|
@res_list = get_any_resources($host);
|
|
}
|
|
my ($found_env, $found_template_file, $found_subst);
|
|
lprint "==== searching templates for '$target'\n" if $verbose;
|
|
foreach my $template_file (get_template_files()) {
|
|
my $template_name = $template_names{$template_file};
|
|
next unless $template_name;
|
|
# avoid exhaustive search
|
|
if ($template_name =~ m/^([A-Za-z0-9_.]+)/) {
|
|
my $prefix = $1;
|
|
next unless $target =~ m/^$prefix/;
|
|
}
|
|
foreach my $res (@res_list) {
|
|
($template_name, my $env) = make_env($cmd, $res, $template_name);
|
|
my $subst = $template_name;
|
|
(my $new_env, $subst) = match_systemd_vars($env, $template_name, $target);
|
|
if ($new_env) {
|
|
($found_env, $found_template_file, $found_subst) = ($new_env, $template_file, $subst);
|
|
goto found;
|
|
}
|
|
}
|
|
}
|
|
found:
|
|
if (!$found_template_file) {
|
|
foreach my $template_file (get_template_files()) {
|
|
my $template_name = $template_names{$template_file};
|
|
next unless $template_name;
|
|
foreach my $res (@res_list) {
|
|
($template_name, my $env) = make_env($cmd, $res, $template_name);
|
|
my $subst = $template_name;
|
|
(my $new_env, $subst) = match_systemd_vars($env, $template_name, $target);
|
|
if ($subst) {
|
|
# Check if already installed somewhere else
|
|
get_systemd_files() unless %systemd_files;
|
|
if (defined($systemd_files{$subst})) {
|
|
lprint "systemd unit '$subst' is already present at '$systemd_files{$subst}'\n" if $verbose;
|
|
$generated_units{$gen_key} = 0;
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
lwarn "cannot find any systemd template for target unit '$target'\n";
|
|
$generated_units{$gen_key} = 0;
|
|
return 0;
|
|
}
|
|
my ($out_env, $out_name) = subst_systemd_vars($found_env, $found_subst);
|
|
lprint "==== instantiating template '$found_template_file' as '$out_name'\n" if $verbose;
|
|
my ($nr, $file, $name) = _instantiate_systemd_unit($out_env, $found_template_file, $out_name);
|
|
$generated_units{$gen_key} = $nr;
|
|
return $nr;
|
|
}
|
|
|
|
sub set_systemd_want_phase1 {
|
|
my ($cmd, $res, $new) = @_;
|
|
if ($new ne "(none)") {
|
|
my $host_path = "$mars/ips/ip-$new";
|
|
unless (get_link($host_path, 1)) {
|
|
lwarn "The hostname '$new' does not exist.\n";
|
|
ldie "Refusing to set systemd-want.\n" unless $force;
|
|
}
|
|
}
|
|
my $unit_glob = "$mars/resource-$res/systemd-*-unit";
|
|
unless (lamport_glob($unit_glob)) {
|
|
lwarn "Resource '$res' has no configured systemd units.\n";
|
|
lwarn "First configure the resource with marsadm set-systemd-unit.\n";
|
|
ldie "Refusing to set systemd-want.\n" unless $force;
|
|
}
|
|
my $want_path = "$mars/resource-$res/systemd-want";
|
|
my $old = get_link($want_path, 1);
|
|
return 0 if ($old && $old eq $new);
|
|
set_link($new, $want_path);
|
|
finish_links();
|
|
return 0;
|
|
}
|
|
|
|
sub set_systemd_want_phase2 {
|
|
my ($cmd, $res) = @_;
|
|
_systemd_trigger($cmd);
|
|
return 0;
|
|
}
|
|
|
|
sub set_systemd_want_phase3 {
|
|
my ($cmd, $res) = @_;
|
|
_trigger(3);
|
|
my $want_path = "$mars/resource-$res/systemd-want";
|
|
my $new = get_link($want_path, 1);
|
|
if ($force) {
|
|
lprint "De/activation of '$want_path' -> '$new' is not checked due to --force\n";
|
|
return 0;
|
|
}
|
|
if ($new eq "(none)") {
|
|
my $fail = 0;
|
|
my $touch = 0;
|
|
foreach my $peer (get_any_peers($res)) {
|
|
my $open_count_path = "$mars/resource-$res/actual-$peer/open-count";
|
|
my $device_in_use = get_link($open_count_path, 1);
|
|
if ($device_in_use) {
|
|
my $name = _device_name($res, $peer);
|
|
lprint "Device '$name' is in use at '$peer'\n";
|
|
if ($peer eq $real_host) {
|
|
$fail = 1;
|
|
} else {
|
|
# mistrust unreachable peers
|
|
my ($dead_count, $alive_count, $unknown_count) = is_cluster_recent($cmd, $res, $peer);
|
|
if ($alive_count) {
|
|
$fail = 1;
|
|
} else {
|
|
lwarn "however peer '$peer' is not reachable, continuing for livelock avoidance.\n";
|
|
$touch++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if ($fail || $touch) {
|
|
lprint "Re-trigger $want_path\n";
|
|
safe_touch($want_path);
|
|
} else {
|
|
lprint "All peer devices are closed.\n";
|
|
}
|
|
return $fail;
|
|
}
|
|
my $name = _device_name($res, $new);
|
|
my $dev_present = device_exists($res, $new);
|
|
if (!$dev_present) {
|
|
lwarn "device '$name' is not present at '$new'\n";
|
|
return 0;
|
|
}
|
|
my $fail = 0;
|
|
my $open_count_path = "$mars/resource-$res/actual-$new/open-count";
|
|
my $device_in_use = get_link($open_count_path, 1);
|
|
if (!$device_in_use) {
|
|
lprint "Device '$name' not yet opened at '$new'\n";
|
|
safe_touch($want_path);
|
|
# mistrust unreachable peer
|
|
if ($new eq $real_host) {
|
|
$fail = 1;
|
|
} else {
|
|
my ($dead_count, $alive_count, $unknown_count) = is_cluster_recent($cmd, $res, $new);
|
|
if ($alive_count) {
|
|
$fail = 1;
|
|
} else {
|
|
lwarn "however peer '$new' is not reachable, continuing for livelock avoidance.\n";
|
|
}
|
|
}
|
|
} else {
|
|
lprint "Device '$name' is open at '$new'.\n";
|
|
}
|
|
return $fail;
|
|
}
|
|
|
|
sub systemd_present {
|
|
my ($cmd, $res) = @_;
|
|
return "" unless _systemd_enabled();
|
|
my $unit_glob = "$mars/resource-$res/systemd-*-unit";
|
|
return "" unless lamport_glob($unit_glob);
|
|
my $want_path = "$mars/resource-$res/systemd-want";
|
|
my $want = get_link($want_path, 2);
|
|
return $want;
|
|
}
|
|
|
|
sub get_systemd_want {
|
|
my $txt = systemd_present(@_);
|
|
lprint "$txt\n";
|
|
}
|
|
|
|
sub systemd_exists {
|
|
my ($unit_list) = @_;
|
|
return 0 unless _systemd_enabled();
|
|
foreach my $unit (split(/ +/, $unit_list)) {
|
|
my $check_cmd = "list-unit-files \"$unit\" | wc -l";
|
|
my $count = `$systemctl $check_cmd`;
|
|
if (!defined($count) || !$count || $count <= 0) {
|
|
lprint "nothing to do for systemd, unit file '$unit' does not exist.\n";
|
|
return 0;
|
|
}
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
sub systemd_unit_enabled {
|
|
my ($unit_list) = @_;
|
|
return 1 unless _systemd_enabled();
|
|
foreach my $unit (split(/ +/, $unit_list)) {
|
|
# .script is assumed as always enabled
|
|
next if $unit =~ m/\.script$/;
|
|
my $check_cmd = "is-enabled '$unit' > /dev/null 2>&1";
|
|
my $status = systemctl($check_cmd, 0);
|
|
if ($status) {
|
|
lprint "systemd unit '$unit' is not existing or not enabled.\n";
|
|
return $status;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
sub _check_unit_marker {
|
|
my ($file, $marker) = @_;
|
|
my ($mtime, $text) = _get_file($file);
|
|
my $found = ($text =~ m/^[#]\s*$marker/m);
|
|
return $found;
|
|
}
|
|
|
|
my %systemctl_pid = ();
|
|
|
|
sub _systemd_op_wait {
|
|
my ($kill_after_timeout) = @_;
|
|
return 0 unless %systemctl_pid;
|
|
my $errors = 0;
|
|
my $success = 0;
|
|
my $old_timeout = $timeout;
|
|
my $signal = "SIGTERM";
|
|
my $backoff = 1;
|
|
for (;;) {
|
|
my @wait_list = keys(%systemctl_pid);
|
|
last unless @wait_list;
|
|
foreach my $pid (@wait_list) {
|
|
use POSIX ":sys_wait_h";
|
|
my $check_pid = waitpid($pid, WNOHANG);
|
|
my $status = $?;
|
|
if ($status > 0) {
|
|
lwarn "UNIT CHILD $pid terminated with status=$status\n";
|
|
delete $systemctl_pid{$pid};
|
|
$backoff = 1;
|
|
$errors++;
|
|
} elsif ($check_pid == $pid) {
|
|
lprint_stderr "UNIT CHILD $pid terminated successfully\n";
|
|
delete $systemctl_pid{$pid};
|
|
$backoff = 1;
|
|
$success++;
|
|
}
|
|
}
|
|
last unless %systemctl_pid;
|
|
my $reached = sleep_timeout($backoff++, 1);
|
|
if ($reached && $kill_after_timeout) {
|
|
kill($signal, keys(%systemctl_pid));
|
|
$errors++;
|
|
}
|
|
last if $signal eq "SIGKILL"; # give up
|
|
$signal = "SIGKILL";
|
|
$backoff = 1;
|
|
$timeout = $old_timeout;
|
|
}
|
|
%systemctl_pid = ();
|
|
$timeout = $old_timeout;
|
|
if ($errors) {
|
|
lwarn "systemctl: there were were $success successes and $errors errors.\n";
|
|
} else {
|
|
lprint "There were $success successful systemctl operations.\n";
|
|
}
|
|
return $errors;
|
|
}
|
|
|
|
sub _systemd_op {
|
|
my ($op, $unit, $do_fork) = @_;
|
|
return 0 unless _systemd_enabled();
|
|
my $has_forked = 0;
|
|
if ($do_fork && !$child_prefix && $op =~ m/start|stop/) {
|
|
my $pid = fork();
|
|
if (defined($pid)) {
|
|
if ($pid) {
|
|
# parent
|
|
lprint_stderr "UNIT CHILD $pid: $op '$unit'\n";
|
|
$systemctl_pid{$pid} = "$op $unit";
|
|
return 0;
|
|
} else {
|
|
# child: simply continue
|
|
$child_prefix = "UNIT $op: ";
|
|
$has_forked = 1;
|
|
}
|
|
}
|
|
}
|
|
my $status = 0;
|
|
# special case: .script templates are to be executed directly
|
|
if ($unit =~ m/\.script$/) {
|
|
my $dir = "";
|
|
foreach my $test_dir ("$etc_marsadm/$generated_scripts_subdir",
|
|
split(",", $systemd_system_dirs)) {
|
|
next unless -x "$test_dir/$unit";
|
|
$dir = $test_dir;
|
|
last;
|
|
}
|
|
$dir .= "/" if $dir;
|
|
my $cmd = "'$dir$unit' $op";
|
|
lprint "--- running script: '$cmd'\n";
|
|
$status = system($cmd);
|
|
if ($status) {
|
|
lwarn "script '$cmd' failed, status=$status\n";
|
|
} else {
|
|
lprint "--- script status=$status\n";
|
|
}
|
|
goto done;
|
|
}
|
|
if (systemctl("cat '$unit' > /dev/null 2>&1", 0)) {
|
|
lwarn "systemd unit $unit does not exist.\n";
|
|
goto done;
|
|
}
|
|
my $ctl_cmd = "is-failed --quiet '$unit'";
|
|
my $fail_status = systemctl($ctl_cmd, 0);
|
|
if (!$fail_status) {
|
|
my $ctl_cmd = "reset-failed '$unit'";
|
|
$status = systemctl($ctl_cmd);
|
|
lprint "--- resetting failed unit '$unit': status=$status\n";
|
|
}
|
|
if ($op eq "start" || $op eq "restart") {
|
|
if (systemd_unit_enabled($unit)) {
|
|
goto done;
|
|
}
|
|
}
|
|
$ctl_cmd = "$op '$unit'";
|
|
lprint "--- running systemctl command: $ctl_cmd\n";
|
|
$status = systemctl($ctl_cmd);
|
|
if ($status) {
|
|
lwarn "command '$systemctl $ctl_cmd' failed, status=$status\n";
|
|
} else {
|
|
lprint "--- systemctl status=$status\n";
|
|
}
|
|
done:
|
|
# confine to 8bit
|
|
if ($status < 0 || $status > 255) {
|
|
$status = 255;
|
|
lprint "--- correcting status=$status\n";
|
|
}
|
|
exit($status) if $has_forked;
|
|
return $status;
|
|
}
|
|
|
|
sub report_systemd_status {
|
|
my ($cmd, $res, $status, $oper, $peer) = @_;
|
|
$peer = $host unless $peer;
|
|
my $response_path = "$mars/resource-$res/userspace/systemd-status-$oper-$peer";
|
|
# avoid useless path-triggers when nothing has changed
|
|
my $old_val = get_link($response_path, 1);
|
|
if ($old_val eq "" || $old_val != $status) {
|
|
set_link($status, $response_path);
|
|
}
|
|
}
|
|
|
|
sub systemd_activate {
|
|
my ($cmd, $res, $override, $fail_abort) = @_;
|
|
return 0 unless _systemd_enabled();
|
|
my $want_path = "$mars/resource-$res/systemd-want";
|
|
my $want = get_link($want_path, 2);
|
|
lprint "====== want '$want' for '$want_path'\n" if $verbose;
|
|
if (!$want) {
|
|
lprint "Nothing to (de)activate: $want_path does not exist\n" if $verbose;
|
|
return 0;
|
|
}
|
|
my $do_activate = $want eq $host;
|
|
if ($do_activate) {
|
|
# Check for device existence
|
|
if (!device_exists($res, $want)) {
|
|
my $name = device_name($res, $want);
|
|
lprint "==== device '$name' is not preset at '$want'\n" if $verbose;
|
|
$do_activate = 0;
|
|
}
|
|
}
|
|
if (defined($override)) {
|
|
if ($override != $do_activate) {
|
|
lprint "Overriding unit activate=$do_activate with $override\n" if $verbose;
|
|
$do_activate = $override;
|
|
}
|
|
} elsif ($do_activate) {
|
|
my $primary = _get_designated_primary($res);
|
|
if ($primary ne $host) {
|
|
# Do not activate for now
|
|
lprint "Overriding systemd-want: '$host' is not designated primary,\n";
|
|
lprint "==== Do not modify resource '$res'\n" if $verbose;
|
|
return 0;
|
|
}
|
|
}
|
|
my $oper = $do_activate ? "start" : "stop";
|
|
my $unit_path = "$mars/resource-$res/systemd-$oper-unit";
|
|
my $unit = get_link($unit_path, 2);
|
|
if (!$unit) {
|
|
lprint "Nothing to (de)activate: $unit_path does not exist\n" if $verbose;
|
|
return 0;
|
|
}
|
|
my $ctl_cmd = "show \"$unit\"";
|
|
my $op = "show";
|
|
if ($do_activate) {
|
|
$unit =~ s/ .*//;
|
|
if (systemd_unit_enabled($unit)) {
|
|
return 0;
|
|
}
|
|
lprint "==== Activate resource '$res' unit '$unit'\n" if $verbose;
|
|
$op = "start";
|
|
} else {
|
|
$unit =~ s/.* //;
|
|
lprint "==== Deactivate resource '$res' unit '$unit'\n" if $verbose;
|
|
$op = "stop";
|
|
}
|
|
my $status = _systemd_op($op, $unit, !$fail_abort);
|
|
report_systemd_status($cmd, $res, $status, $oper);
|
|
finish_links();
|
|
if ($status && defined($fail_abort) && $fail_abort) {
|
|
lwarn "systemd unit '$unit' operation $op failed, status=$status\n";
|
|
return 0;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
sub __systemd_commit {
|
|
my ($work_dir, $do_delete) = @_;
|
|
lprint "==== Commit '$work_dir'\n" if $verbose;
|
|
# Internal destination code:
|
|
# -2 = needs stop + disable (e.g. deleted)
|
|
# -1 = needs disable, but no status change
|
|
# 0 = modified, no status change (for whatever reason)
|
|
# 1 = new, to enable, no start
|
|
# 2 = new, needs enable + start
|
|
# absent = no modification
|
|
my %changes;
|
|
my %files = get_instance_files($work_dir);
|
|
my %renames;
|
|
my %deletes;
|
|
my $need_reload = 0;
|
|
foreach my $target (sort alphanum_cmp keys(%files)) {
|
|
next if $shortcut_units{"$work_dir/$target"};
|
|
if ($target =~ m/^\.pre\.[0-9]+\.(.+?)\.tmp$/) {
|
|
my $old_target = $1;
|
|
my $new_target = $target;
|
|
my ($new_instance, $new_mtime, $new_template) = @{$files{$new_target}};
|
|
if (defined($files{$old_target})) {
|
|
lprint "-- '$old_target' is not new\n" if $verbose > 3;
|
|
my ($old_instance, $old_mtime, $old_template) = @{$files{$old_target}};
|
|
if ($old_mtime == $new_mtime) {
|
|
lprint "-- '$old_target' equal mtime=$new_mtime\n" if $verbose > 2;
|
|
$deletes{new_target} = 1;
|
|
next;
|
|
}
|
|
lprint "-- '$old_target' changed mtime from $old_mtime to $new_mtime\n" if $verbose;
|
|
}
|
|
$renames{$new_target} = $old_target;
|
|
if (_check_unit_marker($new_instance, "ALWAYS_DISABLED")) {
|
|
lprint "-- '$old_target' must remain disabled\n" if $verbose > 2;
|
|
$changes{$old_target} = -1;
|
|
next;
|
|
} elsif (_check_unit_marker($new_instance, "ALWAYS_START")) {
|
|
lprint "-- '$old_target' must be started\n" if $verbose > 2;
|
|
$changes{$old_target} = 2;
|
|
$need_reload++;
|
|
next;
|
|
} else {
|
|
lprint "-- '$old_target' will be enabled, but not started\n" if $verbose > 2;
|
|
$changes{$old_target} = 1;
|
|
$need_reload++;
|
|
}
|
|
next;
|
|
}
|
|
$stable_pid = $$ unless $stable_pid;
|
|
my $old_target = $target;
|
|
my $new_target = ".pre.$stable_pid.$old_target.tmp";
|
|
my ($old_instance, $old_mtime, $old_template) = @{$files{$old_target}};
|
|
if (!defined($files{$new_target})) {
|
|
if (!$do_delete) {
|
|
lprint "-- ignoring '$old_target'\n" if $verbose > 2;
|
|
next;
|
|
}
|
|
$deletes{$old_target} = 1;
|
|
if (_check_unit_marker($old_instance, "KEEP_RUNNING")) {
|
|
lprint "-- deleted '$old_target' is KEEP_RUNNING\n" if $verbose > 2;
|
|
$changes{$old_target} = -1;
|
|
next;
|
|
}
|
|
lprint "-- marking deleted '$old_target' for removal\n" if $verbose > 2;
|
|
$changes{$old_target} = -2;
|
|
$need_reload++;
|
|
next;
|
|
}
|
|
if (_check_unit_marker($old_instance, "ALWAYS_DISABLED")) {
|
|
lprint "-- '$old_target' is ALWAYS_DISABLED\n" if $verbose > 2;
|
|
$changes{$old_target} = -1;
|
|
next;
|
|
}
|
|
my ($new_instance, $new_mtime, $new_template) = @{$files{$new_target}};
|
|
my $ok = ($old_mtime == $new_mtime);
|
|
if ($ok) {
|
|
lprint "-- '$old_target' was not modified\n" if $verbose > 2;
|
|
next;
|
|
}
|
|
lprint "-- '$old_target' was modified\n" if $verbose > 2;
|
|
$changes{$old_target} = 0;
|
|
$need_reload++;
|
|
}
|
|
# Cleanup the old situation.
|
|
# This needs to be done in per-operation cycles,
|
|
# because there may be inter-unit dependencies.
|
|
lprint "==== Stopping old / deleted units\n" if $verbose;
|
|
foreach my $unit (sort alphanum_cmp keys(%changes)) {
|
|
my $op = $changes{$unit};
|
|
if ($op < -1) {
|
|
_systemd_op("stop", $unit);
|
|
}
|
|
}
|
|
lprint "==== Disabling old / deleted units\n" if $verbose;
|
|
foreach my $unit (sort alphanum_cmp keys(%changes)) {
|
|
my $op = $changes{$unit};
|
|
if ($op < 0) {
|
|
_systemd_op("disable", $unit);
|
|
}
|
|
}
|
|
# Commit
|
|
foreach my $file (keys(%deletes)) {
|
|
my $path = "$work_dir/$file";
|
|
lprint "--- unlink '$path'\n" if $verbose > 2;
|
|
unlink($path);
|
|
}
|
|
foreach my $src (keys(%renames)) {
|
|
my $dst = $renames{$src};
|
|
my $src_path = "$work_dir/$src";
|
|
my $dst_path = "$work_dir/$dst";
|
|
lprint "--- rename '$src_path' '$dst_path'\n" if $verbose > 2;
|
|
rename($src_path, $dst_path);
|
|
}
|
|
%generated_units = ();
|
|
# Tell the new situation to systemd.
|
|
# This needs to be done in per-operation cycles,
|
|
# because there may be inter-unit dependencies.
|
|
if ($need_reload) {
|
|
lprint "==== Restart systemd\n" if $verbose;
|
|
systemctl("daemon-reload");
|
|
}
|
|
lprint "==== Enabling new units\n" if $verbose;
|
|
foreach my $unit (sort alphanum_cmp keys(%changes)) {
|
|
my $op = $changes{$unit};
|
|
if ($op > 0) {
|
|
_systemd_op("enable", $unit);
|
|
}
|
|
}
|
|
lprint "==== Starting new units\n" if $verbose;
|
|
foreach my $unit (sort alphanum_cmp keys(%changes)) {
|
|
my $op = $changes{$unit};
|
|
if ($op > 1) {
|
|
_systemd_op("start", $unit, 1);
|
|
}
|
|
}
|
|
lprint "==== Done commit '$work_dir'\n" if $verbose;
|
|
}
|
|
|
|
sub systemd_commit {
|
|
my ($do_delete) = @_;
|
|
# We need separate target directories for templates and for scripts.
|
|
# Reason: /run does not allow script execution on many systems.
|
|
__systemd_commit($systemd_target_dir, $do_delete);
|
|
_systemd_op_wait();
|
|
my $script_dir = "$etc_marsadm/$generated_scripts_subdir";
|
|
__systemd_commit($script_dir, $do_delete);
|
|
_systemd_op_wait();
|
|
}
|
|
|
|
# THINK:
|
|
# Would per-resource incremental template generation be better?
|
|
# Yes, it would scale better, but would it be _correct_?
|
|
#
|
|
# The problem is that the macro processor is Turing-complete.
|
|
#
|
|
# Thus there might be non-trivial cross-dependencies between generated
|
|
# unit files. Potentially even cross-resource dependencies.
|
|
# These might for example depend on non-trivial if conditions,
|
|
# potentially even purely dynamic environment variables, or whatever.
|
|
#
|
|
# At the moment, the complete transitive closure is re-computed once
|
|
# a small detail has changed. This is on the safe side, but not optimal.
|
|
# There is certainly room for improvement. However be cautious
|
|
# with respect to correctness under all cirumstances.
|
|
#
|
|
# Knuth is cited: "I can do it in half the time if it doesn't have
|
|
# to be correct".
|
|
|
|
sub __systemd_generate_all {
|
|
my ($cmd, $res, $force_generate) = @_;
|
|
return unless -d $mars;
|
|
return unless -d $etc_marsadm;
|
|
mkdir($systemd_target_dir);
|
|
mkdir("$etc_marsadm/$generated_scripts_subdir");
|
|
lprint "Generate all templates for '$res'.\n";
|
|
# Determine all template files.
|
|
get_template_files();
|
|
# Always add all plain templates
|
|
my %done_units;
|
|
%referenced_units = ();
|
|
my $count = 0;
|
|
foreach my $template_name (sort alphanum_cmp keys(%template_files)) {
|
|
next if $template_name =~ m/($systemd_incape|$systemd_escape)/;
|
|
$count += make_systemd_unit($cmd, "UNDEFINED_RESOURCE", $template_name);
|
|
$done_units{$template_name} = 1;
|
|
}
|
|
# Determine all participating resource names.
|
|
my @res_list = get_any_resources($host);
|
|
# Create initial systemd units
|
|
foreach my $res (@res_list) {
|
|
foreach my $unit_link (lamport_glob("$mars/resource-$res/systemd-*-unit")) {
|
|
my $target = get_link($unit_link);
|
|
next unless $target;
|
|
$count += make_systemd_unit($cmd, $res, $target);
|
|
$done_units{$target} = 1;
|
|
}
|
|
}
|
|
# Compute the transitive closure of referenced units
|
|
lprint "== adding transitive units for $count start units.\n" if $verbose;
|
|
for (;;) {
|
|
my $old_count = $count;
|
|
foreach my $target (sort alphanum_cmp keys(%referenced_units)) {
|
|
next if $done_units{$target};
|
|
$count += make_systemd_unit($cmd, undef, $target);
|
|
$done_units{$target} = 1;
|
|
}
|
|
last if ($count <= $old_count);
|
|
}
|
|
lprint "== $count units generated.\n" if $verbose;
|
|
# Check and commit the new situation
|
|
systemd_commit();
|
|
}
|
|
|
|
sub __systemd_activate_ops {
|
|
my $cmd = shift;
|
|
# Barrier, for safety
|
|
_systemd_op_wait();
|
|
# Activate the listed units.
|
|
my @res_list = get_any_resources($host);
|
|
foreach my $res (@res_list) {
|
|
systemd_activate($cmd, $res);
|
|
}
|
|
_systemd_op_wait();
|
|
}
|
|
|
|
sub __systemd_trigger {
|
|
my $cmd = shift;
|
|
return unless _systemd_enabled();
|
|
# ensure that trigger file exists
|
|
mkdir("$mars/userspace") unless -d "$mars/userspace";
|
|
my $trigger = "$mars/userspace/systemd-trigger";
|
|
lprint "Triggering '$trigger' for '$cmd'\n" if $verbose;
|
|
safe_touch($trigger) || safe_creat($trigger);
|
|
}
|
|
|
|
sub _systemd_trigger {
|
|
my ($cmd, $force_generate) = @_;
|
|
systemd_lock();
|
|
lprint "Direct template generation\n" if $verbose;
|
|
# Continue with unlock in case of any deaths inbetween
|
|
eval {
|
|
__systemd_generate_all($cmd, "", $force_generate);
|
|
};
|
|
__systemd_activate_ops($cmd);
|
|
systemd_unlock();
|
|
}
|
|
|
|
sub systemd_trigger_extern {
|
|
my ($cmd, $res) = @_;
|
|
return unless -d $systemd_target_dir;
|
|
systemd_lock();
|
|
my $called_external = ($cmd =~ m/extern/);
|
|
my $failed_lock = systemd_lock(undef, $called_external);
|
|
if ($failed_lock) {
|
|
lwarn "another action is currently running, and $cmd was called externally\n";
|
|
return;
|
|
}
|
|
if ($called_external) {
|
|
$cm3_checked = 1;
|
|
$systemd_enabled = 1;
|
|
} elsif (is_called_recursive()) {
|
|
return 0;
|
|
}
|
|
my $force_generate = !$called_external;
|
|
# Continue with unlock in case of any deaths inbetween
|
|
eval {
|
|
__systemd_generate_all($cmd, $res, $force_generate);
|
|
};
|
|
__systemd_activate_ops($cmd);
|
|
systemd_unlock();
|
|
}
|
|
|
|
sub _get_systemd_unit {
|
|
my ($cmd, $res) = @_;
|
|
my $unit_start_path = "$mars/resource-$res/systemd-start-unit";
|
|
my $unit_stop_path = "$mars/resource-$res/systemd-stop-unit";
|
|
my $start_unit = get_link($unit_start_path, 2);
|
|
my $stop_unit = get_link($unit_stop_path, 2);
|
|
if (!$start_unit) {
|
|
lprint_stderr "Nothing to show: start unit $unit_start_path does not exist\n" if $verbose;
|
|
return "";
|
|
}
|
|
if (!$stop_unit) {
|
|
lwarn "Strange: stop unit $unit_stop_path does not exist\n";
|
|
return $start_unit;
|
|
}
|
|
return "$start_unit $stop_unit";
|
|
}
|
|
|
|
sub get_systemd_unit {
|
|
my $unit = _get_systemd_unit(@_);
|
|
lprint "$unit\n" if $unit;
|
|
}
|
|
|
|
sub _get_default_unit {
|
|
my ($cmd, $res, $marker) = @_;
|
|
my $found = "";
|
|
foreach my $template_file (get_template_files()) {
|
|
my $template_name = $template_names{$template_file};
|
|
next unless $template_name;
|
|
if (!_check_unit_marker($template_file, $marker)) {
|
|
next;
|
|
}
|
|
$found = $template_name;
|
|
my ($dummy, $start_env) = make_env($cmd, $res, $template_name);
|
|
my ($env, $subst) = subst_systemd_vars($start_env, $template_name);
|
|
$found = $subst if $subst;
|
|
last;
|
|
}
|
|
return $found;
|
|
}
|
|
|
|
sub set_systemd_unit_phase1 {
|
|
my ($cmd, $res, $start_name, $stop_name) = @_;
|
|
ldie "Start unit name is undefined\n" unless defined($start_name);
|
|
# typically, this matches to capital DEFAULT
|
|
my $default_regex = qr{^[A-Z][A-Z_0-9]*$};
|
|
my $use_default = "";
|
|
if ($start_name =~ m{$default_regex}) {
|
|
$use_default = $start_name;
|
|
my $marker = "${start_name}_START";
|
|
$start_name = _get_default_unit($cmd, $res, $marker);
|
|
lprint "Marker '$marker' leads to start template '$start_name'\n";
|
|
}
|
|
if ($use_default && !$stop_name) {
|
|
my $marker = "${use_default}_STOP";
|
|
$stop_name = _get_default_unit($cmd, $res, $marker);
|
|
lprint "Marker '$marker' (derived from start marker) leads to stop template '$stop_name'\n";
|
|
} elsif ($stop_name && $stop_name =~ m{$default_regex}) {
|
|
my $marker = "${stop_name}_STOP";
|
|
$stop_name = _get_default_unit($cmd, $res, $marker);
|
|
lprint "Marker '$marker' leads to stop template '$stop_name'\n";
|
|
}
|
|
unless ($stop_name) {
|
|
lprint "Unspecified stop unit: ALSO use start unit/template '$start_name' for stopping.\n";
|
|
$stop_name = $start_name;
|
|
}
|
|
my $unit_start_path = "$mars/resource-$res/systemd-start-unit";
|
|
my $unit_stop_path = "$mars/resource-$res/systemd-stop-unit";
|
|
# Convenience: try to fix any non-escaped resource names
|
|
if ($start_name =~ m/($res)/p) {
|
|
my $pre = $PREMATCH;
|
|
my $post = $POSTMATCH;
|
|
my $replac = _systemd_escape($res);
|
|
$start_name = $pre . $replac . $post;
|
|
}
|
|
if ($stop_name =~ m/($res)/p) {
|
|
my $pre = $PREMATCH;
|
|
my $post = $POSTMATCH;
|
|
my $replac = _systemd_escape($res);
|
|
$stop_name = $pre . $replac . $post;
|
|
}
|
|
my $want_path = "$mars/resource-$res/systemd-want";
|
|
if ($start_name) {
|
|
set_link($start_name, $unit_start_path);
|
|
my $target = _get_designated_primary($res, 1);
|
|
if ($target) {
|
|
set_link($target, $want_path);
|
|
}
|
|
} else {
|
|
_create_delete($unit_start_path);
|
|
_create_delete($want_path);
|
|
}
|
|
if ($stop_name) {
|
|
set_link($stop_name, $unit_stop_path);
|
|
} else {
|
|
_create_delete($unit_stop_path);
|
|
_create_delete($want_path);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
sub set_systemd_unit_phase2 {
|
|
my ($cmd, $res) = @_;
|
|
finish_links();
|
|
_systemd_trigger($cmd, 1);
|
|
return 0;
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# path correction
|
|
|
|
sub correct_path {
|
|
my ($path) = @_;
|
|
# actual switches
|
|
$path =~ s:(/is-attach)[a-z]*$:$1ed:;
|
|
$path =~ s:(/is-)(fetch)[a-z]*$:$1copy:;
|
|
$path =~ s:(/is-)(apply)[a-z]*$:$1replay:;
|
|
$path =~ s:(/is-(copy|replay|sync))[a-z]*$:$1ing:;
|
|
$path =~ s:(/is-)(primary|secondary)[a-z]*$:$1primary:;
|
|
# todo switches
|
|
$path =~ s:(/fetch)[a-z]*$:/connect:;
|
|
$path =~ s:(/apply)[a-z]*$:/allow-replay:;
|
|
$path =~ s:(/replay)[a-z]*$:/allow-replay:;
|
|
$path =~ s:(/todo-.*/(primary|secondary))[a-z]*$:/primary:;
|
|
return $path;
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# low-level infrastructure
|
|
|
|
my @link_list = ();
|
|
my %link_hash;
|
|
|
|
sub get_link {
|
|
my ($path, $unchecked) = @_;
|
|
my $result = readlink($path);
|
|
if (!defined($result)) {
|
|
ldie "cannot read symlink '$path'\n" unless $unchecked;
|
|
lwarn "cannot read symlink '$path'\n" if $unchecked == 1;
|
|
$result = "";
|
|
}
|
|
$result = "" if $result eq ".deleted";
|
|
return $result;
|
|
}
|
|
|
|
sub get_link_stamp {
|
|
my ($path) = @_;
|
|
my @stat = lstat($path);
|
|
return 0 if (!@stat);
|
|
return $stat[9];
|
|
}
|
|
|
|
sub get_stamp {
|
|
my ($path_or_handle) = @_;
|
|
my @stat = stat($path_or_handle);
|
|
return 0 if (!@stat);
|
|
return $stat[9];
|
|
}
|
|
|
|
sub is_recent {
|
|
my ($stamp, $wind) = @_;
|
|
return 0 unless ($stamp && $stamp =~ m/^\s*[0-9.]/);
|
|
$wind = $window * 2 unless $wind;
|
|
return 1 if $stamp + $wind >= mars_time();
|
|
return 0;
|
|
}
|
|
|
|
sub is_link_recent {
|
|
my ($path, $wind) = @_;
|
|
my @stat = lstat($path);
|
|
return 0 if (!@stat);
|
|
return is_recent($stat[9], $wind);
|
|
}
|
|
|
|
sub get_alive_link {
|
|
my ($name, $peer, $unchecked) = @_;
|
|
my $path = "$mars/actual-$peer/$name";
|
|
my $result;
|
|
if ($compat_alivelinks) {
|
|
$result = get_link($path, 2);
|
|
my $path_old = "$mars/$name-$peer";
|
|
my $result_old = get_link($path_old, 2);
|
|
return $result_old if !$result;
|
|
# determine the newer link
|
|
if (get_link_stamp($path_old) > get_link_stamp($path)) {
|
|
return $result_old if $result_old;
|
|
}
|
|
} else {
|
|
$result = get_link($path, $unchecked);
|
|
}
|
|
return $result;
|
|
}
|
|
|
|
sub get_alive_stamp {
|
|
my ($name, $peer) = @_;
|
|
my $path = "$mars/actual-$peer/$name";
|
|
my $result = get_link_stamp($path);
|
|
if ($compat_alivelinks) {
|
|
my $path_old = "$mars/$name-$peer";
|
|
my $result_old = get_link_stamp($path_old);
|
|
return $result_old if !$result;
|
|
# determine the newer link
|
|
if ($result_old > $result) {
|
|
return $result_old;
|
|
}
|
|
}
|
|
return $result;
|
|
}
|
|
|
|
sub alive_glob {
|
|
my ($name, $hosts) = @_;
|
|
$hosts = "*" unless $hosts;
|
|
my %peers;
|
|
foreach my $path (lamport_glob("$mars/actual-$hosts/$name")) {
|
|
next unless $path =~ m:/actual-(.+)/:;
|
|
my $peer = $1;
|
|
next unless $peer;
|
|
$peers{$peer} = 1;
|
|
}
|
|
if ($compat_alivelinks) {
|
|
foreach my $path (lamport_glob("$mars/$name-$hosts")) {
|
|
next unless $path =~ m:/$name-(.+):;
|
|
my $peer = $1;
|
|
next unless $peer;
|
|
$peers{$peer} = 1;
|
|
}
|
|
}
|
|
return sort alphanum_cmp keys(%peers);
|
|
}
|
|
|
|
sub to_tmp {
|
|
my $path = shift;
|
|
$path =~ s:^(.*)/:$1/.tmp.$$.:;
|
|
return $path;
|
|
}
|
|
|
|
sub set_link {
|
|
my ($src, $dst) = @_;
|
|
# safeguard trailing slashes
|
|
$dst =~ s:/$::;
|
|
my $dst_tmp = to_tmp($dst);
|
|
unlink($dst_tmp);
|
|
symlink($src, $dst_tmp) or ldie "cannot create symlink '$dst' -> '$src'\n";
|
|
# the _order_ is important! remove existing intermediate element before re-appanding
|
|
if (exists($link_hash{$dst})) {
|
|
my @copy = @link_list;
|
|
@link_list = ();
|
|
foreach my $elem (@copy) {
|
|
next if $elem eq $dst;
|
|
push @link_list, $elem;
|
|
}
|
|
}
|
|
$link_hash{$dst} = $src;
|
|
push @link_list, $dst;
|
|
}
|
|
|
|
sub finish_links {
|
|
my ($timestamp) = @_;
|
|
return unless @link_list;
|
|
$timestamp = mars_time() unless $timestamp;
|
|
lprint "using lamport timestamp $timestamp\n" if $verbose;
|
|
my $trigger_code = 1;
|
|
my $count = 0;
|
|
while (my $link = shift @link_list) {
|
|
my $link_tmp = to_tmp($link);
|
|
my $target = readlink($link_tmp);
|
|
next unless defined($target);
|
|
my $this_timestamp = $timestamp;
|
|
unless (system("touch -h -d \"\@$this_timestamp\" $link_tmp") == 0) {
|
|
lwarn "cannot set mtime on symlink '$link_tmp'\n";
|
|
}
|
|
if ($dry_run) {
|
|
lprint "DRY_RUN: would create symlink '$link' -> '$target'\n";
|
|
unlink($link_tmp);
|
|
next;
|
|
}
|
|
# allow replacement of directories with symlinks
|
|
rmdir($link) if -d $link;
|
|
unless (rename($link_tmp, $link)) {
|
|
lwarn "cannot finalize symlink '$link'\n";
|
|
} elsif ($verbose) {
|
|
lprint "created symlink '$link' -> '$target'\n";
|
|
}
|
|
$count++;
|
|
$trigger_code = 2 if $link =~ m:/(primary|todo-global|ip):;
|
|
}
|
|
_trigger($trigger_code) if $count > 0;
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# global checks
|
|
|
|
sub get_global_versions {
|
|
unless (%FLAGS_FEATURES) {
|
|
foreach my $txt (keys(%FEATURES_FLAGS)) {
|
|
my $mask = $FEATURES_FLAGS{$txt};
|
|
$FLAGS_FEATURES{$mask} = $txt;
|
|
$chksum_features |= $mask if $txt =~ m/^CHKSUM/;
|
|
$compress_features |= $mask if $txt =~ m/^COMPRESS/;
|
|
}
|
|
}
|
|
unless (defined($ARGV[0]) && $ARGV[0] =~ m/cluster|cat/) {
|
|
my $act_dir = "$mars/actual-$host";
|
|
mkdir($act_dir) unless -d $act_dir;
|
|
$kernel_version = get_alive_link("tree", $host, 1);
|
|
if ($kernel_version && $user_version != $kernel_version) {
|
|
lwarn "kernel_version=$kernel_version user_version=$user_version\n";
|
|
if ($user_version < $kernel_version) {
|
|
ldie "Sorry, your MARS kernel module uses version $kernel_version, but my $0 userspace version is only $user_version. That cannot work. Please upgrade your userspace scripts!\n";
|
|
} elsif (int($user_version) > int($kernel_version) + 1) {
|
|
ldie "MAJOR VERSION mismatch : only one major upgrade step is supported.\n";
|
|
}
|
|
lwarn "using different minor versions is possible, but you should upgrade your kernel module ASAP\n";
|
|
}
|
|
}
|
|
# compute the mimimum of kernel features capabilities
|
|
foreach my $peer (get_any_peers()) {
|
|
my $features = get_alive_link("features", $peer, 1);
|
|
next unless (defined($features) && $features);
|
|
next unless $features =~ m/^([0-9]+),?([0-9]*),?([x0-9a-f]*)/;
|
|
$features = $1;
|
|
my $strategy = $2;
|
|
my $flags = $3;
|
|
next unless $features > 0;
|
|
if ($kernel_features_version < 0 || $features < $kernel_features_version) {
|
|
$kernel_features_version = $features;
|
|
}
|
|
if (defined($strategy) && $strategy) {
|
|
if ($strategy < $kernel_strategy_version || $kernel_strategy_version < 0) {
|
|
$kernel_strategy_version = $strategy;
|
|
}
|
|
} else {
|
|
$kernel_strategy_version = 0;
|
|
}
|
|
if (defined($flags) && $flags ne "") {
|
|
$flags = hex($flags) if $flags =~ m/^0x/;;
|
|
$kernel_flags_version &= $flags;
|
|
} else {
|
|
$kernel_flags_version = 0x0;
|
|
}
|
|
}
|
|
# can ssh be switched off?
|
|
if ($kernel_strategy_version >= 5) {
|
|
$ssh_port = 0;
|
|
}
|
|
# determine cluster-wide $compat_* values
|
|
if ($kernel_strategy_version >= 3) {
|
|
$compat_alivelinks = 0;
|
|
}
|
|
my $compat_path = "$mars/compat-deletions";
|
|
$compat_deletions = get_link($compat_path, 2);
|
|
if ($kernel_features_version < 3 || !defined($compat_deletions) || $compat_deletions eq "") {
|
|
$compat_deletions = 1;
|
|
}
|
|
}
|
|
|
|
sub get_alive_links {
|
|
my $res = shift;
|
|
my $alive = shift || "alive";
|
|
my $hosts = shift || "*";
|
|
my $warn = shift || 0;
|
|
my $non_participating = shift || 0;
|
|
$res = "*" if (!$res || $res eq "all" || $res =~ m/,/);
|
|
my $use_remote_stamp = $alive =~ s/^\^// ? 1 : 0;
|
|
my @peer_list;
|
|
if ($hosts eq $real_host) {
|
|
# needed at join-cluster when nothing is known about others
|
|
@peer_list = ($real_host);
|
|
} elsif ($non_participating) {
|
|
@peer_list = get_total_peers();
|
|
} else {
|
|
@peer_list = get_any_peers($res ne "*" ? $res : undef);
|
|
}
|
|
my %peers;
|
|
foreach my $peer (@peer_list) {
|
|
next if ($peer eq $host && $hosts ne $host);
|
|
# After join-cluster & co, links may take a while to appear
|
|
$peers{$peer} = 1 if $non_participating;
|
|
# peer must be a candiate matching the hosts spec
|
|
if ($hosts && $hosts ne "*") {
|
|
next unless $peer =~ m/(^|[+,{}])$hosts($|[+,{}])/;
|
|
}
|
|
# OK: remember peer
|
|
$peers{$peer} = 1;
|
|
}
|
|
$peers{$host} = 1 if $hosts eq $host;
|
|
return %peers unless %peers;
|
|
my %links;
|
|
foreach my $peer (keys(%peers)) {
|
|
my $val = get_alive_link($alive, $peer, 2);
|
|
# When required and possible, get the _remote_ timestamp
|
|
# when the link tree was read _remotely_.
|
|
if ($use_remote_stamp) {
|
|
my $remote_path = "$mars/actual-$peer/read-stamp";
|
|
my $remote_val = get_link($remote_path, 2);
|
|
# check compatibility to old versions, and downgrades
|
|
if ($compat_alivelinks && $remote_val) {
|
|
my $path = "$mars/$alive-$peer";
|
|
my $stamp1 = get_link_stamp($path);
|
|
my $stamp2 = get_link_stamp($remote_path);
|
|
# Try to prefer the new remote stamp.
|
|
# Only use the old one when too much outdated.
|
|
if ($stamp2 &&
|
|
(!$stamp1 || $stamp2 + $window >= $stamp1)) {
|
|
$val = $remote_val;
|
|
}
|
|
}
|
|
}
|
|
$links{$peer} = $val;
|
|
}
|
|
my $projection = $any_peers{$host};
|
|
if ($projection && $warn) {
|
|
my $now = mars_time();
|
|
my $extra_count = 0;
|
|
my %own_resources = %$projection;
|
|
foreach my $peer (keys(%peers)) {
|
|
my $stamp = get_alive_link("time", $peer, 2);
|
|
next if (!$stamp || $stamp !~ m/^\s*[0-9.]/ || is_recent($stamp));
|
|
my $common = 0;
|
|
my @peer_resources = get_any_resources($peer);
|
|
foreach my $tmp_res (@peer_resources) {
|
|
next unless $own_resources{$tmp_res};
|
|
$common++;
|
|
last;
|
|
}
|
|
my $age = seconds2human($now - $stamp);
|
|
if (!$common) {
|
|
# Non-member peers are updated much less frequently,
|
|
# thus we need a much larger time window.
|
|
if (!is_recent($stamp, 3600)) {
|
|
$extra_count++;
|
|
if ($verbose) {
|
|
lwarn "nonmember peer '$peer' not reachable for $age\n";
|
|
}
|
|
}
|
|
next;
|
|
}
|
|
my $msg = "no metadata is arriving from peer '$peer', age = $age";
|
|
$msg .= " => check your network setup" if is_module_loaded();
|
|
lwarn "$msg\n";
|
|
}
|
|
if ($extra_count) {
|
|
lwarn "$extra_count non-member peers are NOT REACHABLE for >= 1 hour\n";
|
|
}
|
|
}
|
|
return %links;
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# timeout handling
|
|
|
|
#
|
|
# return the lamport clock time in nanosecond resolution
|
|
# fallback to system time()
|
|
#
|
|
sub mars_time {
|
|
my $lamport_time;
|
|
if (open(my $lamport_clock, "<", "/proc/sys/mars/lamport_clock")) {
|
|
while (<$lamport_clock>) {
|
|
$lamport_time = $1 if /^lamport_now=(.*)/;
|
|
}
|
|
close($lamport_clock);
|
|
}
|
|
return $lamport_time || time() . "." . '0' x 9;
|
|
}
|
|
|
|
my $timeout_val = undef;
|
|
|
|
sub sleep_timeout {
|
|
my $sleeptime = shift || 1;
|
|
my $continue = shift;
|
|
if ($timeout < 0) {
|
|
sleep($sleeptime);
|
|
return 0;
|
|
}
|
|
if (!defined($timeout_val)) {
|
|
$timeout_val = $timeout;
|
|
lprint "Resetting timeout to $timeout\n" if $verbose > 0;
|
|
}
|
|
if ($timeout_val <= 0) {
|
|
$timeout_val = undef;
|
|
if (!defined($continue) || !$continue) {
|
|
ldie "Timeout reached.\n";
|
|
}
|
|
lwarn "Timeout reached. Continuing anyway.\n";
|
|
return 1;
|
|
}
|
|
my $rest = $timeout_val;
|
|
$rest = $sleeptime if $rest > $sleeptime;
|
|
sleep($rest);
|
|
$timeout_val -= $rest;
|
|
return 0;
|
|
}
|
|
|
|
# wait for some condition
|
|
sub wait_cond {
|
|
my ($cmd, $res, $specific) = @_;
|
|
my $is_actual = ($specific =~ s/^(is|has)-//);
|
|
$specific =~ s/^todo-//;
|
|
my $is_on = !($specific =~ s/-(off|0)$//);
|
|
$specific =~ s/-(on|1)$//;
|
|
if ($is_actual) {
|
|
if ($specific eq "device") {
|
|
check_mars_device($cmd, $res, 1, !$is_on);
|
|
return;
|
|
}
|
|
my %table =
|
|
(
|
|
"attach" => "is-attached",
|
|
"attached" => "is-attached",
|
|
"replay" => "is-replaying",
|
|
"replaying"=> "is-replaying",
|
|
"fetch" => "is-copying",
|
|
"fetching" => "is-copying",
|
|
"copy" => "is-copying",
|
|
"copying" => "is-copying",
|
|
"sync" => "is-syncing",
|
|
"syncing" => "is-syncing",
|
|
"primary" => "is-primary",
|
|
"secondary"=> "is-primary",
|
|
);
|
|
my $name = $table{$specific};
|
|
ldie "actual indicator '$specific' does not exist\n" unless exists($table{$specific});
|
|
$is_on = !$is_on if $name eq "secondary";
|
|
check_status($cmd, $res, $name, $is_on ? 1 : 0, 1, 1);
|
|
} else {
|
|
my %table =
|
|
(
|
|
"attach" => "attach",
|
|
"attached" => "attach",
|
|
"fetch" => "fetch",
|
|
"connect" => "fetch",
|
|
"replay" => "replay",
|
|
"sync" => "sync",
|
|
"primary" => "primary",
|
|
"secondary" => "secondary",
|
|
);
|
|
my $name = $table{$specific};
|
|
ldie "button '$specific' does not exist\n" unless exists($table{$specific});
|
|
check_todo($cmd, $res, $name, $is_on ? 1 : 0, 1);
|
|
}
|
|
}
|
|
|
|
# wait until everything is recent
|
|
sub wait_cluster {
|
|
my ($cmd, $res, $hosts, $abort, $trigger_code, $condition) = @_;
|
|
$res = "all" if (!$res || $res =~ m/,/);
|
|
$hosts = "*" unless $hosts;
|
|
$abort = $force unless defined($abort);
|
|
$trigger_code = ($hosts =~ m/\*/ ? 3 : 2) unless $trigger_code;
|
|
my $non_participating = ($trigger_code >= 8);
|
|
$timeout_val = undef;
|
|
get_global_versions();
|
|
finish_links();
|
|
if ($hosts eq $real_host && $kernel_strategy_version >= 5) {
|
|
return 0;
|
|
}
|
|
my $max_restarts = 3;
|
|
my $max_resets = 1;
|
|
restart:
|
|
lprint "Ping $hosts trigger=$trigger_code\n";
|
|
_trigger($trigger_code);
|
|
my %old_status = get_alive_links($res, "time", $hosts, 0, $non_participating);
|
|
my $total_count = scalar(keys(%old_status));
|
|
my $start_time = mars_time();
|
|
my %progress;
|
|
foreach my $peer (keys(%old_status)) {
|
|
$progress{$peer} = $non_participating ? 1 : 0;
|
|
}
|
|
my $count = scalar(keys(%old_status));
|
|
if ($count && $count < 10) {
|
|
lprint "Wait for answers from " . join(",", sort alphanum_cmp keys(%old_status)) . "\n";
|
|
} else {
|
|
lprint "Wait for answers from $count peers\n";
|
|
}
|
|
my $delta = $window > 0 ? $window / 2 : 30;
|
|
while (1) {
|
|
# Early abort when condition is met
|
|
if ($condition) {
|
|
my $status = eval($condition);
|
|
lprint "Condition '$condition' status=$status\n" if $verbose;
|
|
return $status if $status;
|
|
}
|
|
my $dead_count = 0;
|
|
my $alive_count = 0;
|
|
my $unknown_count = 0;
|
|
# Notice: prefer the remote timestamps for race prevention
|
|
my %status = get_alive_links($res, "^time", $hosts, 0, $non_participating);;
|
|
my $now = mars_time();
|
|
foreach my $peer (keys(%status)) {
|
|
next if ($peer eq $host && $hosts ne $host);
|
|
if (!$status{$peer}) {
|
|
# This can happen at join-cluster & co
|
|
my $ip_link = "$mars/ips/ip-$peer";
|
|
my $ip_link_stamp = get_link_stamp($ip_link);
|
|
if ($ip_link_stamp &&
|
|
$ip_link_stamp + $delta < $now &&
|
|
$ip_link_stamp > 1) {
|
|
$dead_count++;
|
|
} elsif ($start_time + $delta < $now) {
|
|
$dead_count++;
|
|
} else {
|
|
$unknown_count++;
|
|
}
|
|
next;
|
|
} elsif (!$old_status{$peer}) {
|
|
# Oh, only the old info is outdated, but we have a new one.
|
|
# Also possible at join-cluster: restart freshly
|
|
lwarn "Need restart for getting more 'time' links\n";
|
|
goto restart if $max_restarts-- > 0;
|
|
}
|
|
if ($status{$peer} !~ m/^[0-9.]+$/) {
|
|
$dead_count++;
|
|
next;
|
|
}
|
|
if ($status{$peer} > $old_status{$peer} &&
|
|
$status{$peer} > $start_time &&
|
|
$progress{$peer}-- <= 0) {
|
|
$alive_count++;
|
|
} elsif ($status{$peer} + $delta < $now &&
|
|
$old_status{$peer} + $delta < $now) {
|
|
$dead_count++;
|
|
} else {
|
|
$unknown_count++;
|
|
}
|
|
}
|
|
if ($unknown_count) {
|
|
_trigger($trigger_code);
|
|
# ensure more progress will happen
|
|
%old_status = %status if !$max_resets++;
|
|
} else {
|
|
if (!$dead_count) {
|
|
lprint "$alive_count/$total_count peer(s) seem to be alive\n";
|
|
} else {
|
|
lwarn "$alive_count/$total_count peer(s) seem to be alive, and $dead_count peer(s) seem to be dead / not reachable\n";
|
|
ldie "aborting (--force was not given)\n" if $abort;
|
|
}
|
|
last;
|
|
}
|
|
sleep_timeout(1, 1);
|
|
last if $timeout_val <= 0 && !$unknown_count;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
sub wait_cluster_noforce {
|
|
my ($cmd, $res) = @_;
|
|
if (!$force) {
|
|
lprint "WAITING for communication\n" if $verbose;
|
|
wait_cluster($cmd, $res, "*", 0);
|
|
}
|
|
}
|
|
|
|
sub update_cluster {
|
|
my ($cmd, $res, $hosts, $condition) = @_;
|
|
$res = "all" if (!$res || $res =~ m/,/);
|
|
$hosts = "*" unless $hosts;
|
|
lprint "UPDATING $res\n" if $verbose;
|
|
my $status = wait_cluster($cmd, $res, $hosts, 0, 8, $condition);
|
|
_reset_resources();
|
|
return $status;
|
|
}
|
|
|
|
sub is_cluster_recent {
|
|
my ($cmd, $res, $hosts) = @_;
|
|
my $dead_count = 0;
|
|
my $alive_count = 0;
|
|
my $unknown_count = 0;
|
|
my $now = mars_time();
|
|
my %status = get_alive_links($res, "time", $hosts);
|
|
foreach my $peer (keys(%status)) {
|
|
next if $peer eq $host;
|
|
if ($status{$peer} + $window/2 >= $now) {
|
|
$alive_count++;
|
|
} elsif ($status{$peer} + $window < $now) {
|
|
$dead_count++;
|
|
} else {
|
|
$unknown_count++;
|
|
}
|
|
}
|
|
return ($dead_count, $alive_count, $unknown_count);
|
|
}
|
|
|
|
sub recent_cluster {
|
|
my $cmd = shift;
|
|
my $res = shift;
|
|
$res = "all" if (!$res || $res =~ m/,/);
|
|
my $hosts = shift || "*";
|
|
my ($dead_count, $alive_count, $unknown_count) = is_cluster_recent($cmd, $res, $hosts);
|
|
return 1 if (!$dead_count && !$unknown_count);
|
|
wait_cluster($cmd, $res, $hosts, 0);
|
|
return 0;
|
|
}
|
|
|
|
# Newer kernel modules do no longer work on non-member resources
|
|
# by defaults, for scalability reasons.
|
|
# Activate such a resource as a guest.
|
|
sub _activate_resource {
|
|
my ($cmd, $res, $peer) = @_;
|
|
$peer = $host unless $peer;
|
|
finish_links();
|
|
if (!$res || $res eq "*" || $res eq "all") {
|
|
lwarn "no usable resource to activate ($res).\n";
|
|
return;
|
|
}
|
|
my $resdir = "$mars/resource-$res";
|
|
my $max_retry = 3;
|
|
until (-d $resdir) {
|
|
my $condition = "return -d \"$resdir\";";
|
|
update_cluster($cmd, $res, "*", $condition);
|
|
last if -d $resdir;
|
|
if ($max_retry-- < 0) {
|
|
ldie "Resource directory '$resdir' does not exist\n";
|
|
}
|
|
lwarn "Resource directory '$resdir' does not yet exist, fetching...\n";
|
|
sleep(3);
|
|
}
|
|
lprint "OK, resource directory '$resdir' exists.\n";
|
|
my $check_link = "$resdir/size";
|
|
$max_retry = 3;
|
|
until (get_link($check_link, 1)) {
|
|
my $condition = "return get_link(\"$check_link\", 1);";
|
|
update_cluster($cmd, $res, "*", $condition);
|
|
if ($max_retry-- < 0) {
|
|
ldie "link '$check_link' does not exist.\n";
|
|
}
|
|
lwarn "link '$check_link' does not yet exist, fetching...\n";
|
|
sleep(3);
|
|
}
|
|
lprint "OK, link '$check_link' exists.\n";
|
|
my $active_path = "$resdir/device-$peer";
|
|
my $check = get_link($active_path, 2);
|
|
if ($check) {
|
|
if ($peer ne $real_host) {
|
|
_push_check($peer, "", $resdir);
|
|
_push_check($peer, "", $active_path);
|
|
}
|
|
lprint "Resource '$res' was already activated at '$peer'\n";
|
|
return;
|
|
}
|
|
lprint "ENABLING resource '$res' at '$peer'\n";
|
|
# Ensure guest communication: a faked device-$peer must exist
|
|
# for activation.
|
|
# Notice: this may be later overwritten with another value.
|
|
set_link($res, $active_path);
|
|
# prevent fallback to old device detection
|
|
my $actual_dir = "$resdir/actual-$peer";
|
|
mkdir($actual_dir);
|
|
my $if_path = "$actual_dir/if-on";
|
|
set_link("0", $if_path);
|
|
finish_links();
|
|
_push_check($peer, "", $active_path) if $peer ne $real_host;
|
|
# wait for (self-)activation
|
|
wait_cluster($cmd, $res, $peer, 0);
|
|
# activated peers might have changed
|
|
my $primary = _get_designated_primary($res, 1);
|
|
for (my $retry = 3; $retry >= 0; $retry--) {
|
|
_trigger(3);
|
|
my $condition = "return _get_designated_primary(\"$res\", 1);";
|
|
my $from = ($primary && $primary ne "(none)") ? $primary : "*";
|
|
my $old_primary = $primary;
|
|
$primary = update_cluster($cmd, $res, $from, $condition);
|
|
last if ($primary && $primary eq $old_primary);
|
|
sleep(3);
|
|
}
|
|
}
|
|
|
|
sub activate_guest {
|
|
my ($cmd, $res, $peer) = @_;
|
|
$peer = $host unless $peer;
|
|
my $resdir = "$mars/resource-$res";
|
|
if ($cmd =~ m/deactivate/) {
|
|
ldie "resource directory '$resdir' does not exist.\n" unless -d $resdir;
|
|
my $check_glob = "$resdir/*{data,replay}-$peer";
|
|
if (lamport_glob($check_glob)) {
|
|
ldie "peer '$peer' ist a storage member of resource '$res'\n";
|
|
}
|
|
my $guest_path = "$resdir/device-$peer";
|
|
_create_delete($guest_path);
|
|
return;
|
|
}
|
|
_activate_resource($cmd, $res);
|
|
ldie "Could not get resource name '$res'\n" unless -d $resdir;
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# syntactic checks
|
|
# (also check for existence)
|
|
|
|
sub check_id {
|
|
my ($str, $must_exist) = @_;
|
|
ldie "identifier '$str' is empty" unless defined($str) && $str;
|
|
ldie "identifier '$str' has disallowed characters" unless $str =~ m/^[A-Za-z0-9_][-A-Za-z0-9_]*$/;
|
|
ldie "identifier '$str' is too long (only 63 chars supported according to RFC 1123)" if length($str) > 63;
|
|
if (defined($must_exist) && $must_exist) {
|
|
my $ip_path = "$mars/ips/ip-$str";
|
|
unless (get_link($ip_path, 1)) {
|
|
ldie "host '$str' does not exist in $mars/ips/\n" if !$force;
|
|
lwarn "host '$str' does not exist in $mars/ips/ - hopefully you know what you are doing via --force\n";
|
|
}
|
|
}
|
|
}
|
|
|
|
sub check_id_list {
|
|
my ($str, $must_exist) = @_;
|
|
ldie "comma-separated list '$str' has disallowed characters" unless $str =~ m/^[A-Za-z0-9_][-A-Za-z0-9_,]*$/;
|
|
foreach my $id (split(",", $str)) {
|
|
check_id($id, $must_exist);
|
|
}
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# semantic checks
|
|
|
|
sub check_res {
|
|
my $res = shift;
|
|
wait_cluster("wait-cluster", "all", "*", 0) unless -d "$mars/resource-$res";
|
|
if (not -d "$mars/resource-$res") {
|
|
# Ensure that all cluster information is present
|
|
# DO WHAT I MEAN: try to substitute a device name for a badly given resource name if it is unique
|
|
my $count = 0;
|
|
my $found;
|
|
my @tests = lamport_glob("$mars/resource-*/device-$host");
|
|
foreach my $test (@tests) {
|
|
my $target = get_link($test, 2);
|
|
if ($target eq $res) {
|
|
$found = $test;
|
|
$count++;
|
|
}
|
|
}
|
|
if (!$count) {
|
|
@tests = lamport_glob("$mars/resource-*/_direct-*-$host");
|
|
foreach my $test (@tests) {
|
|
my $target = get_link($test, 2);
|
|
$target =~ s/^.*,//;
|
|
if ($target eq $res) {
|
|
$found = $test;
|
|
$count++;
|
|
}
|
|
}
|
|
}
|
|
ldie "resource '$res' does not exist ($count replacements found)\n" unless $count == 1 and $found;
|
|
$found =~ s:^.*/resource-(.*)/.*$:$1:;
|
|
lwarn "substituting bad resource name '$res' by uniquely matching resource name '$found'\n";
|
|
$res = $found;
|
|
}
|
|
return $res;
|
|
}
|
|
|
|
sub _get_mars_size {
|
|
my ($cmd, $res) = @_;
|
|
my $dev_name = get_link("$mars/resource-$res/device-$host");
|
|
my $info = "/sys/devices/virtual/block/mars!$dev_name/size";
|
|
return `cat $info` * 512;
|
|
}
|
|
|
|
sub check_sizes {
|
|
my ($res, $peer) = @_;
|
|
my $physical_size = get_link("$mars/resource-$res/actsize-$peer", 2) || return;
|
|
my $logical_size = get_link("$mars/resource-$res/size", 1);
|
|
if (defined($physical_size) && $physical_size < $logical_size) {
|
|
lwarn "Physical device on host '$peer' has size $physical_size, which is smaller than the logical resource size $logical_size\n";
|
|
ldie "This is too dangerous. It cannot work. Fix it!\n" unless $force;
|
|
}
|
|
}
|
|
|
|
sub check_res_member {
|
|
my ($cmd, $res) = @_;
|
|
if (! link_exists("$mars/resource-$res/data-$host")) {
|
|
if (link_exists("$mars/resource-$res/device-$host")) {
|
|
# guest
|
|
return;
|
|
}
|
|
if (link_exists("$mars/resource-$res/replay-$host")) {
|
|
lwarn "Resource '$res' seems to have been destroyed.\n";
|
|
lwarn "Nevertheless, a replay link exists for host '$host'.\n";
|
|
lwarn "This can happen after 'leave-resource --host=$host' while host $host was active.\n";
|
|
lwarn "mars-user-manual.pdf forbids any usage of such a resource _strongly_.\n";
|
|
lwarn "In order to finally remove this resource from $host, use the\n";
|
|
lwarn "command 'marsadm leave-resource --force $res'\n";
|
|
} else {
|
|
lwarn "Sorry, I have not yet joined to resource '$res'\n";
|
|
}
|
|
ldie "Refusing work on resource name '$res'\n" unless $force;
|
|
lwarn "Running '$cmd' is dangerous, continuing on your own risk\n" unless $cmd eq "leave-resource";
|
|
}
|
|
check_sizes($res, $host);
|
|
}
|
|
|
|
sub _sync_finished {
|
|
my ($res, $peer) = @_;
|
|
my $lnk = "$mars/resource-$res/syncstatus-$peer";
|
|
return 0 unless link_exists($lnk);
|
|
my $syncstatus = get_link($lnk, 1);
|
|
my $size = get_link("$mars/resource-$res/size");
|
|
return 0 if ($size <= 0);
|
|
return 0 if ($syncstatus < $size);
|
|
my $syncpos = get_link("$mars/resource-$res/syncpos-$peer", 2);
|
|
if (defined($syncpos) && $syncpos) {
|
|
my $replay = get_link("$mars/resource-$res/replay-$peer", 1);
|
|
return 0 unless $replay;
|
|
my $cmp = compare_replaylinks($syncpos, $replay);
|
|
return 0 if $cmp > 0;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
sub check_sync_finished {
|
|
my ($res, $peer, $do_force) = @_;
|
|
check_sizes(@_);
|
|
if (!_sync_finished($res, $peer)) {
|
|
lwarn "Sync has not yet finished on host '$peer'\n";
|
|
if ($peer eq $host) {
|
|
lwarn "Don't try to make inconsistent host '$host' the new primary!\n";
|
|
lwarn "Please wait until sync has finished and all logfile have been replayed.\n";
|
|
ldie "Refusing to switch inconsistent host '$host' to primary\n";
|
|
} else {
|
|
lwarn "Changing the primary role during sync is dangerous for data consistency on host '$peer'!\n";
|
|
}
|
|
unless ($do_force) {
|
|
lprint "HINT: consider the option --ignore-sync if you are sure that you want to restart the sync\n";
|
|
ldie "First wait for sync finished before primary handover, or use --ignore-sync\n";
|
|
}
|
|
}
|
|
lprint "OK, it seems that sync has finished on host '$peer'.\n";
|
|
}
|
|
|
|
sub check_primary {
|
|
my ($cmd, $res, $no_designated, $no_fail) = @_;
|
|
my $fail = 0;
|
|
my $lnk = "$mars/resource-$res/actual-$host/is-primary";
|
|
my $is_primary = get_link($lnk, 1);
|
|
if (!$is_primary) { # give it a second chance
|
|
$is_primary = device_exists($res);
|
|
}
|
|
unless ($is_primary) {
|
|
lwarn "For operation '$cmd' I need to be primary\n";
|
|
$fail++;
|
|
}
|
|
if ($no_designated) {
|
|
my $primary = _get_designated_primary($res);
|
|
unless ($primary eq $host) {
|
|
lwarn "For operation '$cmd', I also must be the designated primary\n";
|
|
}
|
|
}
|
|
unless (defined($no_fail) && $no_fail) {
|
|
ldie "Operation '$cmd' only works at the primary side\n" if $fail;
|
|
}
|
|
return $fail;
|
|
}
|
|
|
|
sub check_not_primary {
|
|
my ($cmd, $res, $allow_force) = @_;
|
|
my $lnk = "$mars/resource-$res/actual-$host/is-primary";
|
|
my $is_primary = get_link($lnk, 1);
|
|
if ($is_primary) {
|
|
if (!$force || !defined($allow_force) || !$allow_force) {
|
|
ldie "operation '$cmd' cannot be executed on actual primary\n";
|
|
}
|
|
lwarn "operation '$cmd' is forced on actual primary '$host', THIS IS RISKY\n";
|
|
}
|
|
# also check whether we intend to become primary
|
|
my $primary = _get_designated_primary($res, 1);
|
|
if ($primary eq $host) {
|
|
lwarn "operation '$cmd' cannot be executed on designated primary\n";
|
|
ldie "first switch the designated primary, or use --force if you are sure.\n" unless $force;
|
|
lwarn "continuing anyway due to --force\n";
|
|
}
|
|
}
|
|
|
|
sub check_primary_gone {
|
|
my ($cmd, $res, $peer) = @_;
|
|
return 0 unless (defined($peer) && $peer && $peer ne "(none)");
|
|
return 0 if $peer eq $host;
|
|
my $lnk = "$mars/resource-$res/actual-$peer/is-primary";
|
|
my $is_primary = get_link($lnk, 1);
|
|
if (defined($is_primary) && $is_primary eq "0") {
|
|
lprint "OK, other host ($peer) is not actual primary.\n";
|
|
return 0;
|
|
}
|
|
lprint "waiting for other primary host ($peer) to disappear....\n";
|
|
return 1;
|
|
}
|
|
|
|
sub _make_messages {
|
|
my ($cmd, $res, $key, $val, $wait, $unchecked, $inv) = @_;
|
|
my %table =
|
|
(
|
|
"is-attached" => "attach",
|
|
"is-syncing" => "sync",
|
|
"is-copying" => "fetch",
|
|
"is-replaying" => "replay",
|
|
"is-primary" => "primary",
|
|
);
|
|
my $key_msg = defined($table{$key}) ? $table{$key} : $key;
|
|
my $val_msg = $val ? "on" : "off";
|
|
my $wait_msg = $wait ? "waiting $timeout until" : "ensuring that";
|
|
my $new_val = (defined($inv) && $inv) ? !$val : $val;
|
|
my $action_msg = ($new_val ? "'resume-" : "'pause-") . $key_msg . "'";
|
|
if ($key_msg eq "primary") {
|
|
$action_msg = $new_val ? "'$key_msg'" : "'secondary' (or better 'primary' on another host)";
|
|
} elsif ($key_msg eq "attach") {
|
|
$action_msg = $new_val ? "'$key_msg'" : "'detach'";
|
|
}
|
|
$action_msg = "Use $action_msg and/or wait until it has really succeeded.";
|
|
$action_msg .= " Notice that MARS is an ASYNCHRONOUS system, where both execution of actions, as well as propagation over the network may take some time." if $verbose;
|
|
$action_msg .= " Don't expect magic to happen if the network has some problems, or when the system load is extremely high." if $verbose > 1;
|
|
$action_msg .= " Please read the PDF manual, and try to understand it." if $verbose > 2;
|
|
return ($key_msg, $val_msg, $wait_msg, $action_msg);
|
|
}
|
|
|
|
sub check_todo {
|
|
my ($cmd, $res, $key, $val, $wait, $unchecked, $inv) = @_;
|
|
$key =~ s/fetch/connect/;
|
|
my $path = "$mars/resource-$res/todo-$host/$key";
|
|
$path = correct_path($path);
|
|
my $link;
|
|
for (;;) {
|
|
$link = get_link($path, $unchecked);
|
|
return unless defined($link);
|
|
$link = ($link eq $host) ? 1 : 0 if $key eq "primary";
|
|
$link = ($link eq "(none)") ? 1 : 0 if $key eq "secondary";
|
|
my ($key_msg, $val_msg, $wait_msg, $action_msg) = _make_messages(@_);
|
|
if (defined($inv) && $inv) {
|
|
last if $link != $val;
|
|
lprint "$wait_msg switch '$key' != '$val'....\n";
|
|
ldie "Cannot execute $cmd on resource $res: todo-switch '$key_msg' must not be $val_msg. $action_msg\n" if !$wait;
|
|
} else {
|
|
last if $link == $val;
|
|
lprint "$wait_msg switch '$key' == '$val'....\n";
|
|
ldie "Cannot execute $cmd on resource $res: todo-switch '$key_msg' must be $val_msg, but actually has value '$link'. $action_msg\n" if !$wait;
|
|
}
|
|
sleep_timeout();
|
|
}
|
|
lprint "OK, '$path' has acceptable value '$link'\n";
|
|
}
|
|
|
|
sub _run_compensation {
|
|
my ($compensation) = @_;
|
|
if (defined($compensation)) {
|
|
my $txt = $compensation;
|
|
$txt =~ s/\n/\\n/mg;
|
|
lprint "Running compensation action '$txt'\n";
|
|
eval $compensation;
|
|
}
|
|
}
|
|
|
|
sub check_status {
|
|
my ($cmd, $res, $key, $val, $wait, $unchecked, $inv, $peer, $action, $compensation) = @_;
|
|
$peer = $host unless defined($peer);
|
|
my $path = correct_path("$mars/resource-$res/actual-$peer/$key");
|
|
my $link;
|
|
my $rounds = 0;
|
|
my $fail_round = 10;
|
|
for (;;) {
|
|
$link = get_link($path, $unchecked);
|
|
$link = 0 unless (defined($link) && $link ne "");
|
|
my ($key_msg, $val_msg, $wait_msg, $action_msg) = _make_messages(@_);
|
|
if (defined($inv) && $inv) {
|
|
last if $link != $val;
|
|
lprint "at $peer: $wait_msg actual '$key' != '$val'...\n";
|
|
ldie "Cannot execute $cmd on resource $res: actual '$key_msg' must not be $val_msg. $action_msg Also ensure that your command _can_ succeed.\n" if !$wait;
|
|
} else {
|
|
last if $link == $val;
|
|
lprint "at $peer: $wait_msg actual '$key' == '$val'...\n";
|
|
ldie "Cannot execute $cmd on resource $res: actual '$key_msg' must be $val_msg. $action_msg Also ensure that your command _can_ succeed.\n" if !$wait;
|
|
}
|
|
if (defined($action) && $action && $rounds > 1) {
|
|
lprint "Action: $action\n" if $verbose > 2;
|
|
my $action_status = 0;
|
|
my $old_error_count = $error_count;
|
|
eval "$action";
|
|
$error_count = $old_error_count;
|
|
# Tolerate intermediate failures for some time
|
|
if ($action_status && $rounds > $fail_round) {
|
|
lwarn "failed action $action_status: $action\n";
|
|
_run_compensation($compensation);
|
|
ldie "Action failure, status=$action_status\n";
|
|
}
|
|
}
|
|
my $status = sleep_timeout(undef, 1);
|
|
if ($status) {
|
|
_run_compensation($compensation);
|
|
ldie "Timeout\n";
|
|
}
|
|
$rounds++;
|
|
}
|
|
lprint "OK at $peer: '$path' has acceptable value '$link'\n";
|
|
}
|
|
|
|
sub check_mars_device {
|
|
my ($cmd, $res, $wait, $inv) = @_;
|
|
my $dev = device_name($res);
|
|
my $backoff = 1;
|
|
my $round = 0;
|
|
if ($inv) {
|
|
while (device_exists($res)) {
|
|
ldie "cannot execute $cmd: device '$dev' has not yet disappeared\n" if !$wait;
|
|
lwarn "device '$dev' has not yet disappeared\n";
|
|
sleep_timeout($backoff);
|
|
# very slowly increasing backoff
|
|
if ($backoff < 10 && $round++ > 5) {
|
|
$round = 0;
|
|
$backoff++;
|
|
}
|
|
systemd_activate($cmd, $res, 0, 1);
|
|
}
|
|
lprint "device '$dev' is no longer present\n" unless device_exists($res);
|
|
return;
|
|
}
|
|
# !$inv
|
|
my $primary = _get_designated_primary($res);
|
|
ldie "for operation '$cmd', I should be the designated primary\n" unless $primary eq $host;
|
|
while (!device_exists($res)) {
|
|
my $text = get_error_text($cmd, $res);
|
|
lprint $text if $text;
|
|
ldie "aborting due to errors\n" if $text =~ m/error/mi;
|
|
ldie "cannot execute $cmd: device '$dev' not yet present\n" if !$wait;
|
|
lprint "device '$dev' not yet present\n";
|
|
sleep_timeout($backoff);
|
|
# very slowly increasing backoff
|
|
if ($backoff < 10 && $round++ > 5) {
|
|
$round = 0;
|
|
$backoff++;
|
|
}
|
|
}
|
|
lprint "device '$dev' is present\n" if device_exists($res);
|
|
}
|
|
|
|
sub check_userspace {
|
|
my ($dst) = @_;
|
|
if ($dst !~ m:/userspace/:) {
|
|
ldie "your path '$dst' must be inside $mars/userspace/ or $mars/resource-*/userspace/\n" unless $force;
|
|
lwarn "your path '$dst' is outside $mars/userspace/ or $mars/resource-*/userspace/ and you gave --force, hopefully YOU KNOW WHAT YOU ARE DOING\n";
|
|
}
|
|
}
|
|
|
|
sub check_sync_startable {
|
|
my ($cmd, $res) = @_;
|
|
my $primary = _get_designated_primary($res);
|
|
ldie "Cannot execute '$cmd' because noone is designated as primary.\n" if (!$primary || $primary eq "(none)");
|
|
# no danger when switch is turned on at the primary side.
|
|
return if $primary eq $host;
|
|
my $emergency_path = "$mars/resource-$res/actual-$primary/has-emergency";
|
|
my $emergency = get_link($emergency_path, 1);
|
|
if ($emergency) {
|
|
ldie "Primary '$primary' is in emergency mode. Cannot start sync.\nFree some space there first.\n";
|
|
}
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# state inspection routines
|
|
|
|
sub _get_minmax {
|
|
my ($res, $glob, $take_symlink) = @_;
|
|
my $min = -1;
|
|
my $max = -1;
|
|
my @paths = lamport_glob($glob) or lwarn "cannot find '$glob'\n";
|
|
foreach my $path (@paths) {
|
|
my $nr = $path;
|
|
if ($take_symlink) {
|
|
$nr = get_link($path, 1);
|
|
}
|
|
$nr =~ s@^(?:.*/)?[a-z]+-([0-9]+)(-[^/]*)?$@$1@;
|
|
$min = $nr if ($nr < $min || $min < 0);
|
|
$max = $nr if ($nr > $max || $max < 0);
|
|
}
|
|
return ($min, $max);
|
|
}
|
|
|
|
sub get_minmax_logfiles {
|
|
my ($res, $peer) = @_;
|
|
$peer = "" unless defined($peer);
|
|
return _get_minmax($res, "$mars/resource-$res/log-*$peer", 0);
|
|
}
|
|
|
|
sub get_minmax_versions {
|
|
my ($res, $peer) = @_;
|
|
$peer = "" unless defined($peer);
|
|
return _get_minmax($res, "$mars/resource-$res/version-*$peer", 0);
|
|
}
|
|
|
|
sub get_minmax_replays {
|
|
my ($res, $peer) = @_;
|
|
$peer = "" unless defined($peer);
|
|
return _get_minmax($res, "$mars/resource-$res/replay-*$peer", 1);
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# generic comparisons
|
|
|
|
sub compare_replaylinks {
|
|
my ($a, $b) = @_;
|
|
$a =~ m/log-([0-9]+)[^,]*,([0-9]+)/;
|
|
my ($a_log, $a_pos) = ($1, $2);
|
|
$b =~ m/log-([0-9]+)[^,]*,([0-9]+)/;
|
|
my ($b_log, $b_pos) = ($1, $2);
|
|
return -1 if $a_log < $b_log;
|
|
return +1 if $a_log > $b_log;
|
|
return -1 if $a_pos < $b_pos;
|
|
return +1 if $a_pos > $b_pos;
|
|
return 0;
|
|
}
|
|
|
|
##################################################################
|
|
|
|
sub get_amount {
|
|
my ($resdir, $host, $direction, $only_files) = @_;
|
|
my $level = 0;
|
|
my $firstpos = 0;
|
|
my $logpos = 0;
|
|
my $oldpos = 0;
|
|
my $sum = 0;
|
|
for (;;) {
|
|
my $val0 = 0;
|
|
my $ok = 0;
|
|
if (!$level) {
|
|
my $replay_path = sprintf("%s/replay-%s", $resdir, $host);
|
|
my $replay_link = get_link($replay_path, 1);
|
|
return (0, 0, 0, 0) if !$replay_link;
|
|
return (0, 0, 0, 0) if $replay_link !~ m:log-([0-9]+)-[^,]+,([0-9]+),([0-9]+):;
|
|
$firstpos = int($1);
|
|
$logpos = $firstpos;
|
|
$oldpos = $firstpos;
|
|
if ($direction < 0) {
|
|
$sum = $2;
|
|
} else {
|
|
$val0 = $2;
|
|
$sum = $3;
|
|
}
|
|
}
|
|
if ($level > 0 || $direction > 0) {
|
|
my $file = sprintf("%s/log-%09d-%s", $resdir, $logpos, $host);
|
|
my @stat = stat($file);
|
|
my $val = 0;
|
|
if (@stat && file_exists($file)) {
|
|
$val = $stat[7];
|
|
$ok = 1;
|
|
} else {
|
|
my $glob = sprintf("%s/log-%09d-*", $resdir, $logpos);
|
|
foreach $file (lamport_glob($glob)) {
|
|
my @tstat = stat($file);
|
|
if (@tstat && $tstat[7] > $val) {
|
|
@stat = @tstat;
|
|
$val = $stat[7];
|
|
$ok = 1;
|
|
}
|
|
}
|
|
}
|
|
if (!$only_files) {
|
|
my $glob = sprintf("%s/version-%09d-*", $resdir, $logpos);
|
|
foreach $file (lamport_glob($glob)) {
|
|
my $vers_link = get_link($file, 1);
|
|
if ($vers_link && $vers_link =~ m;,([0-9]+):;) {
|
|
my $nval = $1;
|
|
$val = $nval if $nval > $val;
|
|
$ok = 1;
|
|
}
|
|
}
|
|
}
|
|
return ($sum, $firstpos, $oldpos, $level) unless $ok;
|
|
if ($level > 0) {
|
|
$sum += $val;
|
|
} else {
|
|
# the logfile may be bigger than the replay pos/length
|
|
my $new_sum = $val - $val0;
|
|
$sum = $new_sum if $new_sum > $sum;
|
|
$sum = 0 if $sum < 0;
|
|
}
|
|
}
|
|
$oldpos = $logpos;
|
|
if ($direction > 0) {
|
|
$logpos++;
|
|
} elsif ($direction < 0) {
|
|
$logpos--;
|
|
return ($sum, $firstpos, $oldpos, $level) if $logpos < 1;
|
|
} else {
|
|
return ($sum, $firstpos, $oldpos, $level);
|
|
}
|
|
$level++;
|
|
}
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# versionlink path handling routines
|
|
|
|
my %visited_pos;
|
|
|
|
sub _visit {
|
|
my ($nr, $peer) = @_;
|
|
$nr =~ s:^0*::;
|
|
my $visit = "$nr,$peer";
|
|
$visited_pos{$visit} = 1;
|
|
}
|
|
|
|
sub _is_visited {
|
|
my ($nr, $peer) = @_;
|
|
$nr =~ s:^0*::;
|
|
my $visit = "$nr,$peer";
|
|
return $visited_pos{$visit};
|
|
}
|
|
|
|
sub _parse_pos {
|
|
my ($basedir, $pos) = @_;
|
|
if ($pos =~ m/log-([0-9]+)-([^,]+)/) {
|
|
_visit($1, $2);
|
|
} elsif ($pos =~ m/version-([0-9]+)-([^,]+)/p) {
|
|
my $vers = get_link("$basedir/$MATCH");
|
|
my $count_matches = 0;
|
|
while ($vers =~ m/log-([0-9]+)-([^,]+)/p) {
|
|
$vers = $POSTMATCH;
|
|
_visit($1, $2);
|
|
$count_matches++;
|
|
}
|
|
lwarn "cannot parse '$pos' -> '$vers'\n" unless $count_matches;
|
|
} else {
|
|
lwarn "cannot parse '$pos'\n";
|
|
}
|
|
$pos =~ m/((?:log|version)-([0-9]+)-([^,]+)(?:,([0-9]+))?)/ or lwarn "cannot parse position info '$pos'\n";
|
|
return ($1, int($2), $3, defined($4) ? int($4) : -1);
|
|
}
|
|
|
|
sub _get_prev_pos {
|
|
my ($basedir, $nr, $peer) = @_;
|
|
my $path = sprintf("version-%09d-$peer", $nr);
|
|
my $vers = get_link("$basedir/$path", 2);
|
|
if (!defined($vers) || !$vers) {
|
|
# Scarce race.
|
|
# This can happen when new logfiles are present but not yet worked on.
|
|
# Also improves robustness on damaged filesystems.
|
|
# Decrement nr by hand, try, take the first part.
|
|
$path = sprintf("version-%09d-$peer", $nr - 1);
|
|
$vers = get_link("$basedir/$path", 2);
|
|
$vers =~ s/:.*// if defined($vers) && $vers;
|
|
} else {
|
|
# take the last part, pointing to the predecessor versionlink.
|
|
$vers =~ s/^.*://;
|
|
}
|
|
_parse_pos($basedir, $path) if defined($vers) && $vers;
|
|
return $vers;
|
|
}
|
|
|
|
sub _get_common_ancestor {
|
|
my ($total1, $total2) = (0, 0);
|
|
for (;;) {
|
|
my ($basedir, $pos1, $host1, $dep1, $pos2, $host2, $dep2) = @_;
|
|
my ($p1, $nr1, $from1, $len1) = _parse_pos($basedir, $pos1);
|
|
my ($p2, $nr2, $from2, $len2) = _parse_pos($basedir, $pos2);
|
|
if ($p1 eq $p2) {
|
|
# usually no split brain here (only if both path depths are non-zero)
|
|
my $split = ($dep1 && $dep2);
|
|
if (!$split) { # additionally check the corresponding version links
|
|
my $path1 = sprintf("$basedir/version-%09d-$from1", $nr1);
|
|
my $path2 = sprintf("$basedir/version-%09d-$from2", $nr2);
|
|
if (my $vers1 = get_link($path1, 1) and my $vers2 = get_link($path2, 1)) {
|
|
if ($vers1 ne $vers2) {
|
|
$split = 1;
|
|
$total1 += $len1;
|
|
$total2 += $len2;
|
|
}
|
|
}
|
|
}
|
|
return ($p1, $split, $total1, $total2);
|
|
} elsif ($nr1 > $nr2) {
|
|
# just flip arguments
|
|
@_ = ($basedir, $pos2, $host2, $dep2, $pos1, $host1, $dep1);
|
|
($total1, $total2) = ($total2, $total1);
|
|
next;
|
|
} elsif ($nr1 < $nr2) {
|
|
# recursively advance path depth
|
|
$total2 += $len2;
|
|
my $vers2 = _get_prev_pos($basedir, $nr2, $host2);
|
|
return ("", -1, $total1, $total2) if !$vers2;
|
|
@_ = ($basedir, $pos1, $host1, $dep1, $vers2, $host2, $dep2 + 1);
|
|
next;
|
|
} elsif ($from1 ne $from2) {
|
|
# split brain is sure now, but continue computing the common split point
|
|
my $vers1 = _get_prev_pos($basedir, $nr1, $host1);
|
|
return ("", 1, $total1 + $len1, $total2 + $len2) if !$vers1;
|
|
my $vers2 = _get_prev_pos($basedir, $nr2, $host2);
|
|
return ("", 1, $total1 + $len1, $total2 + $len2) if !$vers2;
|
|
my ($res, $split, $t1, $t2) = _get_common_ancestor($basedir, $vers1, $host1, $dep1 + 1, $vers2, $host2, $dep2 + 1);
|
|
return ($res, 1, $t1 + $total1 + $len1, $t2 + $total2 + $len2);
|
|
} elsif ($len1 < $len2) {
|
|
# there may be no split brain (just incomplete replay) depending on path depth
|
|
return ($p1, $dep1, $total1 + $len1, $total2 + $len2);
|
|
} elsif ($len2 < $len1) {
|
|
# dto symmetric
|
|
return ($p2, $dep2, $total1 + $len1, $total2 + $len2);
|
|
}
|
|
lwarn "error in algorithm: $p1, $nr1, $from1, $len1 : $p2, $nr2, $from2, $len2\n";
|
|
return ("", -1, 0, 0);
|
|
}
|
|
}
|
|
|
|
sub get_common_ancestor {
|
|
my ($basedir, $host1, $host2) = @_;
|
|
my $repl1 = get_link("$basedir/replay-$host1", 1);
|
|
my $repl2 = get_link("$basedir/replay-$host2", 1);
|
|
return _get_common_ancestor($basedir, $repl1, $host1, 0, $repl2, $host2, 0);
|
|
}
|
|
|
|
my %detected_splits = ();
|
|
|
|
sub detect_splitbrain {
|
|
my ($res, $do_report) = @_;
|
|
# dynamic programming
|
|
return $detected_splits{$res} if defined($detected_splits{$res});
|
|
my $basedir = "$mars/resource-$res";
|
|
my $ok = 1;
|
|
my @list = lamport_glob("$mars/resource-$res/replay-*");
|
|
my @hosts = map { $_ =~ s:.*/replay-::; $_ } @list;
|
|
foreach my $host1 (@hosts) {
|
|
foreach my $host2 (@hosts) {
|
|
next if $host1 ge $host2;
|
|
my ($point, $split, $size1, $size2);
|
|
for (my $retry = 2; $retry > 0; $retry--) {
|
|
($point, $split, $size1, $size2) = get_common_ancestor($basedir, $host1, $host2);
|
|
last if !$split;
|
|
# Workaround races.
|
|
sleep(0);
|
|
}
|
|
if ($split) {
|
|
$ok = 0;
|
|
if ($do_report) {
|
|
my $age = "";
|
|
if ($point) {
|
|
my $log = "$basedir/$point";
|
|
$log =~ s:,.+::;
|
|
my $stamp = get_link_stamp($log);
|
|
my $vers = $log;
|
|
$vers =~ s:/log-:/version-:;
|
|
my $stamp2 = get_link_stamp($vers);
|
|
# take the minimum
|
|
$stamp = $stamp2 if !$stamp || ($stamp2 && $stamp2 < $stamp);
|
|
$age = " age ~" . seconds2human(mars_time() - $stamp) if $stamp;
|
|
}
|
|
lwarn "SPLIT BRAIN of resource '$res' after logfile '$point'$age\n";
|
|
if ($point) {
|
|
lwarn " hostA = '$host1' logfile_amount='$size1' (" . number2human($size1) . ")\n";
|
|
lwarn " hostB = '$host2' logfile_amount='$size2' (" . number2human($size2) . ")\n";
|
|
}
|
|
} else {
|
|
return $ok;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if ($ok) { # check for duplicate logfiles
|
|
my @logs = lamport_glob("$mars/resource-$res/log-*");
|
|
my $oldnr = -1;
|
|
foreach my $path (sort alphanum_cmp @logs) {
|
|
$path =~ m:/log-([0-9]+):;
|
|
my $nr = $1;
|
|
if ($nr == $oldnr) {
|
|
$ok = 0;
|
|
lwarn "SPLIT BRAIN at resource '$res' detected: duplicate logfile number $nr\n";
|
|
lwarn "hint: first resolve split brain by 'leave-resource' or 'invalidate'\n";
|
|
lwarn "hint: if this does not help, try cleanup via 'log-purge-all'\n";
|
|
lwarn "hint: if this does not help, try 'log-purge-all --force'\n";
|
|
last;
|
|
}
|
|
$oldnr = $nr;
|
|
}
|
|
}
|
|
$detected_splits{$res} = $ok;
|
|
return $ok;
|
|
}
|
|
|
|
sub _mark_path_backward {
|
|
my ($basedir, $pos, $peer, $skip, $jump_peer) = @_;
|
|
my $sum = 0;
|
|
my $base_nr = 0;
|
|
for (;;) {
|
|
my ($p, $nr, $from, $len) = _parse_pos($basedir, $pos);
|
|
last if defined($skip) && $nr < $skip;
|
|
$base_nr = $nr;
|
|
_visit($nr, $peer);
|
|
# When following chains from foreign hosts (e.g. the designated primary),
|
|
# we must jump over to our own chain somewhen, because the lengths of
|
|
# the chains may be different (caused by invalidate & friends).
|
|
if (defined($jump_peer) && $jump_peer ne $peer) {
|
|
my $peer_path = sprintf("$basedir/version-%09d-$peer", $nr);
|
|
my $jump_path = sprintf("$basedir/version-%09d-$jump_peer", $nr);
|
|
my $peer_version = get_link($peer_path, 2);
|
|
my $jump_version = get_link($jump_path, 2);
|
|
if (defined($peer_version) && defined($jump_version) && $peer_version eq $jump_version) {
|
|
$peer = $jump_peer;
|
|
}
|
|
}
|
|
$pos = _get_prev_pos($basedir, $nr, $peer, 1);
|
|
last if !$pos;
|
|
# optionally don't count the last versionlink, pointing into nirvana
|
|
if (defined($skip) && $skip && $nr > 1) {
|
|
my ($p, $nr, $from, $len) = _parse_pos($basedir, $pos);
|
|
last if !$p;
|
|
my $next = _get_prev_pos($basedir, $nr, $peer, 1);
|
|
last if !$next;
|
|
}
|
|
$sum += $len;
|
|
}
|
|
return ($sum, $base_nr);
|
|
}
|
|
|
|
sub _mark_path_forward {
|
|
my ($basedir, $pos, $peer) = @_;
|
|
my @list = ($pos);
|
|
while (@list) {
|
|
my %next_list;
|
|
foreach $pos (@list) {
|
|
my ($p, $nr, $from, $len) = _parse_pos($basedir, $pos);
|
|
my $cand = sprintf("$basedir/version-%09d-$peer", $nr + 1);
|
|
my $vers = get_link($cand, 2);
|
|
next unless defined($vers) && $vers ne "";
|
|
$vers =~ s/^.*://;
|
|
my ($cp, $cnr, $cfrom, $clen) = _parse_pos($basedir, $vers);
|
|
if (int($cnr) == int($nr) && $cfrom eq $from && $clen == $len) {
|
|
$next_list{$cand} = 1;
|
|
}
|
|
}
|
|
@list = keys(%next_list);
|
|
}
|
|
}
|
|
|
|
sub _mark_path_transitive {
|
|
_mark_path_forward(@_);
|
|
_mark_path_backward(@_);
|
|
}
|
|
|
|
sub log_purge_res {
|
|
my ($cmd, $res) = @_;
|
|
lwarn "DANGEROUS OPERATION: $cmd --force on resource '$res'\n" if $force;
|
|
%visited_pos = ();
|
|
my %logs;
|
|
my %start_logs;
|
|
my $basedir = "$mars/resource-$res";
|
|
my $max_retry = 3;
|
|
retry:
|
|
my $start_count = 0;
|
|
my %situation = ();
|
|
foreach my $data (lamport_glob("$basedir/{data,replay}-*")) {
|
|
$data =~ m:/(data|replay)-(.+):;
|
|
my $peer = $2;
|
|
my $replay = "$basedir/replay-$peer";
|
|
my $target = get_link($replay, 1);
|
|
next unless $target;
|
|
lprint "found replay link '$replay' -> '$target'\n";
|
|
# only the logfile number is relevant, not the positions
|
|
$target =~ s/,.*//;
|
|
$situation{$replay} = $target;
|
|
$start_logs{$target}++;
|
|
$start_count++;
|
|
_mark_path_transitive($basedir, $target, $peer);
|
|
}
|
|
if (!$start_count) {
|
|
lprint "Resource '$res' contains no valid information - there is nothing to purge\n";
|
|
return;
|
|
}
|
|
my %to_delete = ();
|
|
foreach my $file (lamport_glob("$basedir/version-*")) {
|
|
$file =~ m:/(version-([0-9]+)-([^,]+)): or ldie "bad path '$file'\n";
|
|
my $cand = $1;
|
|
my $nr = $2;
|
|
my $from = $3;
|
|
lprint "checking '$cand'\n";
|
|
my $vers = get_link($file, 1);
|
|
$vers =~ m/(log-[0-9]+-[^,:]+)/;
|
|
my $log = $1;
|
|
lprint " corresponding logfile is '$log'\n";
|
|
if (_is_visited($nr, $from)) {
|
|
lprint " ok '$cand'\n";
|
|
$logs{$log}++;
|
|
next;
|
|
}
|
|
if (!$force && $from ne $host) {
|
|
lprint " skipping foreign object '$cand'\n";
|
|
$logs{$log}++;
|
|
next;
|
|
}
|
|
lwarn "deleting foreign object from peer '$from' because you said --force\n" if $from ne $host;
|
|
$to_delete{$file}++;
|
|
}
|
|
foreach my $file (lamport_glob("$basedir/log-*")) {
|
|
$file =~ m:/(log-[0-9]+-(.*)): or ldie "bad path '$file'\n";
|
|
my $log = $1;
|
|
my $from = $2;
|
|
lprint "checking '$log'\n";
|
|
if ($logs{$log}) {
|
|
lprint " ok '$log'\n";
|
|
$logs{$log} = -1;
|
|
next;
|
|
}
|
|
if ($start_logs{$log}) {
|
|
lprint " ok start '$log'\n";
|
|
$logs{$log} = -1;
|
|
next;
|
|
}
|
|
if (!$force && is_link_recent($log)) {
|
|
lprint " skipping recent object '$log'\n";
|
|
next;
|
|
}
|
|
if (!$force && $from ne $host) {
|
|
lprint " skipping foreign object '$log'\n";
|
|
next;
|
|
}
|
|
lwarn "deleting foreign object from peer '$from' because you said --force\n" if $from ne $host;
|
|
$to_delete{$file}++;
|
|
}
|
|
# check for any races in the initial situation
|
|
my $nr_races = 0;
|
|
foreach my $replay (sort keys(%situation)) {
|
|
my $old_situation = $situation{$replay};
|
|
my $target = get_link($replay, 1);
|
|
# only the logfile number is relevant, not the positions
|
|
$target =~ s/,.*//;
|
|
if (!$target || $target ne $old_situation) {
|
|
lwarn "Race on '$replay' -> '$target' instead of '$old_situation'\n";
|
|
$nr_races++;
|
|
}
|
|
}
|
|
if ($nr_races) {
|
|
if ($max_retry-- > 0) {
|
|
lwarn "Restarting due to $nr_races races\n";
|
|
sleep(1);
|
|
goto retry;
|
|
}
|
|
ldie "Detected $nr_races, the situation is not stable\n";
|
|
}
|
|
# POINT OF NO RETURN
|
|
foreach my $file (sort keys(%to_delete)) {
|
|
_create_delete($file);
|
|
}
|
|
my $count = 0;
|
|
foreach my $log (sort alphanum_cmp keys(%logs)) {
|
|
my $nr = $logs{$log};
|
|
next if $nr < 0 || any_exists("$basedir/$log");
|
|
lprint_stderr "info: logfile '$log' is referenced ($nr), but not present.\n";
|
|
$count++;
|
|
}
|
|
if ($count) {
|
|
lprint_stderr " Unreferenced logfiles are not necessarily bad.\n";
|
|
lprint_stderr " They can regularly appear after 'leave-resource',\n";
|
|
lprint_stderr " or 'invalidate', or after emergency mode,\n";
|
|
lprint_stderr " or after similar operations.\n";
|
|
}
|
|
finish_links();
|
|
}
|
|
|
|
sub err_purge_res {
|
|
my ($cmd, $res) = @_;
|
|
foreach my $leftlink (lamport_glob("$mars/{,resource-$res/}actual-$host/msg-err-*")) {
|
|
lprint " deleting '$leftlink'\n" if $verbose;
|
|
_create_delete($leftlink);
|
|
}
|
|
finish_links();
|
|
}
|
|
|
|
sub try_to_avoid_splitbrain {
|
|
my ($cmd, $res, $old_primary) = @_;
|
|
my $old_timeout = $timeout;
|
|
$old_primary = "" if $old_primary eq "(none)";
|
|
if (!detect_splitbrain($res, 0)) {
|
|
lwarn "ATTENTION: you are starting a non-forced primary switchover in a split brain situation.\n";
|
|
lwarn "ATTENTION: that's no good idea.\n";
|
|
lwarn "ATTENTION: I will continue to do what you want.\n";
|
|
lwarn "ATTENTION: But you are responsible for the consequences.\n";
|
|
return 0;
|
|
}
|
|
# now try to prevent producing a _new_ split brain situation....
|
|
my @host_list = lamport_glob("$mars/resource-$res/replay-*");
|
|
return 0 if scalar(@host_list) < 2;
|
|
my ($min, $max) = get_minmax_versions($res);
|
|
my $vers_glob = sprintf("$mars/resource-$res/version-%09d-*", $max);
|
|
my $ok = 1;
|
|
my $replay_err_path = "$mars/resource-$res/actual-$host/msg-err-replay-stop";
|
|
my $replay_err = get_link($replay_err_path, 1);
|
|
if ($replay_err && $replay_err ne "OK") {
|
|
my @stat = lstat($replay_err_path);
|
|
if ($stat[9] + $window >= mars_time()) {
|
|
my $msg = _get_text($replay_err);
|
|
lwarn "cannot guarantee split brain avoidance: $msg\n";
|
|
return 0;
|
|
}
|
|
}
|
|
my $emergency_path = "$mars/resource-$res/actual-$host/has-emergency";
|
|
my $emergency = get_link($emergency_path, 1);
|
|
if ($emergency) {
|
|
ldie "emergency mode $emergency has been entered locally: handover is not possible. Either free some space in $mars/, or use --force to use a potentially outdated version.\n";
|
|
}
|
|
my $primary = _get_designated_primary($res, -1);
|
|
if ($primary eq "(none)") {
|
|
# try to determine the old primary when unique
|
|
my $glob_logs = sprintf("$mars/resource-$res/log-%09d-*", $max);
|
|
my @candidates = lamport_glob($glob_logs);
|
|
if (scalar(@candidates) == 1) {
|
|
my $log_path = pop @candidates;
|
|
if ($log_path =~ m:/log-[0-9]+-(.+)$:) {
|
|
$primary = $1;
|
|
lprint "Using last primary '$primary' as a substitute.\n";
|
|
}
|
|
}
|
|
}
|
|
if ($primary && $primary eq $host) {
|
|
lprint "Switching back to last primary.\n";
|
|
return 0;
|
|
}
|
|
# if the old primary is known, we can ignore all other / unrelated hosts
|
|
if ($primary && $primary ne $host && $primary ne "(none)") {
|
|
my $path_p = "$mars/resource-$res/replay-$primary";
|
|
my $path_h = "$mars/resource-$res/replay-$host";
|
|
my $replay_p = get_link($path_p);
|
|
my $replay_h = get_link($path_h);
|
|
my $p_path = sprintf("$mars/resource-$res/version-%09d-%s", $max, $primary);
|
|
my $h_path = sprintf("$mars/resource-$res/version-%09d-%s", $max, $host);
|
|
my $p_vers = get_link($p_path, 1);
|
|
my $h_vers = get_link($h_path, 1);
|
|
if (!$p_vers || !$h_vers || $p_vers ne $h_vers) {
|
|
# Safeguard against artificial or missing version links
|
|
# (e.g. after crashes etc):
|
|
# when replay links are equal, we are safe.
|
|
my $stamp_p = get_link_stamp($path_p);
|
|
my $stamp_h = get_link_stamp($path_h);
|
|
my $p_stamp = get_link_stamp($p_path);
|
|
my $stable_stamp = mars_time() - $window / 4;
|
|
if ($replay_p eq $replay_h &&
|
|
$p_vers &&
|
|
$p_stamp < $stable_stamp &&
|
|
$stamp_p < $stable_stamp && $stamp_h < $stable_stamp) {
|
|
# self-healing the versionlink
|
|
lwarn "Correcting the versionlink '$h_path' from '$h_vers' to '$p_vers'\n";
|
|
set_link($p_vers, $h_path);
|
|
finish_links();
|
|
} else {
|
|
$ok = 0;
|
|
}
|
|
} elsif (!$replay_p || !$replay_h || $replay_p ne $replay_h) {
|
|
$ok = 0;
|
|
}
|
|
} else {
|
|
# old primary is unknown: we have no chance, other than comparing _all_ versions.
|
|
my @versions = lamport_glob($vers_glob);
|
|
my $first = get_link(shift @versions);
|
|
while (@versions) {
|
|
my $next = get_link(shift @versions);
|
|
if ($next ne $first) {
|
|
$ok = 0;
|
|
}
|
|
}
|
|
}
|
|
return 0 if $ok;
|
|
lprint "Trying to avoid split brain for $timeout s: logfile update not yet completed.\n";
|
|
my $tpl = get_macro("replinfo");
|
|
my $new_situation = eval_macro($cmd, $res, $tpl, @_);
|
|
print $new_situation;
|
|
# condition not met
|
|
return 1;
|
|
}
|
|
|
|
sub get_size {
|
|
my $arg = shift;
|
|
my $orig_arg = $arg;
|
|
# Do what I mean: when given a device, take its actual size.
|
|
if (-b $arg) {
|
|
my $mangled = $arg;
|
|
$mangled =~ s:/dev/::;
|
|
$mangled =~ s:/:!:g;
|
|
my $path = "/sys/block/$mangled/size";
|
|
$arg = `cat $path` * 512 if -r $path;
|
|
}
|
|
if ($arg !~ m/^([0-9]+(?:\.[0-9]*)?)([kmgtp]?)$/i) {
|
|
ldie "Size argument '$arg' must be a number, optionally followed by a suffix [kKmMgGtTpP]. Lowercase = multiples of 1000, Uppercase = multiples of 1024.\n";
|
|
}
|
|
my $mod = $2 || "";
|
|
$arg = $1;
|
|
$_ = $mod;
|
|
SWITCH: {
|
|
/^$/ and last SWITCH;
|
|
/^k$/ and $arg *= 1000, last SWITCH;
|
|
/^m$/ and $arg *= 1000 * 1000, last SWITCH;
|
|
/^g$/ and $arg *= 1000 * 1000 * 1000, last SWITCH;
|
|
/^t$/ and $arg *= 1000 * 1000 * 1000 * 1000, last SWITCH;
|
|
/^p$/ and $arg *= 1000 * 1000 * 1000 * 1000 * 1000, last SWITCH;
|
|
/^K$/ and $arg *= 1024, last SWITCH;
|
|
/^M$/ and $arg *= 1024 * 1024, last SWITCH;
|
|
/^G$/ and $arg *= 1024 * 1024 * 1024, last SWITCH;
|
|
/^T$/ and $arg *= 1024 * 1024 * 1024 * 1024, last SWITCH;
|
|
/^P$/ and $arg *= 1024 * 1024 * 1024 * 1024 * 1024, last SWITCH;
|
|
ldie "bad unit suffix '$mod'";
|
|
}
|
|
ldie "size argument '$orig_arg' evaluating to '$arg' is not a multiple of 4K = 4096\n" if ($arg % 4096) != 0;
|
|
return $arg;
|
|
}
|
|
|
|
# DEPRECATED
|
|
#
|
|
# TST NOTE: avoid calling this function. As such it is conceptually wrong,
|
|
# because during split-brain situations, there exists a _set_ of non-unique
|
|
# primaries. I want to remove this function, but I currently can't because
|
|
# other internal software at 1&1 is depending on it.
|
|
|
|
#
|
|
# Get actual primary node from links below actual-*/ subdirs
|
|
#
|
|
sub _get_actual_primary {
|
|
my ($res) = @_;
|
|
# TST: Presence of local device takes precedence over anything else.
|
|
# This tries to workaround the most important special case of
|
|
# split-brain situations, but cannot fix the problem exhaustively.
|
|
llog "DEPRECATED: you are trying to uniquely identify an actual primary hostname (as seen from host $host resource $res), but this is conceptually wrong because in split-brain situations there may exist multiple ones. Use view-is-primary instead. That would be safe.\n";
|
|
return $host if device_exists($res);
|
|
# The following old code is CONCEPTUALLY WRONG for split-brain situations (see NOTE above)
|
|
my @primary_links = lamport_glob("$mars/resource-$res/actual-*/is-primary");
|
|
my $primary;
|
|
foreach my $link (@primary_links) {
|
|
if (my $val = get_link($link)) {
|
|
$primary = ($link =~ qr%.*actual-([^/]+)/is-primary%)[0];
|
|
last;
|
|
# Note: if there are more than one 'is-primary' links (an insane state anyway),
|
|
# the first 'is-primary' link is selected. Other links are ignored.
|
|
}
|
|
}
|
|
return $primary;
|
|
}
|
|
|
|
my %old_primary;
|
|
|
|
sub _get_designated_primary {
|
|
my ($res, $unchecked) = @_;
|
|
my $fallback_to_old = 0;
|
|
if (defined($unchecked) && $unchecked == -1) {
|
|
$fallback_to_old = 1;
|
|
$unchecked = 1;
|
|
}
|
|
my $val = get_link("$mars/resource-$res/primary", $unchecked);
|
|
if (!defined($val) || !$val || $val eq "(none)") {
|
|
if ($fallback_to_old) {
|
|
$val = $old_primary{$res} if defined($old_primary{$res});
|
|
if (!defined($val) || !$val || $val eq "(none)") {
|
|
my $last_lnk = "$mars/resource-$res/userspace/last-primary";
|
|
$val = get_link($last_lnk, 2);
|
|
}
|
|
}
|
|
} else {
|
|
$old_primary{$res} = $val;
|
|
}
|
|
return $val;
|
|
}
|
|
|
|
sub is_actual_primary {
|
|
my ($cmd, $res, $peer) = @_;
|
|
$peer = $host unless (defined($peer) && $peer);
|
|
my $is_primary = get_link("$mars/resource-$res/actual-$peer/is-primary");
|
|
# notice: device presence _must not_ be used anymore.
|
|
return $is_primary;
|
|
}
|
|
|
|
sub __conv_tv {
|
|
my ($tv_sec, $tv_nsec) = @_;
|
|
if (defined($tv_nsec)) {
|
|
$tv_nsec = ".$tv_nsec";
|
|
} else {
|
|
$tv_nsec = "";
|
|
}
|
|
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(int($tv_sec));
|
|
return "$tv_sec$tv_nsec" unless defined($sec);
|
|
return sprintf("%04d-%02d-%02d %02d:%02d:%02d%s", $year+1900, $mon + 1, $mday, $hour, $min, $sec, $tv_nsec);
|
|
}
|
|
|
|
sub __conv_errno {
|
|
my ($txt, $error) = @_;
|
|
return "$txt$error" if !defined($error) || ($error <= 0);
|
|
$! = $error;
|
|
my $res = "${txt}[$!]";
|
|
$! = 0;
|
|
return $res;
|
|
}
|
|
|
|
sub _replace_timestamps {
|
|
my ($txt, $omit_nsec) = @_;
|
|
if (defined($omit_nsec) && $omit_nsec) {
|
|
$txt =~ s:([0-9]{9,99})\.([0-9]{9}):__conv_tv($1):ge;
|
|
} else {
|
|
$txt =~ s:([0-9]{9,99})\.([0-9]{9}):__conv_tv($1,$2):ge;
|
|
}
|
|
$txt =~ s:((error|status)\s*=?\s*-)([0-9]+):__conv_errno($1,$3):ge;
|
|
return $txt;
|
|
}
|
|
|
|
sub _get_text {
|
|
my ($glob, $regex, $do_print, $get_count) = @_;
|
|
my $text = "";
|
|
my $count = 0;
|
|
foreach my $path (lamport_glob($glob)) {
|
|
open(IN, "<", $path) or next;
|
|
while (my $line = <IN>) {
|
|
# use regex e.g. for fetching only errors and warnings
|
|
if (!$regex || $line =~ $regex) {
|
|
$line = _replace_timestamps($line);
|
|
$count++;
|
|
if ($do_print) {
|
|
print $line;
|
|
llog($line);
|
|
} else {
|
|
$text .= $line;
|
|
}
|
|
}
|
|
}
|
|
close(IN);
|
|
}
|
|
return $count if defined($get_count) && $get_count;
|
|
return $text;
|
|
}
|
|
|
|
my %global_error_texts;
|
|
|
|
sub get_error_text {
|
|
my ($cmd, $res) = @_;
|
|
my $path = "$mars/resource-$res/logstatus-$host.status";
|
|
if (defined($global_error_texts{$path})) {
|
|
return $global_error_texts{$path};
|
|
}
|
|
my $text = _get_text($path, "m/^(err|warn)/i", 0);
|
|
$global_error_texts{$path} = $text;
|
|
return $text;
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# helpers
|
|
|
|
sub _trigger {
|
|
my $code = shift || 1;
|
|
if ($dry_run) {
|
|
lprint "DRY_RUN: would execute trigger '$code'\n";
|
|
return;
|
|
}
|
|
lprint "Trigger '$code'\n" if $verbose;
|
|
system("(echo -n $code > /proc/sys/mars/trigger) >/dev/null 2>&1");
|
|
}
|
|
|
|
# Please do not misuse this.
|
|
# Use it only _exceptionally_ for creation of new memberships & co.
|
|
# Always prefer the PULL PRINCIPLE where possible.
|
|
sub _push_link {
|
|
my ($peer, $src, $dst) = @_;
|
|
if (!$peer || !$src || !$dst) {
|
|
lwarn "Bad push_link args '@_'\n";
|
|
return;
|
|
}
|
|
if ($peer eq $real_host) {
|
|
set_link($src, $dst);
|
|
return;
|
|
}
|
|
my $code = "p $peer $src $dst";
|
|
lprint "Pushing link '$dst' -> '$src' to peer '$peer'\n" if $verbose;
|
|
_trigger($code);
|
|
}
|
|
# DTO do not abuse
|
|
sub _push_link_foreign {
|
|
my ($peer, $peer_ip, $src, $dst) = @_;
|
|
if (!$peer || !$src || !$dst) {
|
|
lwarn "Bad push_link args '@_'\n";
|
|
return;
|
|
}
|
|
if ($peer eq $real_host) {
|
|
set_link($src, $dst);
|
|
return;
|
|
}
|
|
$peer_ip = _get_ip($peer) unless $peer_ip;
|
|
my $code = "P $peer $peer_ip $src $dst";
|
|
lprint "Pushing link '$dst' -> '$src' to peer '$peer' '$peer_ip'\n" if $verbose;
|
|
_trigger($code);
|
|
}
|
|
|
|
sub _push_check {
|
|
my ($peer, $peer_ip, $path) = @_;
|
|
return if $peer eq $real_host;
|
|
if (!$peer || !$path) {
|
|
lwarn "Bad push_check args '@_'\n";
|
|
return;
|
|
}
|
|
$peer_ip = _get_ip($peer) unless $peer_ip;
|
|
my $code = "c $peer $peer_ip $path";
|
|
lprint "Pushing check '$path' to peer '$peer' ip='$peer_ip'\n" if $verbose;
|
|
_trigger($code);
|
|
}
|
|
|
|
sub _push_fetch {
|
|
my ($peer, $peer_ip, $dir_path) = @_;
|
|
if (!$peer || !$dir_path) {
|
|
lwarn "Bad push_fetch args '@_'\n";
|
|
return;
|
|
}
|
|
$peer_ip = _get_ip($peer) unless $peer_ip;
|
|
my $code = "f $peer $peer_ip $dir_path";
|
|
lprint "Trigger fetch '$dir_path' from peer '$peer' ip='$peer_ip'\n" if $verbose;
|
|
lprint "$code\n" if $verbose > 1;
|
|
_trigger($code);
|
|
}
|
|
|
|
sub _switch {
|
|
my ($cmd, $res, $path, $on) = @_;
|
|
my $src = $on ? "1" : "0";
|
|
$path = correct_path($path);
|
|
my $old = get_link($path, 1);
|
|
if ($old && $old eq $src) {
|
|
lprint "${cmd} on resource $res is already activated\n" if $cmd;
|
|
return;
|
|
}
|
|
set_link($src, $path);
|
|
lprint "successfully started ${cmd} on resource $res\n" if $cmd;
|
|
}
|
|
|
|
sub _writable {
|
|
my ($path, $on) = @_;
|
|
my $oldmode = (lstat $path)[2] & 0700;
|
|
my $newmode = $on ? $oldmode | 0200 : $oldmode & ~0200;
|
|
lprint "chmod '$path' $oldmode $newmode";
|
|
chmod($newmode, $path) == 1 or ldie "cannot chmod '$path'\n";
|
|
}
|
|
|
|
sub _get_ip {
|
|
my ($peer) = @_;
|
|
$peer = $host unless $peer;
|
|
check_id($peer);
|
|
# Dynamic programming: this also applies to --ip-$peer=$peer_ip
|
|
return $known_ips{$peer} if $known_ips{$peer};
|
|
# Normally, everything should be in /mars/ips/ip-*
|
|
my $ip_path = "$mars/ips/ip-$peer";
|
|
if (my $from_link = get_link($ip_path, 2)) {
|
|
lprint_stderr "Using IP '$from_link' from '$ip_path'\n" if $verbose;
|
|
$known_ips{$peer} = $from_link;
|
|
return $from_link;
|
|
}
|
|
# Try any probe data
|
|
my $probe_path = "$mars/probe-$real_host/$mars/ips/ip-$peer";
|
|
if (my $probe_link = get_link($probe_path, 2)) {
|
|
lprint_stderr "Using PROBE IP '$probe_link' from '$probe_path'\n" if $verbose;
|
|
$known_ips{$peer} = $probe_link;
|
|
return $probe_link;
|
|
}
|
|
# Try the backups in reverse order
|
|
my $backup_glob = "$mars/backups-*/ips-backup/ip-$peer";
|
|
foreach my $backup (sort reverse_cmp glob($backup_glob)) {
|
|
my $check = get_link($backup, 2);
|
|
if ($check) {
|
|
lprint_stderr "Using BACKUP IP '$check' from '$backup'\n" if $verbose;
|
|
$known_ips{$peer} = $check;
|
|
return $check;
|
|
}
|
|
}
|
|
# Try /usr/bin/host
|
|
my $answer = `/usr/bin/host -t A $peer`;
|
|
if ($answer && $answer =~ m/([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)/) {
|
|
my $addr = $1;
|
|
lprint_stderr "DNS query for '$peer' found IPv4 address '$addr'\n";
|
|
$known_ips{$peer} = $addr;
|
|
return $addr;
|
|
}
|
|
ldie "Cannot determine foreign IP for peer '$peer'\n" if $peer ne $real_host;
|
|
chomp (my @info = `/sbin/ip addr show`);
|
|
my $interface = "";
|
|
foreach my $line (@info) {
|
|
$interface = $1 if $line =~ m#^[0-9]+:\s([a-zA-Z_0-9]+):#;
|
|
next if $interface eq "lo";
|
|
if ($line =~ m#\sinet\s(\d+\.\d+\.\d+\.\d+)#) {
|
|
my $from_if = $1;
|
|
lprint_stderr "Using IP '$from_if' from interface '$interface'\n";
|
|
$known_ips{$peer} = $from_if;
|
|
return $from_if;
|
|
}
|
|
}
|
|
ldie "Cannot determine my own IP address ($real_host)\n";
|
|
}
|
|
|
|
sub _fake_versionlink {
|
|
my ($cmd, $res, $nr_links, $log_nr, $primary) = @_;
|
|
my $basedir = "$mars/resource-$res";
|
|
my $make_count = 0;
|
|
for (my $rounds = $nr_links; $rounds > 0 && $log_nr > 0; $rounds--) {
|
|
my $new_version = sprintf("$basedir/version-%09d-$host", $log_nr);
|
|
my $pri_version = sprintf("$basedir/version-%09d-$primary", $log_nr);
|
|
if ($primary eq $host) {
|
|
ldie "Cannot fake my own version link '$new_version'\n";
|
|
}
|
|
my $retry = 3;
|
|
for (;;) {
|
|
my $pri_link = get_link($pri_version, 1);
|
|
if ($pri_link) {
|
|
lprint "Creating new versionlink '$new_version' -> '$pri_link'\n";
|
|
set_link($pri_link, $new_version);
|
|
$make_count++;
|
|
last;
|
|
}
|
|
lwarn "Primary versionlink '$pri_version' does not exist\n";
|
|
return 0 if $retry-- < 0;
|
|
# During races with log-rotate, the appearace may be delayed
|
|
wait_cluster($cmd, $res, $primary, 0);
|
|
}
|
|
$log_nr--;
|
|
}
|
|
lwarn "Cannot create faked versionlink\n" if !$make_count;
|
|
return $make_count;
|
|
}
|
|
|
|
sub _set_replaylink {
|
|
my ($cmd, $res, $log_nr, $primary, $msg) = @_;
|
|
$msg = " -- THIS IS EXTREMELY RISKY -- any inconsistencies are on your own!" unless defined($msg);
|
|
ldie "no designated primary defined\n" unless ($primary && $primary ne "(none)");
|
|
my $basedir = "$mars/resource-$res";
|
|
my $rep_path = "$basedir/replay-$host";
|
|
my $rep_val = sprintf("log-%09d-$primary,0,0", $log_nr);
|
|
lprint "creating new replaylink '$rep_path' -> '$rep_val'\n";
|
|
set_link($rep_val, $rep_path);
|
|
_push_link($primary, $rep_val, $rep_path) if $primary ne $host;
|
|
my $ok = _fake_versionlink($cmd, $res, 2, $log_nr, $primary);
|
|
set_link("$log_nr$msg", "$basedir/skip-check-$host");
|
|
return $ok;
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# lowlevel tools
|
|
|
|
sub lowlevel_ls_host_ips {
|
|
my ($cmd) = @_;
|
|
for my $path (lamport_glob("$mars/ips/ip-*")) {
|
|
$path =~ m:/ip-(.*):;
|
|
my $peer = $1;
|
|
my $ip = get_link($path, 1);
|
|
lprint "$peer $ip\n";
|
|
}
|
|
}
|
|
|
|
sub lowlevel_set_host_ip {
|
|
my ($cmd, $peer, $ip) = @_;
|
|
check_id($peer);
|
|
if (!$ip) {
|
|
$ip = _get_ip($peer);
|
|
}
|
|
my $path = "$mars/ips/ip-$peer";
|
|
my $old = get_link($path, 2) || "";
|
|
lprint "Set host '$peer' IP from '$old' to '$ip'\n";
|
|
set_link($ip, $path);
|
|
foreach my $peer2 (get_total_peers()) {
|
|
next if $peer2 eq $peer;
|
|
_push_link($peer2, $ip, $path);
|
|
}
|
|
}
|
|
|
|
sub lowlevel_delete_host {
|
|
my ($cmd, $peer) = @_;
|
|
check_id($peer, 1);
|
|
my $path = "$mars/ips/ip-$peer";
|
|
my $old = get_link($path);
|
|
lprint "Removing host '$peer' old IP '$old'\n";
|
|
if (!$compat_deletions) {
|
|
foreach my $peer2 (get_total_peers()) {
|
|
next if $peer2 eq $peer;
|
|
_push_link($peer2, ".deleted", $path);
|
|
}
|
|
}
|
|
_create_delete($path);
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# commands
|
|
|
|
sub ignore_cmd {
|
|
my ($cmd, $res) = @_;
|
|
lprint "ignoring command '$cmd' on resource '$res'\n";
|
|
}
|
|
|
|
sub senseless_cmd {
|
|
my ($cmd, $res) = @_;
|
|
lprint "command '$cmd' makes no sense with MARS (ignoring on resource '$res')\n";
|
|
}
|
|
|
|
sub forbidden_cmd {
|
|
my ($cmd, $res) = @_;
|
|
ldie "command '$cmd' on resource '$res' cannot be used with MARS (migth affect too many hosts, lead to undesired consequences)\n";
|
|
}
|
|
|
|
sub nyi_cmd {
|
|
my ($cmd, $res) = @_;
|
|
ldie "command '$cmd' on resource '$res' is not yet implemented\n";
|
|
}
|
|
|
|
sub is_module_loaded {
|
|
return -d "/proc/sys/mars";
|
|
}
|
|
|
|
sub set_connect_pref_list {
|
|
my ($cmd, $res, $list) = @_;
|
|
check_res_member($cmd, $res);
|
|
my $dst = "$mars/resource-$res/connect-$host";
|
|
if ($cmd =~ m/^get-/) {
|
|
my $value = get_link($dst);
|
|
lprint "$value\n";
|
|
return;
|
|
}
|
|
check_id_list($list, 1);
|
|
set_link($list, $dst);
|
|
}
|
|
|
|
sub set_defaults {
|
|
my ($cmd, $res, $value) = @_;
|
|
my $key = $cmd;
|
|
$key =~ s/set-(global-)?//;
|
|
my $path = "$mars/defaults/$key";
|
|
if ($res) {
|
|
check_res_member($cmd, $res) if $res;
|
|
$path = "$mars/resource-$res/defaults/$key";
|
|
}
|
|
if ($value) {
|
|
my $flags = txt2featuresflags($cmd, $value);
|
|
lprint "Set '$path' to '$flags'\n";
|
|
set_link($flags, $path);
|
|
} else {
|
|
lprint "Delete '$path'\n";
|
|
_create_delete($path);
|
|
}
|
|
}
|
|
|
|
sub emergency_limit_res {
|
|
my ($cmd, $res, $value) = @_;
|
|
my $dst = "$mars/resource-$res/todo-$host/emergency-limit";
|
|
if ($cmd =~ m/^get-/) {
|
|
my $value = get_link($dst);
|
|
lprint "$value\n";
|
|
return;
|
|
}
|
|
ldie "percent argument '$value' isn't numeric\n" unless $value =~ m/^[0-9]+$/;
|
|
ldie "percent argument '$value' isn't between 0 and 100%\n" unless ($value >= 0 && $value <= 100);
|
|
set_link($value, $dst);
|
|
}
|
|
|
|
sub set_link_cmd {
|
|
my $cmd = shift;
|
|
for (;;) {
|
|
my $src = shift || last;
|
|
if ($cmd =~ m/^get-/) {
|
|
my $value = get_link($src);
|
|
lprint "$value\n";
|
|
next;
|
|
}
|
|
my $dst = shift || ldie "you did not supply a symlink destination for source '$src'\n";
|
|
ldie "symlink target '$dst' is not an absolute path\n" unless $dst =~ m:^/:;
|
|
check_userspace($dst);
|
|
my $dir = `dirname "$dst"` or ldie "path '$dst' has no dirname\n";
|
|
chomp $dir;
|
|
ldie "directory '$dir' does not exist\n" unless -d $dir;
|
|
set_link($src, $dst);
|
|
}
|
|
}
|
|
|
|
sub set_sync_limit_value {
|
|
my ($cmd, $value) = @_;
|
|
my $todo_dir = "$mars/defaults-$host";
|
|
ldie "directory '$todo_dir' does not exist\n" unless -d $todo_dir;
|
|
my $dst = "$todo_dir/sync-limit";
|
|
if ($cmd =~ m/^get-/) {
|
|
my $value = get_link($dst);
|
|
lprint "$value\n";
|
|
return;
|
|
}
|
|
set_link($value, $dst);
|
|
}
|
|
|
|
sub create_uuid {
|
|
my ($cmd) = @_;
|
|
my $old_uuid = get_link("$mars/uuid", 2);
|
|
ldie "Cluster was already created with uuid='$old_uuid'. " .
|
|
"For safety reasons, no override is possible at marsadm level.\n" if $old_uuid;
|
|
my $uuid = `echo -n \$(hostname) \$(date)`;
|
|
set_link($uuid, "$mars/uuid");
|
|
finish_links(); # opportunity for errors => don't continue
|
|
lprint "New cluster UUID is '$uuid'\n";
|
|
}
|
|
|
|
sub _create_dirs {
|
|
my ($cmd) = @_;
|
|
system("mkdir $mars/ips") unless -d "$mars/ips";
|
|
system("mkdir $mars/userspace") unless -d "$mars/userspace";
|
|
system("mkdir $mars/defaults") unless -d "$mars/defaults";
|
|
system("mkdir $mars/defaults-$host") unless -d "$mars/defaults-$host";
|
|
system("mkdir $mars/todo-global") unless -d "$mars/todo-global";
|
|
mkdir("$mars/actual-$host") unless -d "$mars/actual-$host";
|
|
set_link($marsadm_version, "$mars/actual-$host/marsadm-version");
|
|
}
|
|
|
|
sub _create_cluster {
|
|
my ($cmd) = @_;
|
|
ldie "The $mars directory does not exist.\n" unless -d $mars;
|
|
my $ip = _get_ip($host);
|
|
_create_dirs($cmd);
|
|
create_uuid(@_) if $cmd eq "create-cluster";
|
|
set_link("0", "$mars/defaults-$host/sync-limit");
|
|
set_link("(none)", "$mars/defaults-$host/sync-pref-list");
|
|
set_link($ip, "$mars/ips/ip-$host");
|
|
set_link("1", "$mars/todo-global/deleted-$host");
|
|
}
|
|
|
|
sub create_cluster {
|
|
my ($cmd, $peer) = @_;
|
|
ldie "cluster is already created\n" if !$force && -d "$mars/ips";
|
|
ldie "mars module is loaded, please unload first\n" if is_module_loaded();
|
|
_create_cluster(@_);
|
|
}
|
|
|
|
sub join_cluster {
|
|
my ($cmd, $peer, $peer_ip) = @_;
|
|
ldie "Cannot join myself (peer='$peer', host='$host')\n" if $peer eq $host;
|
|
ldie "Directory $mars is missing\n" unless -d $mars;
|
|
if (lamport_glob("$mars/resource-*")) {
|
|
lwarn "DANGER: some resources already exist!\n";
|
|
ldie "DANGER: If you are sure that no resource clash is possible, re-invoke $cmd with '--force' option\n" unless $force;
|
|
}
|
|
_create_dirs($cmd);
|
|
$peer_ip = _get_ip($peer) unless $peer_ip;
|
|
# try new join method
|
|
if (is_module_loaded()) {
|
|
my $ip = _get_ip($host);
|
|
lprint "MARS kernel module is loaded, trying the new $cmd method.\n";
|
|
my $old_uuid = get_link("$mars/uuid", 2);
|
|
if (!$old_uuid || $old_uuid eq "(any)") {
|
|
mkdir("$mars/ips") unless -d "$mars/ips";
|
|
mkdir("$mars/todo-global") unless -d "$mars/todo-global";
|
|
mkdir("$mars/defaults-$host") unless -d "$mars/defaults-$host";
|
|
mkdir("$mars/actual-$host") unless -d "$mars/actual-$host";
|
|
set_link("(any)", "$mars/uuid") unless $old_uuid;
|
|
set_link($ip, "$mars/ips/ip-$host");
|
|
my $peer_known = link_exists("$mars/ips/ip-$peer");
|
|
if (!$peer_known) {
|
|
set_link($peer_ip, "$mars/ips/ip-$peer");
|
|
}
|
|
# make PRELIMINARY links, timestamp is 1 second after the Big Bang (1970)
|
|
finish_links(1);
|
|
_trigger();
|
|
lprint "Update local '$real_host' information\n";
|
|
update_cluster($cmd, "all", $real_host);
|
|
if ($peer_known) {
|
|
lprint "Update peer '$peer' information\n";
|
|
update_cluster($cmd, "all", $peer);
|
|
}
|
|
# write again, this time with current mars_time()
|
|
set_link($ip, "$mars/ips/ip-$host");
|
|
set_link("0", "$mars/defaults-$host/sync-limit");
|
|
set_link("(none)", "$mars/defaults-$host/sync-pref-list");
|
|
my $deleted_path = "$mars/todo-global/deleted-$host";
|
|
set_link("1", $deleted_path) unless link_exists($deleted_path);
|
|
finish_links();
|
|
_push_link($peer, $ip, "$mars/ips/ip-$host");
|
|
lprint "Checking uuid\n";
|
|
my $round = 0;
|
|
my $new_uuid = get_link("$mars/uuid", 1);
|
|
while ($round++ < 5) {
|
|
_trigger(3);
|
|
sleep(3);
|
|
lprint "... update from $peer round $round\n";
|
|
_reset_resources();
|
|
update_cluster($cmd, "all", $peer);
|
|
$new_uuid = get_link("$mars/uuid");
|
|
last if ($new_uuid && $new_uuid ne "(any)");
|
|
_push_link($peer, $ip, "$mars/ips/ip-$host");
|
|
}
|
|
if ($new_uuid && $new_uuid ne "(any)") {
|
|
lprint "Successfully joined cluster, uuid='$new_uuid'\n";
|
|
return;
|
|
} else {
|
|
lwarn "Unable to determine the remote cluster uuid.\n";
|
|
}
|
|
} else {
|
|
lwarn "Cluster uuid '$old_uuid' already present, cannot use the new $cmd method\n";
|
|
}
|
|
}
|
|
lprint "Falling back to the old ssh/rsync based $cmd method (peer='$peer' peer_ip='$peer_ip')\n";
|
|
ldie "OLD method: MARS module is loaded, please unload first before using ssh\n" if is_module_loaded();
|
|
my $ip = _get_ip($host);
|
|
rsync_cmd($peer, "--max-size=1 --update $peer:$mars/ $mars/");
|
|
# check uniqness of IPs
|
|
my %ips = ();
|
|
foreach my $other_ip_path (lamport_glob("$mars/ips/*")) {
|
|
my $other_ip = get_link($other_ip_path, 1);
|
|
my $other_host = $other_ip_path;
|
|
$other_host =~ s:.*/ip-::;
|
|
lwarn "Host '$other_host' IP '$other_ip' is AMBIGUOUS\n" if defined($ips{$other_host});
|
|
$ips{$other_host} = $other_ip;
|
|
lwarn "New IP '$ip' already exists for host '$host' at '$other_ip_path'\n" if ($ip eq $other_ip && $other_host eq $host);
|
|
}
|
|
_create_cluster(@_);
|
|
finish_links();
|
|
rsync_cmd($peer, "--update $mars/ips/ $peer:$mars/ips/");
|
|
}
|
|
|
|
sub _get_probe {
|
|
my ($probe_dir, $peer, $peer_ip) = @_;
|
|
$peer_ip = _get_ip($peer) unless $peer_ip;
|
|
mkdir($probe_dir);
|
|
my $new_mars = "$probe_dir/$mars";
|
|
_push_fetch($peer, $peer_ip, $probe_dir);
|
|
return $new_mars;
|
|
}
|
|
|
|
# allow accumulation data from multiple peers
|
|
sub _wait_probe {
|
|
my ($probe_dir, $peer, $peer_ip) = @_;
|
|
my $got_path = "$probe_dir/got-$peer";
|
|
lprint "Wait for probe data from '$peer' in '$got_path'\n";
|
|
while (!link_exists($got_path)) {
|
|
lprint "Wait for '$got_path' to appear\n";
|
|
sleep_timeout(5);
|
|
last if link_exists($got_path);
|
|
_push_fetch($peer, $peer_ip, $probe_dir);
|
|
}
|
|
lprint "Got probe data from '$peer' in '$got_path'\n";
|
|
}
|
|
|
|
sub merge_cluster {
|
|
my ($cmd, $peer, $peer_ip) = @_;
|
|
my $uuid = readlink("$mars/uuid");
|
|
my @resources = lamport_glob("$mars/resource-*");
|
|
my @ip_links = lamport_glob("$mars/ips/*");
|
|
if ($cmd =~ m/-list/) {
|
|
print "UUID: $uuid\n";
|
|
print "IPs:\n";
|
|
foreach my $ip (@ip_links) {
|
|
print "$ip\n";
|
|
}
|
|
print "RESOURCEs:\n";
|
|
foreach my $i (@resources) {
|
|
print "$i\n";
|
|
}
|
|
return;
|
|
}
|
|
ldie "No peer argument given" unless $peer;
|
|
ldie "Cannot merge myself (peer='$peer', host='$host')\n" if $peer eq $host;
|
|
ldie "Directory $mars is missing\n" unless -d $mars;
|
|
ldie "A cluster UUD '$mars/uuid' does not exist. Please use 'join-cluster instead.\n" unless link_exists("$mars/uuid");
|
|
# start_deprecated_CODE
|
|
if (!ssh_cmd($peer, $ssh_probe, 1)) {
|
|
lprint "Falling back to OLD ssh-based $cmd\n";
|
|
merge_cluster_old(@_);
|
|
return;
|
|
}
|
|
lprint "Using NEW $cmd without ssh\n";
|
|
# end_deprecated_code
|
|
if ($peer_ip) {
|
|
lprint "Given on command line: peer '$peer' has IP '$peer_ip'\n";
|
|
} else {
|
|
$peer_ip = _get_ip($peer);
|
|
ldie "Cannot determine IP for peer '$peer'. Please give the IP on the command lime.\n" unless $peer_ip;
|
|
lprint "Determined: peer '$peer' appears to have IP '$peer_ip'\n";
|
|
}
|
|
my $peer_ip_path = "$mars/ips/ip-$peer";
|
|
my $probe_ip = get_link($peer_ip_path, 1);
|
|
unless ($probe_ip) {
|
|
finish_links();
|
|
lprint "Need to set preliminary peer IP\n";
|
|
set_link($peer_ip, $peer_ip_path);
|
|
# Make PRELIMINARY links, timestamp is 1 second after the Big Bang (1970)
|
|
finish_links(1);
|
|
_trigger(3);
|
|
}
|
|
my $probe_dir = "$mars/probe-$real_host";
|
|
system("rm -rf $probe_dir");
|
|
my $new_mars = _get_probe($probe_dir, $peer, $peer_ip);
|
|
_wait_probe($probe_dir, $peer, $peer_ip);
|
|
my %ips;
|
|
my %old_peers;
|
|
my %merge_peers;
|
|
foreach my $old_peer (get_total_peers()) {
|
|
$ips{$old_peer} = _get_ip($old_peer);
|
|
$old_peers{$old_peer} = 1;
|
|
$merge_peers{$old_peer} = 1;
|
|
lprint " old peer '$old_peer' ip='$ips{$old_peer}'\n";
|
|
}
|
|
foreach my $ip_path (lamport_glob("$new_mars/ips/ip-*")) {
|
|
my $ip = get_link($ip_path, 1);
|
|
next unless $ip;
|
|
my $new_peer = $ip_path;
|
|
$new_peer =~ s:^.+/ip-::;
|
|
next if $merge_peers{$new_peer};
|
|
$ips{$new_peer} = _get_ip($new_peer);
|
|
$merge_peers{$new_peer} = 1;
|
|
lprint " new peer '$new_peer' ip='$ips{$new_peer}'\n";
|
|
}
|
|
my %new_res;
|
|
foreach my $ip (lamport_glob("$new_mars/resource-*")) {
|
|
my $new_res = $ip;
|
|
$new_res =~ s:^.+/resource-::;
|
|
$new_res{$new_res} = 1;
|
|
}
|
|
my $new_uuid = readlink("$new_mars/uuid");
|
|
ldie "Other cluster peer '$peer' has no UUID\n" unless $new_uuid;
|
|
my %old_res;
|
|
foreach my $str (get_total_resources()) {
|
|
$old_res{$str} = 1;
|
|
}
|
|
if ($new_uuid eq $uuid) {
|
|
lprint "Other cluster peer '$peer' has the same UUID.\n";
|
|
lprint "No resource name checking necessary.\n";
|
|
lprint "Operation '$cmd' will work logically idempotent.\n";
|
|
} else {
|
|
# Check that both sets of resources are logically disjoint
|
|
lprint "Other cluster peer '$peer' has a different UUID, checking for resource name conflicts.\n";
|
|
my %inter_res = key_intersect(\%old_res, \%new_res);
|
|
my $conflicts = 0;
|
|
foreach my $res (keys(%inter_res)) {
|
|
lwarn "common resource '$res'\n";
|
|
$conflicts++;
|
|
}
|
|
ldie "cannot merge cluster '$peer' due to $conflicts conflicts\n" if $conflicts;
|
|
}
|
|
lprint "MERGE '$peer'\n";
|
|
foreach my $new_peer1 (keys(%merge_peers)) {
|
|
lprint "fetch '$new_peer1'\n";
|
|
_push_fetch($new_peer1, $ips{$new_peer1}, "/");
|
|
foreach my $new_peer2 (keys(%merge_peers)) {
|
|
lprint " push '$new_peer2'\n";
|
|
my $ip_path = "$mars/ips/ip-$new_peer2";
|
|
_push_link_foreign($new_peer1, $ips{$new_peer1}, $ips{$new_peer2}, $ip_path);
|
|
}
|
|
}
|
|
system("rm -rf $probe_dir");
|
|
}
|
|
|
|
# deprecated, to disappear
|
|
sub merge_cluster_old {
|
|
my ($cmd, $peer) = @_;
|
|
ldie "ssh is disabled\n" unless $ssh_port;
|
|
my $uuid = readlink("$mars/uuid");
|
|
my @resources = lamport_glob("$mars/resource-*");
|
|
my @ip_links = lamport_glob("$mars/ips/*");
|
|
# check connections
|
|
my $check_cmd = "uname -a";
|
|
system("$check_cmd") == 0 or ldie "oops, 'uname is not installed'\n";
|
|
system("rsync --help > /dev/null") == 0 or ldie "Command 'rsync' is not installed\n";
|
|
ssh_cmd($peer, $ssh_probe);
|
|
my @old_peers;
|
|
foreach my $ip (@ip_links) {
|
|
my $old_peer = $ip;
|
|
$old_peer =~ s:^.+/ip-::;
|
|
next if $old_peer eq $host;
|
|
next if $old_peer eq $real_host;
|
|
ssh_cmd($old_peer, $ssh_probe);
|
|
push @old_peers, $old_peer;
|
|
}
|
|
# check whether merge-cluster is possible
|
|
my %total_res;
|
|
foreach my $res (@resources) {
|
|
$total_res{$res}++;
|
|
}
|
|
my $ssh_cmd = make_ssh_cmd($peer) . " marsadm merge-cluster-list";
|
|
my $answer = `$ssh_cmd`;
|
|
$answer =~ m/^UUID: (.*)$/m or ldie "cannot determine remote UUID from '$answer'\n";
|
|
my $other_uuid = $1;
|
|
ldie "Other cluster peer '$peer' has no UUID\n" unless $other_uuid;
|
|
if ($other_uuid eq $uuid) {
|
|
lprint "Other cluster peer '$peer' has the same UUID.\n";
|
|
lprint "No resource name checking necessary.\n";
|
|
lprint "Operation '$cmd' will therfore work logically idempotent.\n";
|
|
} else {
|
|
if (link_exists("$mars/tree-$peer")) {
|
|
lwarn "A valid tree signature '$mars/tree-$peer' already exists, thus it appears to be already merged!\n";
|
|
ldie "Aborting for saftey. Override via --force only if you know what you are doing!\n" unless $force;
|
|
}
|
|
# Check that both sets of resources are disjoint
|
|
lprint "Other cluster peer '$peer' has a different UUID, checking for resource name conflicts.\n";
|
|
my @other_resources;
|
|
my @conflicts;
|
|
my $copy = $answer;
|
|
$copy =~ s/\A.*?RESOURCEs:\n//ms;
|
|
while ($copy) {
|
|
$copy =~ s/\A(.*)\n$//m;
|
|
my $other_res = $1;
|
|
last unless $other_res;
|
|
push @other_resources, $other_res;
|
|
if ($total_res{$other_res}++) {
|
|
push @conflicts, $other_res;
|
|
}
|
|
}
|
|
if (@conflicts) {
|
|
lprint "CONFLICTS:\n";
|
|
foreach my $res (@conflicts) {
|
|
lprint "\t$res\n";
|
|
}
|
|
ldie "Cannot $cmd: some resource directories exist at both clusters with same name.\nThis cannot be overriden.\nPlease resolve the conflict by hand.\n";
|
|
}
|
|
lprint "List of total resources:\n";
|
|
foreach my $res (keys(%total_res)) {
|
|
lprint "\t$res\n";
|
|
}
|
|
# INTERNAL, for debugging and error analysis: backup the old uuid symlink
|
|
my $backup = "$backup_dir/uuid-backups";
|
|
system("mkdir -p $backup; cp -a $mars/uuid $backup/") unless link_exists("$backup/uuid");
|
|
}
|
|
# Start the "hot phase"
|
|
my $rsync_cmd = "--max-size=1";
|
|
rsync_cmd($peer, "$rsync_cmd $peer:$mars/uuid $mars/uuid");
|
|
foreach my $old_peer (@old_peers) {
|
|
rsync_cmd($old_peer, "$rsync_cmd $mars/uuid $old_peer:$mars/uuid");
|
|
}
|
|
$rsync_cmd .= " --update --ignore-existing";
|
|
rsync_cmd($peer, "$rsync_cmd $peer:$mars/ $mars/");
|
|
rsync_cmd($peer, "$rsync_cmd $mars/ $peer:$mars/");
|
|
}
|
|
|
|
sub leave_cluster {
|
|
my ($cmd) = @_;
|
|
ldie "mars kernel module is not loaded. This is needed for communication with some other hosts!\n" if !is_module_loaded();
|
|
my $check = "/mars/resource-*/data-$host";
|
|
ldie "I am member of some resources\n" if lamport_glob($check) && !$force;
|
|
wait_cluster($cmd) unless $force;
|
|
foreach my $path (lamport_glob("$mars/actual-*/*-$host")) {
|
|
_create_delete($path);
|
|
}
|
|
_create_delete("$mars/ips/ip-$host");
|
|
finish_links();
|
|
wait_cluster($cmd) unless $force;
|
|
foreach my $path (lamport_glob("$mars/actual-*/*-$host")) {
|
|
_create_delete($path);
|
|
}
|
|
_create_delete("$mars/todo-global/deleted-$host");
|
|
finish_links();
|
|
wait_cluster($cmd) unless $force;
|
|
while (-f "$mars/ips/ip-$host" && $timeout_val >= 0) {
|
|
sleep_timeout(3, 1);
|
|
}
|
|
system("rmmod mars") if $host eq $real_host;
|
|
foreach my $path (lamport_glob("$mars/{,resource-*/}{todo,actual}-*/*-$host")) {
|
|
unlink($path);
|
|
}
|
|
foreach my $path (lamport_glob("$mars/{,resource-*/}{todo,actual}-$host")) {
|
|
rmdir($path); # at least try it
|
|
}
|
|
}
|
|
|
|
sub create_res {
|
|
my ($cmd, $res, $dev, $appear, $size_arg) = @_;
|
|
my $create = ($cmd eq "create-resource");
|
|
|
|
ldie "undefined device or size argument\n" unless $dev;
|
|
$appear = $res if !$appear;
|
|
check_id($appear) if $appear;
|
|
|
|
my $resdir = "$mars/resource-$res";
|
|
if ($create) {
|
|
if (-d $resdir) {
|
|
lwarn "resource directory '$res' already exists\n";
|
|
my @host_list = lamport_glob("$resdir/replay-*");
|
|
if (@host_list) {
|
|
my $h_list = join(',', map({ $_ =~ s:.*/replay-::; $_; } (@host_list)));
|
|
lwarn "DANGER: hosts '$h_list' are already member of resource '$res'.\n";
|
|
ldie "REFUSING to trash your resource!\n" unless $force;
|
|
}
|
|
}
|
|
lprint "creating new resource '$res'\n";
|
|
} else {
|
|
if (link_exists("$resdir/data-$host")) {
|
|
lwarn "resource '$res' has been already joined -- this is dangerous!\n";
|
|
ldie "refusing dangerous operation\n" unless $force;
|
|
} else {
|
|
lprint "joining to existing resource '$res'\n";
|
|
}
|
|
}
|
|
|
|
my $size = 0;
|
|
if (-b $dev) {
|
|
ldie "block device '$dev' must be an absolute path starting with '/'\n" unless $dev =~ m/^\//;
|
|
use Fcntl 'SEEK_END', 'O_RDONLY', 'O_RDWR', 'O_EXCL';
|
|
my $flags = O_RDWR | O_EXCL;
|
|
if ($force) {
|
|
$flags = O_RDONLY;
|
|
}
|
|
sysopen(TEST, $dev, $flags) or ldie "cannot open device '$dev' for exclusive rw access\n";
|
|
$size = sysseek(TEST, 0, SEEK_END);
|
|
close(TEST);
|
|
lprint "block device '$dev': determined size = $size bytes\n";
|
|
if ($size_arg) {
|
|
my $new_size = get_size($size_arg);
|
|
ldie "size argument '$size_arg' is smaller than device size '$size'\n" unless $new_size <= $size;
|
|
lprint "reducing size from $size to $new_size\n";
|
|
$size = $new_size;
|
|
}
|
|
} elsif (-f $dev) {
|
|
lprint "Using regular (sparse) file '$dev' as an image file\n";
|
|
$size = get_size($dev);
|
|
if ($size > 0) {
|
|
$dev = "";
|
|
} else {
|
|
ldie "bad parameter '$dev': must be either a block device name, or size followed by k or m or g or t\n";
|
|
}
|
|
} else {
|
|
ldie "Device argument '$dev' does not exist or is no usable block device.\n"
|
|
}
|
|
|
|
ldie "implausible size $size" unless $size > 4096 * 16; # smaller floppies should not exist ;)
|
|
|
|
# check for uniqeness of $appear
|
|
if ($appear) {
|
|
foreach my $old_dev (lamport_glob("$mars/resource-*/device-$host")) {
|
|
$old_dev =~ m:/resource-([^/]+)/:;
|
|
my $old_res = $1;
|
|
next if (!$old_res || $old_res eq $res || $old_res eq "(none)");
|
|
my $old_name = get_link($old_dev);
|
|
if ($old_name eq $appear) {
|
|
if (link_exists("$mars/resource-$old_res/data-$host")) {
|
|
ldie "Device name '/dev/mars/$old_name' already exists at resource '$old_res'\n";
|
|
} else {
|
|
lwarn "Device name '/dev/mars/$old_name' already exists in another resource '$old_res'.\n";
|
|
lwarn "This does no harm, but may be confusing.\n";
|
|
lwarn "Please name your devices equal to the resource names by convention.\n";
|
|
}
|
|
}
|
|
}
|
|
# warn if devices are named differently throughout the cluster
|
|
foreach my $old_dev (lamport_glob("$resdir/device-*")) {
|
|
my $old_name = get_link($old_dev);
|
|
next unless $old_name;
|
|
next if $old_name eq "(none)";
|
|
if ($old_name ne $appear) {
|
|
$old_dev =~ m:/device-(.+)$:;
|
|
my $old_host = $1;
|
|
next unless $old_host;
|
|
lwarn "your name '/dev/mars/$appear' differs from '/dev/mars/$old_name' on host '$old_host'.\n";
|
|
lwarn "this does no harm, but may be confusing.\n";
|
|
}
|
|
}
|
|
}
|
|
|
|
err_purge_res($cmd, $res);
|
|
|
|
if (!$create) {
|
|
_activate_resource($cmd, $res, $host);
|
|
}
|
|
|
|
my $max_primary_retry = 5;
|
|
primary_retry:
|
|
|
|
my $primary;
|
|
my $replay_nr = -1;
|
|
my $use_rsync = 0;
|
|
if ($create) {
|
|
mkdir($resdir);
|
|
ldie "could not create resource '$res'\n" unless -d $resdir;
|
|
set_link($size, "$resdir/size");
|
|
} else { # join
|
|
my $ip = _get_ip($host);
|
|
# For safety, try to get the very newest infos.
|
|
# Reason: newer kernel modules will fetch non-member resource infos less frequently.
|
|
# Therefore we shift some responsibility for non-member -> member transitions to userspace.
|
|
$primary = _get_designated_primary($res, 1);
|
|
for (my $retry = 0; $retry < 3; $retry++) {
|
|
last if (defined($primary) && $primary);
|
|
lprint "Trying to get primary info\n";
|
|
my $condition = "return _get_designated_primary(\"$res\", 1);";
|
|
$primary = update_cluster($cmd, $res, "*", $condition);
|
|
}
|
|
my $max_retry = 5;
|
|
for (;;) {
|
|
$primary = _get_designated_primary($res, 1);
|
|
last if $max_retry-- < 0;
|
|
if ($primary && $primary ne $host && $primary ne "(none)") {
|
|
# first check whether symlink information appears to be recent enough
|
|
last if recent_cluster($cmd, $res, $primary);
|
|
# for safety, fetch newest infos from last known primary
|
|
if ($max_retry >= 3) {
|
|
lprint "Trying update-cluster from '$primary'\n";
|
|
update_cluster($cmd, $res, $primary);
|
|
next;
|
|
} elsif ($max_retry >= 1) {
|
|
lprint "Trying full update-cluster\n";
|
|
update_cluster($cmd, $res);
|
|
next;
|
|
}
|
|
$use_rsync++;
|
|
rsync_cmd($primary, "--max-size=1 --update $primary:$mars/resource-$res/ $mars/resource-$res/", $res, 1);
|
|
next;
|
|
}
|
|
lprint "Using update-cluster for primary determination\n";
|
|
update_cluster($cmd, $res);
|
|
next if $max_retry > 2;
|
|
my @peers = map { m:^$mars/ips/ip-(.+):; $1 } lamport_glob("$mars/ips/ip-*");
|
|
if (!@peers || $max_retry < 0) {
|
|
lwarn "cannot get a relevant primary\n";
|
|
last;
|
|
}
|
|
# We are desperate. Try to fetch the directory from anywhere.
|
|
my $peer = shift @peers;
|
|
next if $peer eq $host;
|
|
next if $peer eq $real_host;
|
|
$use_rsync++;
|
|
rsync_cmd($peer, "--max-size=1 --update $peer:$mars/resource-$res $mars/", 1);
|
|
} # retry loop
|
|
ldie "resource '$res' does not exist\n" unless -d $resdir;
|
|
ldie "resource '$res' has no designated primary\n" unless $primary && $primary ne "(none)";
|
|
finish_links();
|
|
wait_cluster($cmd, $res);
|
|
my $res_size = get_link("$mars/resource-$res/size", 1);
|
|
if ($size < $res_size) {
|
|
lwarn "size of new device is only $size, but should be $res_size\n";
|
|
ldie "refusing to join due to bad size\n" unless $force;
|
|
} elsif ($size > $res_size) {
|
|
lprint "Your physical device has size $size, which is larger than the logical resource size $res_size.\n";
|
|
lprint "This does no harm, but you are wasting some space.\n";
|
|
}
|
|
$primary = _get_designated_primary($res);
|
|
ldie "Sorry, joining is only possible if a designated primary exists.\n" if $primary eq "(none)";
|
|
ldie "implausible state: I ($host) am already designated primary of resource '$res' which I just wanted to join\n" if $primary eq $host;
|
|
ldie "my ip '$ip' is not registered -- please run 'join-cluster' first\n" unless link_exists("$mars/ips/ip-$host");
|
|
check_sync_startable(@_);
|
|
my $replay = get_link("$resdir/replay-$primary");
|
|
if ($replay =~ m/^log-([0-9]+)-/) {
|
|
$replay_nr = $1;
|
|
$replay_nr =~ s/^0+//;
|
|
} else {
|
|
lwarn "'$res' cannot determine current primary '$primary' logfile number from '$replay'.\n";
|
|
if ($max_primary_retry-- >= 0) {
|
|
wait_cluster($cmd, $res);
|
|
goto primary_retry;
|
|
}
|
|
ldie "Primary '$primary' info is missing for '$res'.\n";
|
|
}
|
|
}
|
|
|
|
# purge before complaining
|
|
log_purge_res($cmd, $res);
|
|
err_purge_res($cmd, $res);
|
|
finish_links();
|
|
_wait_delete();
|
|
|
|
# check for remains from former incidents
|
|
my $max_retry = 3;
|
|
while ($max_retry-- > 0) {
|
|
my @remains = ();
|
|
foreach my $remain (lamport_glob("$resdir/{log,version}-*-$host")) {
|
|
if (!$create) {
|
|
$remain =~ m/-([0-9]+)-$host/;
|
|
my $nr = $1;
|
|
next if $nr < $replay_nr;
|
|
}
|
|
lwarn "OLD REMAIN '$remain'\n";
|
|
push @remains, $remain;
|
|
}
|
|
last if !@remains;
|
|
lwarn "Resource directory has some old remains.\n";
|
|
if ($force && !$create) {
|
|
foreach my $remain (@remains) {
|
|
lwarn "REMOVING remain '$remain'";
|
|
_create_delete($remain);
|
|
}
|
|
finish_links();
|
|
_wait_delete();
|
|
next;
|
|
}
|
|
lwarn "First cleanup.\n";
|
|
my $extra_op = $create ? "delete-resource" : "log-purge-all";
|
|
lwarn "If you really want to $cmd, run 'marsadm $extra_op' first.\n";
|
|
ldie "REFUSING $cmd\n" unless $force;
|
|
}
|
|
|
|
my $file = "$resdir/data-$host";
|
|
if (!$dev) {
|
|
lwarn "link '$file' already exists - reusing\n" if link_exists($file);
|
|
lprint "setup sparse file '$file' with size $size\n";
|
|
open(OUT, ">>", $file) or ldie "could not open '$file'\n";
|
|
truncate(OUT, $size) or ldie "truncate to size $size failed\n";
|
|
close OUT;
|
|
} else {
|
|
lprint "using existing device '$dev'\n";
|
|
set_link($dev, $file);
|
|
_push_link($primary, $dev, $file) if $primary;
|
|
}
|
|
|
|
my $max_join_retry = 5;
|
|
join_retry:
|
|
|
|
if ($appear) {
|
|
lprint "resource '$res' will appear as local device '/dev/mars/$appear'\n";
|
|
my $device_path = "$resdir/device-$host";
|
|
set_link($appear, $device_path);
|
|
_push_link($primary, $appear, $device_path) if $primary;
|
|
}
|
|
|
|
mkdir("$mars/userspace") unless -d "$mars/userspace";
|
|
mkdir("$resdir/userspace") unless -d "$resdir/userspace";
|
|
mkdir("$resdir/defaults") unless -d "$resdir/defaults";
|
|
mkdir("$resdir/defaults-$host");
|
|
mkdir("$resdir/local-$host");
|
|
mkdir("$resdir/actual-$host");
|
|
set_link("0", "$resdir/actual-$host/if-on");
|
|
my $todo = "$resdir/todo-$host";
|
|
mkdir($todo);
|
|
set_link("0", "$todo/attach");
|
|
set_link("1", "$todo/connect");
|
|
set_link("1", "$todo/sync");
|
|
set_link("1", "$todo/allow-replay");
|
|
unlink("$resdir/syncstatus-$host");
|
|
|
|
my $replay_ok = 1;
|
|
if ($create) {
|
|
set_link($host, "$resdir/primary");
|
|
set_link($host, "$resdir/userspace/last-primary");
|
|
set_link($size, "$resdir/syncstatus-$host");
|
|
my $startnr = get_link("$resdir/maxnr", 2);
|
|
if (defined($startnr) && $startnr ne "" && $startnr > 0) {
|
|
$startnr += 100000;
|
|
} else {
|
|
$startnr = 1;
|
|
}
|
|
my $fmt_old = sprintf("%09d", $startnr - 1);
|
|
my $fmt = sprintf("%09d", $startnr);
|
|
set_link("log-$fmt-$host,0,0", "$resdir/replay-$host");
|
|
my $logfile = "$resdir/log-$fmt-$host";
|
|
safe_creat($logfile) unless $dry_run;
|
|
my $old_fake = "00000000000000000000000000000000,log-$fmt_old-$host,0";
|
|
set_link("$old_fake:", "$resdir/version-$fmt_old-$host");
|
|
set_link("00000000000000000000000000000000,log-$fmt-$host,0:$old_fake", "$resdir/version-$fmt-$host");
|
|
set_link("$startnr", "$resdir/skip-check-$host") if $startnr > 1;
|
|
set_link("$startnr", "$resdir/maxnr");
|
|
finish_links();
|
|
lprint "successfully created resource '$res'\n";
|
|
} else { # join
|
|
$replay_ok = _set_replaylink($cmd, $res, $replay_nr, $primary, "");
|
|
set_link("0", "$resdir/syncstatus-$host");
|
|
finish_links();
|
|
_trigger(3);
|
|
if ($use_rsync && $ssh_port) {
|
|
rsync_cmd($primary, "--max-size=1 --update $file $primary:$mars/resource-$res/", 1);
|
|
}
|
|
wait_cluster($cmd, $res, $primary);
|
|
}
|
|
set_link("1", "$todo/attach");
|
|
finish_links();
|
|
_trigger(3);
|
|
_reset_resources();
|
|
if (!$create) {
|
|
# Check for races with logrotate
|
|
for (my $check_redo = 3; $check_redo > 0; $check_redo--) {
|
|
wait_cluster($cmd, $res, $primary, 0);
|
|
# Split brain or orphanness can happen due to _inherent_ races
|
|
# with fresh appearance of the new replaylink.
|
|
lprint "Checking '$res' for split brain\n";
|
|
%detected_splits = ();
|
|
my $split_ok = detect_splitbrain($res, 1);
|
|
lprint "redo=$check_redo resource '$res' split_ok=$split_ok replay_ok=$replay_ok\n";
|
|
if (!$split_ok || !$replay_ok) {
|
|
lwarn "Race '$res' with log-rotate detected.\n";
|
|
if ($max_join_retry-- >= 0) {
|
|
wait_cluster($cmd, $res, "*", 0);
|
|
invalidate_res_phase1($cmd, $res);
|
|
wait_cluster($cmd, $res, $host, 0);
|
|
invalidate_res_phase2($cmd, $res);
|
|
lwarn "Restarting '$res' due to unstable situation at primary '$primary'\n";
|
|
goto join_retry;
|
|
}
|
|
}
|
|
}
|
|
lprint "Successfully joined resource '$res' to primary '$primary'\n";
|
|
}
|
|
_systemd_trigger($cmd, 1);
|
|
}
|
|
|
|
sub _fetch_transitive_peers {
|
|
lprint "Computing the transitive closure of peers:\n";
|
|
my @peers = get_total_peers();
|
|
my @old_peers;
|
|
my $probe_dir = "$mars/probe-$real_host";
|
|
system("rm -rf $probe_dir");
|
|
while (scalar(@peers) > scalar(@old_peers)) {
|
|
lprint "Current #peers: " . scalar(@peers) . "\n";
|
|
@old_peers = @peers;
|
|
foreach my $peer (@peers) {
|
|
next if $peer eq $real_host;
|
|
_get_probe($probe_dir, $peer);
|
|
}
|
|
foreach my $peer (@peers) {
|
|
next if $peer eq $real_host;
|
|
_wait_probe($probe_dir, $peer);
|
|
}
|
|
_reset_resources();
|
|
@peers = get_total_peers();
|
|
}
|
|
lprint "Final #peers: " . scalar(@peers) . "\n";
|
|
}
|
|
|
|
sub split_cluster {
|
|
my ($cmd) = @_;
|
|
# start_deprecated_CODE
|
|
foreach my $peer (get_total_peers()) {
|
|
next unless $ssh_port;
|
|
next if $peer eq $real_host;
|
|
if (!ssh_cmd($peer, $ssh_probe, 1)) {
|
|
lprint "Falling back to OLD ssh-based $cmd\n";
|
|
split_cluster_old(@_);
|
|
return;
|
|
}
|
|
}
|
|
lprint "Using NEW $cmd without ssh\n";
|
|
# end_deprecated_code
|
|
my $probe_dir = _fetch_transitive_peers();
|
|
system("rm -rf $probe_dir");
|
|
lprint "IPS:\n";
|
|
my @peers = get_total_peers();
|
|
foreach my $peer (@peers) {
|
|
my $ip = _get_ip($peer);
|
|
lprint " Peer='$peer' IP='$ip'\n";
|
|
}
|
|
lprint "PEER GROUPS:\n";
|
|
my $nr = 0;
|
|
my %touched;
|
|
my %peer_groups;
|
|
foreach my $peer1 (@peers) {
|
|
next if ($touched{$peer1});
|
|
my %members;
|
|
$members{$peer1} = 1;
|
|
foreach my $res (get_total_resources($peer1)) {
|
|
lprint " peer '$peer1' res '$res'\n" if $verbose > 2;
|
|
my @peers2 = get_total_peers($res);
|
|
foreach my $peer2 (@peers2) {
|
|
next if $members{$peer2};
|
|
lprint " peer '$res' res '$res' '$peer2'\n" if $verbose > 2;
|
|
$touched{$peer2} = 1;
|
|
$members{$peer2} = 1;
|
|
}
|
|
}
|
|
$peer_groups{$peer1} = \%members;
|
|
$nr++;
|
|
lprint " #$nr: $peer1 => " . join(",", sort alphanum_cmp keys(%members)) . "\n";
|
|
}
|
|
if ($nr <= 1) {
|
|
lprint "Nothing to do: there are only $nr group(s)\n";
|
|
system("rm -rf $probe_dir");
|
|
return;
|
|
}
|
|
|
|
# PHASE 0: create emergency backup
|
|
unless ($dry_run) {
|
|
my $ips_backup = "$backup_dir/ips-backup";
|
|
lprint "Phase 0: create backup at $ips_backup\n";
|
|
system("mkdir -p $ips_backup") and ldie "Cannot create '$ips_backup'\n";
|
|
system("cp -a $mars/ips/* $ips_backup/");
|
|
}
|
|
|
|
# PHASE 1: delete all
|
|
lprint "Phase 1: delete all IPs\n";
|
|
foreach my $peer (@peers) {
|
|
my $ip_path = "$mars/ips/ip-$peer";
|
|
_create_delete($ip_path);
|
|
}
|
|
finish_links();
|
|
_wait_delete();
|
|
_reset_resources();
|
|
foreach my $peer (@peers) {
|
|
my $ip_path = "$mars/ips/ip-$peer";
|
|
foreach my $targ (@peers) {
|
|
my $targ_ip = _get_ip($targ);
|
|
_push_link_foreign($targ, $targ_ip, ".deleted", $ip_path);
|
|
}
|
|
}
|
|
finish_links();
|
|
_reset_resources();
|
|
lprint "Wait for effect\n";
|
|
sleep(10);
|
|
|
|
# PHASE 2: regenerate all groups
|
|
lprint "Phase 2: re-create all groups\n";
|
|
foreach my $group (sort alphanum_cmp keys(%peer_groups)) {
|
|
my $group_ip = _get_ip($group);
|
|
lprint " GROUP '$group' ip '$group_ip'\n" if $verbose;
|
|
my $list = $peer_groups{$group};
|
|
my %members = %$list;
|
|
foreach my $peer (sort alphanum_cmp keys(%members)) {
|
|
my $ip_path = "$mars/ips/ip-$peer";
|
|
my $ip = _get_ip($peer);
|
|
lprint " PEER '$peer' '$ip'\n" if $verbose;
|
|
foreach my $targ (sort alphanum_cmp keys(%members)) {
|
|
my $targ_ip = _get_ip($targ);
|
|
_push_link_foreign($targ, $targ_ip, $ip, $ip_path);
|
|
}
|
|
}
|
|
}
|
|
finish_links();
|
|
system("rm -rf $probe_dir");
|
|
_reset_resources();
|
|
}
|
|
|
|
# deprecated, to disappear
|
|
sub split_cluster_old {
|
|
my ($cmd) = @_;
|
|
ldie "ssh is disabled\n" unless $ssh_port;
|
|
# checks
|
|
lprint "Checking...\n";
|
|
my $ips_backup = "$backup_dir/ips-backup";
|
|
system("mkdir -p $ips_backup") and ldie "OOps";
|
|
system("cp -a $mars/ips/* $ips_backup/");
|
|
foreach my $peer (lamport_glob("$ips_backup/ip-*")) {
|
|
$peer =~ s:^$ips_backup/ip-::;
|
|
ssh_cmd($peer, $ssh_probe);
|
|
}
|
|
lprint "Creating IP backups...\n";
|
|
# create ips backup
|
|
foreach my $peer (lamport_glob("$ips_backup/ip-*")) {
|
|
$peer =~ s:^$ips_backup/ip-::;
|
|
next if $peer eq $host;
|
|
rsync_cmd($peer, "--max-size=1 --update $peer:$mars/ips/ $ips_backup/", 1);
|
|
}
|
|
system("cp -a $mars/ips/* $ips_backup/");
|
|
my @peers = map { $_ =~ s:^$ips_backup/ip-::; $_ } (lamport_glob("$ips_backup/ip-*"));
|
|
ldie "Empty peer list\n" unless @peers;
|
|
foreach my $peer (@peers) {
|
|
next if $peer eq $host;
|
|
rsync_cmd($peer, "--max-size=1 --update $ips_backup $peer:$backup_dir/");
|
|
}
|
|
|
|
# Check symmetry
|
|
lprint "Checking symmetry of resource memberships...\n";
|
|
my $data_cmd = "ls $mars/resource-*/data-*";
|
|
my $standard = qx:$data_cmd:;
|
|
ldie "Cannot get local resource participants.\n" unless $standard;
|
|
foreach my $peer (@peers) {
|
|
next if $peer eq $host;
|
|
my $ssh_cmd = make_ssh_cmd($peer) . " '$data_cmd'";
|
|
my $compare = qx:$ssh_cmd:;
|
|
ldie "Asymmetric $mars/resource-*/data-* with peer $peer\n" unless $compare eq $standard;
|
|
}
|
|
|
|
# delete all hosts everywhere
|
|
lprint "Deleting all peers IPs everywhere...\n";
|
|
foreach my $peer (@peers) {
|
|
print "--- peer $peer\n";
|
|
my $cmd = "rm -rf $mars/ips";
|
|
ssh_cmd($peer, $cmd, 1);
|
|
}
|
|
|
|
# wait until all peer threads are gone
|
|
lprint "Waiting...\n";
|
|
sleep(10);
|
|
my $cond_cmd = "(( \$(ps ax | grep mars_peer | grep -v grep | wc -l) == 0 ))";
|
|
for (my $loop = 0; $loop < $timeout; $loop++) {
|
|
my $running = 0;
|
|
foreach my $peer (@peers) {
|
|
my $status = ssh_cmd($peer, $cond_cmd, 1);
|
|
$running++ if $status;
|
|
}
|
|
last if !$running;
|
|
print "--- $running peers have running peer threads.\n";
|
|
sleep(1);
|
|
}
|
|
|
|
for (my $loop = 0; $loop < 3; $loop++) {
|
|
# re-create all groups everywhere
|
|
lprint "RE-CREATE peer IPs...\n";
|
|
foreach my $peer (@peers) {
|
|
print "--- peer $peer\n";
|
|
my $cmd = "rm -f $mars/todo-global/delete* $mars/actual-*/msg-*connection-*; ";
|
|
$cmd .= "shopt -s nullglob; ";
|
|
$cmd .= "for i in $mars/resource-*; do if ! [[ -e \$i/data-$peer ]] && ! [[ -e \$i/replay-$peer ]]; then rm -rf $backup_dir/\${i##*/}; mv \$i $backup_dir/; fi; done; ";
|
|
$cmd .= "mkdir -p $mars/ips; ";
|
|
my $sub_list = "{ for dir in $mars/resource-*/data-$peer; do (cd \${dir%/*} && for i in data-*; do echo \$i; done); done; echo x-$peer; }";
|
|
my $sub_cmd = "echo RESTORE IP \$j; cp -a $ips_backup/ip-\$j $mars/ips/";
|
|
$cmd .= "for j in \$($sub_list | cut -d- -f2- | sort -u); do $sub_cmd; done";
|
|
lprint "$cmd\n";
|
|
ssh_cmd($peer, $cmd, 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
sub leave_res_phase0 {
|
|
my ($cmd, $res) = @_;
|
|
check_not_primary($cmd, $res, 1);
|
|
my $errors = 0;
|
|
foreach my $tmp (lamport_glob("$mars/resource-$res/todo-$host/*")) {
|
|
next if $tmp =~ m:/delete:;
|
|
my $status = get_link($tmp, 2);
|
|
if ($status) {
|
|
lwarn "switch '$tmp' is not off\n";
|
|
$errors++;
|
|
}
|
|
}
|
|
foreach my $tmp (lamport_glob("$mars/resource-$res/actual-$host/{is-,logfile-}*")) {
|
|
my $status = get_link($tmp);
|
|
if ($status) {
|
|
lwarn "running status '$tmp' is not off\n";
|
|
$errors++;
|
|
}
|
|
}
|
|
if (!$force) {
|
|
check_status($cmd, $res, "is-attached", 0, 0, 1);
|
|
ldie "there were $errors errors.\n" if $errors;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
sub leave_res_phase1 {
|
|
my ($cmd, $res) = @_;
|
|
set_link("1", "$mars/resource-$res/work-$host");
|
|
_create_delete("$mars/resource-$res/replay-$host");
|
|
_create_delete("$mars/resource-$res/data-$host");
|
|
_create_delete("$mars/resource-$res/syncstatus-$host");
|
|
my $syncpos = "$mars/resource-$res/syncpos-$host";
|
|
_create_delete($syncpos) if link_exists($syncpos);
|
|
my $skip_check = "$mars/resource-$res/skip-check-$host";
|
|
_create_delete($skip_check) if link_exists($skip_check);
|
|
my $vstatus = "$mars/resource-$res/verifystatus-$host";
|
|
_create_delete($vstatus) if link_exists($vstatus);
|
|
_create_delete("$mars/resource-$res/device-$host");
|
|
_create_delete("$mars/resource-$res/actsize-$host");
|
|
foreach my $dir (lamport_glob("$mars/resource-$res/*-$host/")) {
|
|
foreach my $tmp (lamport_glob("${dir}*")) {
|
|
_create_delete($tmp);
|
|
}
|
|
_create_delete($dir);
|
|
}
|
|
finish_links();
|
|
return 0;
|
|
}
|
|
|
|
# wait for deletions (avoid races with following commands)
|
|
sub leave_res_phase2 {
|
|
my ($cmd, $res) = @_;
|
|
_wait_delete();
|
|
_reset_resources();
|
|
$force = 0; # this would be too dangerous
|
|
log_purge_res($cmd, $res);
|
|
err_purge_res($cmd, $res);
|
|
return 0;
|
|
}
|
|
|
|
sub leave_res_phase3 {
|
|
my ($cmd, $res) = @_;
|
|
_wait_delete();
|
|
_reset_resources();
|
|
err_purge_res($cmd, $res);
|
|
# deprecated: hindering for new deletion method
|
|
if ($compat_deletions && $host eq $real_host) {
|
|
system("rm -f $mars/resource-$res/log-*");
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
sub leave_res_phase4 {
|
|
my ($cmd, $res) = @_;
|
|
_systemd_trigger($cmd, 1);
|
|
return 0;
|
|
}
|
|
|
|
sub delete_res_phase1 {
|
|
my ($cmd, $res) = @_;
|
|
my $basedir = "$mars/resource-$res";
|
|
# preconditions
|
|
if (! -d $basedir) {
|
|
lprint "resource directory '$basedir' does no longer exist.\n";
|
|
return 0;
|
|
}
|
|
my @host_list = get_total_peers($res);
|
|
my $cnt = scalar(@host_list);
|
|
if ($cnt > 0) {
|
|
my $h_list = join(',', @host_list);
|
|
ldie "resource '$res' is not empty: first remove the hosts '$h_list' via leave-resource\n" unless $force;
|
|
lwarn "BRUTE FORCE resource destruction: '$res' has $cnt members ($h_list) THESE ARE FINALLY TRASHED right now -- you are RESPONSIBLE for any subsequent problems.\n";
|
|
}
|
|
for my $path (`find $basedir/* | sort -r`) {
|
|
chomp $path;
|
|
next if $path =~ m:/(maxnr$|\.deleted-):;
|
|
_create_delete($path);
|
|
}
|
|
set_link("1", "$mars/resource-$res/work-$host");
|
|
finish_links();
|
|
_wait_delete();
|
|
_reset_resources();
|
|
return 0;
|
|
}
|
|
|
|
sub delete_res_phase2 {
|
|
my ($cmd, $res) = @_;
|
|
_systemd_trigger($cmd, 1);
|
|
return 0;
|
|
}
|
|
|
|
sub logrotate_res {
|
|
my ($cmd, $res) = @_;
|
|
# report any upgrades / downgrades
|
|
set_link($marsadm_version, "$mars/actual-$host/marsadm-version");
|
|
if ($force) {
|
|
lwarn "operation $cmd --force is DANGEROUS\n";
|
|
} else {
|
|
return if check_primary($cmd, $res, 0, 1);
|
|
}
|
|
my $log_glob = "$mars/resource-$res/log-*-$host";
|
|
lprint "get logfiles '$log_glob'\n";
|
|
my @paths = lamport_glob($log_glob);
|
|
my $last;
|
|
if (@paths) {
|
|
@paths = sort alphanum_cmp @paths;
|
|
lprint "found " . scalar(@paths) . " logfiles.\n";
|
|
$last = pop(@paths);
|
|
} else {
|
|
# By definition, logrotate an old logfile must have been produced
|
|
# by the _same_ host. Only relevant for dangerous cron --force.
|
|
lprint "nothing to $cmd: no old logfiles for '$host' exist.\n";
|
|
return;
|
|
}
|
|
if (-z $last) {
|
|
lprint "an empty logfile '$last' already exists, nothing to do.\n" if $verbose;
|
|
return;
|
|
}
|
|
# prevent too many small logfiles when secondaries are not catching up
|
|
if (scalar(@paths) >= $thresh_logfiles) {
|
|
my @stat = stat($last);
|
|
my $size = 0;
|
|
if (@stat) {
|
|
$size = $stat[7] / (1000 * 1000 * 1000);
|
|
}
|
|
if ($size < $thresh_logsize) {
|
|
lprint "current logfile '$last' is smaller than $thresh_logsize GB, skipping logrotate.\n" if $verbose;
|
|
return;
|
|
}
|
|
}
|
|
my $nr = $last;
|
|
$nr =~ s/^.*log-([0-9]+)-.+$/$1/;
|
|
my $next = sprintf("$mars/resource-$res/log-%09d-$host", $nr + 1);
|
|
ldie "logfile '$next' already exists\n" if file_exists($next);
|
|
safe_creat($next) unless $dry_run;
|
|
my $startnr = get_link("$mars/resource-$res/maxnr", 1);
|
|
$startnr = $nr + 1 if ($nr >= $startnr);
|
|
set_link("$startnr", "$mars/resource-$res/maxnr");
|
|
}
|
|
|
|
sub _get_deletable_logfiles {
|
|
my ($cmd, $res) = @_;
|
|
my $min = -1;
|
|
my $max = -1;
|
|
my %replay_links;
|
|
my $max_restarts = 3;
|
|
# Race prevention: get the replay links first, afterwards the logfiles
|
|
restart:
|
|
my @replay_paths = lamport_glob("$mars/resource-$res/replay-*") or ldie "cannot find any replay symlinks\n";
|
|
foreach my $path (@replay_paths) {
|
|
my $target = get_link($path, 1);
|
|
if (!$target) {
|
|
ldie "cannot get link '$path'\n" if $max_restarts-- < 0;
|
|
goto restart;
|
|
}
|
|
$replay_links{$path} = $target;
|
|
}
|
|
my @log_paths = lamport_glob("$mars/resource-$res/log-*") or ldie "cannot find any logfiles\n";
|
|
foreach my $path (@log_paths) {
|
|
$path =~ m/\/log-([0-9]+)-/;
|
|
my $nr = $1;
|
|
$min = $nr if ($nr < $min || $min < 0);
|
|
$max = $nr if ($nr > $max || $max < 0);
|
|
}
|
|
foreach my $path (@replay_paths) {
|
|
my $target = $replay_links{$path};
|
|
$target =~ m/^log-([0-9]+)/;
|
|
my $nr = $1;
|
|
$max = $nr if ($nr < $max || $max < 0);
|
|
}
|
|
return ($min, $max);
|
|
}
|
|
|
|
# for old deletion method (to disappear)
|
|
my %delete_nrs;
|
|
|
|
sub _get_delete_nr {
|
|
my ($basedir, $total) = @_;
|
|
return @{$delete_nrs{$basedir}} if defined($delete_nrs{$basedir});
|
|
_scan_caches() unless %total_peers;
|
|
my $min_nr = 0;
|
|
my $max_nr = 0;
|
|
foreach my $path (lamport_glob("$basedir/deleted-*")) {
|
|
$path =~ m:/deleted-(.+):;
|
|
my $peer = $1;
|
|
next unless $peer;
|
|
next unless $total || $peer eq $real_host || $any_peers{$peer};
|
|
my $link = get_link($path, 1);
|
|
$link =~ m/0*([0-9]+)/;
|
|
$min_nr = $1 if (defined($1) && ($1 < $min_nr || !$min_nr));
|
|
$max_nr = $1 if (defined($1) && $1 > $max_nr);
|
|
}
|
|
my @deletes = lamport_glob("$basedir/delete-*");
|
|
foreach my $path (@deletes) {
|
|
$path =~ m:/delete-0*([0-9]+)-(.+):;
|
|
my $nr = $1;
|
|
my $peer = $2;
|
|
next unless $peer;
|
|
next unless $total || $peer eq $real_host || $any_peers{$peer};
|
|
$max_nr = $nr if (defined($nr) && $nr > $max_nr);
|
|
}
|
|
my $nr_links = $max_nr - $min_nr;
|
|
if ($nr_links > $max_deletions / 2) {
|
|
$verbose = 1 if $verbose <= 0;
|
|
lwarn "Too many deletion links have accumulated into directory $basedir/.\n";
|
|
lwarn "Probably your networking / your firewall rules / another setup problem is causing this, and your monitoring does not notice it.\n";
|
|
lwarn "Please fix it.\n";
|
|
lwarn "When necessary, remove $basedir/delete-* link by hand.\n";
|
|
# There might be holes in the sequence numbers of delete-* links.
|
|
# Don't account for them when it comes to aborts.
|
|
if (scalar(@deletes) > $max_deletions) {
|
|
lwarn "URGENT: please fix it.\n";
|
|
ldie "Aborting for safety reasons\n" if $cron_mode && !$force;
|
|
}
|
|
}
|
|
$delete_nrs{$basedir} = [$min_nr, $max_nr];
|
|
return @{$delete_nrs{$basedir}};
|
|
}
|
|
|
|
sub _set_delete_nr {
|
|
my ($basedir, $min_nr, $max_nr) = @_;
|
|
$delete_nrs{$basedir} = [$min_nr, $max_nr];
|
|
}
|
|
|
|
my %del_dirs;
|
|
my %del_targets;
|
|
|
|
sub _replace_delete {
|
|
my ($basedir, $target) = @_;
|
|
if (!$del_dirs{$basedir}) {
|
|
foreach my $link (lamport_glob("$basedir/delete-*")) {
|
|
my $targ = readlink($link);
|
|
next unless $targ;
|
|
$del_targets{$targ} = $link;
|
|
}
|
|
$del_dirs{$basedir} = 1;
|
|
}
|
|
my ($min_nr, $delete_nr) = _get_delete_nr($basedir, 1);
|
|
if (defined($del_targets{$target})) {
|
|
my $old = $del_targets{$target};
|
|
$old =~ m:/delete-([0-9]+)-:;
|
|
my $old_nr = $1;
|
|
return $old if $old_nr >= $min_nr;
|
|
}
|
|
my $new = sprintf("$basedir/delete-%09d-$real_host", ++$delete_nr);
|
|
_set_delete_nr($basedir, $min_nr, $delete_nr);
|
|
return $new;
|
|
}
|
|
|
|
my $nr_deletions = 0;
|
|
|
|
sub _create_delete {
|
|
my ($target) = @_;
|
|
ldie "cannot delete: '$target' is no absolute path\n" unless $target =~ m:^/:;
|
|
if (!$compat_deletions) {
|
|
lprint "delete '$target'\n" if $verbose;
|
|
set_link(".deleted", $target);
|
|
return;
|
|
}
|
|
my $global_path = "$mars/todo-global";
|
|
my $new = _replace_delete($global_path, $target);
|
|
lprint "create symlink $new -> $target\n" if $verbose;
|
|
set_link($target, $new);
|
|
$nr_deletions++;
|
|
}
|
|
|
|
sub _wait_delete {
|
|
return if $dry_run;
|
|
lwarn "Do not run this in --parallel mode\n" if $child_prefix;
|
|
finish_links();
|
|
return if !$nr_deletions;
|
|
$nr_deletions = 0;
|
|
for (;;) {
|
|
my $del_link = "$mars/todo-global/deleted-$real_host";
|
|
my $deleted = get_link($del_link, 1);
|
|
if (!$deleted) {
|
|
return if !$compat_deletions;
|
|
lwarn "Old deletions: '$del_link' does not exist\n";
|
|
sleep_timeout();
|
|
return;
|
|
}
|
|
$deleted =~ s/^0+//;
|
|
my ($min_nr, $delete_nr) = _get_delete_nr("$mars/todo-global");
|
|
last if $deleted >= $delete_nr;
|
|
lprint "waiting for deletions to apply locally....\n";
|
|
sleep_timeout();
|
|
}
|
|
}
|
|
|
|
sub delete_file_cmd {
|
|
my $cmd = shift;
|
|
my $res = shift; # ignore this
|
|
foreach my $path (@_) {
|
|
check_userspace($path);
|
|
_create_delete($path);
|
|
}
|
|
}
|
|
|
|
sub _purge_del_glob {
|
|
my ($clean_glob, $min_time) = @_;
|
|
foreach my $path (glob($clean_glob)) {
|
|
next unless -l $path;
|
|
my $val = readlink($path);
|
|
next unless $val eq ".deleted";
|
|
my $age = get_link_stamp($path);
|
|
next unless $age;
|
|
if ($age < $min_time) {
|
|
lprint " remove deleted '$path'\n" if $verbose;
|
|
unlink($path);
|
|
}
|
|
}
|
|
}
|
|
|
|
sub _get_min_time {
|
|
my ($start_time, $res) = @_;
|
|
my $min_time = $start_time;
|
|
if (!$force) {
|
|
my %status = get_alive_links($res, "time", "*");
|
|
foreach my $peer (keys(%status)) {
|
|
my $stamp = $status{$peer};
|
|
$stamp = 0 if (!defined($stamp) || $stamp eq "");
|
|
$min_time = $stamp if $stamp < $min_time;
|
|
}
|
|
# protect against dead / decommissioned peers
|
|
my $back_time = $start_time - 3600 * $keep_backups;
|
|
$min_time = $back_time if $back_time > $min_time;
|
|
}
|
|
$min_time -= $window;
|
|
return $min_time;
|
|
}
|
|
|
|
# old deletion method, hopefully to disappear somewhen in future
|
|
sub _compat_purge_deleted {
|
|
my ($del_glob, $msg_glob) = @_;
|
|
my $start_time = mars_time();
|
|
foreach my $leftlink (lamport_glob($del_glob)) {
|
|
# remove outdated .deleted-* markers
|
|
if ($leftlink =~ m:/.deleted-:) {
|
|
my $info = get_link($leftlink, 2);
|
|
if ($info =~ m/^[0-9]+,[0-9]+$/) {
|
|
my ($serial, $mode) = split(/,/, $info);
|
|
my $todo_glob = "$mars/todo-global/deleted-*";
|
|
if ($mode) {
|
|
$leftlink =~ m:/resource-([^/]+)/:;
|
|
my $res = $1;
|
|
$todo_glob = "$mars/resource-$res/todo-$real_host/deleted-*"
|
|
}
|
|
# compute the minimum border
|
|
my $border = 0;
|
|
foreach my $deleted_link (lamport_glob($todo_glob)) {
|
|
my $limit = get_link($deleted_link, 2);
|
|
next if !$limit || $limit <= 1;
|
|
$border = $limit if (!$border || $limit < $border);
|
|
}
|
|
if ($serial < $border && $serial > 1) {
|
|
unlink($leftlink);
|
|
next;
|
|
}
|
|
}
|
|
}
|
|
my $stamp = get_link_stamp($leftlink);
|
|
next unless $stamp + 3600 * 24 < $start_time;
|
|
lprint " unlink '$leftlink'\n" if $verbose;
|
|
unlink($leftlink);
|
|
}
|
|
foreach my $leftlink (lamport_glob($msg_glob)) {
|
|
# cleanup historic msg-*
|
|
if ($leftlink =~ m:/msg-(conncetion-from-|additional-connection-):) {
|
|
lprint " unlink deprecated '$leftlink'\n" if $verbose;
|
|
unlink($leftlink);
|
|
next;
|
|
}
|
|
my $stamp = get_link_stamp($leftlink);
|
|
next unless $stamp + 3600 * 4 < $start_time;
|
|
lprint " unlink '$leftlink'\n" if $verbose;
|
|
unlink($leftlink);
|
|
}
|
|
}
|
|
|
|
my $purged_globally = 0;
|
|
|
|
sub link_purge_global {
|
|
my ($cmd) = @_;
|
|
return if $purged_globally++;
|
|
my $start_time = mars_time();
|
|
my $min_time = _get_min_time($start_time, "*");
|
|
# new deletion method
|
|
my $clean_glob = "$mars/{,*/}{.*,*}";
|
|
_purge_del_glob($clean_glob, $min_time);
|
|
# old deletion method
|
|
my $del_glob = "$mars/{,*/}{.tmp,.deleted,delete,work}-*";
|
|
my $msg_glob = "$mars/actual-*/msg-*";
|
|
_compat_purge_deleted($del_glob, $msg_glob);
|
|
# keep internal backups for at least 1 hour
|
|
if ($keep_backups < 1) {
|
|
$keep_backups = 1;
|
|
}
|
|
lprint "removing left-over symlinks...\n" if $verbose;
|
|
# remove any old alivelinks
|
|
get_global_versions();
|
|
my $kernel_compat = get_alive_link("compat-alivelinks", $host, 2);
|
|
# only when a new kernel is running
|
|
if (defined($kernel_compat) && $kernel_compat ne "") {
|
|
my $keep_time = 0;
|
|
my $glob = "{alive,buildtag,emergency,features,rest-space,time,tree,usable,used,compat-alive}";
|
|
# Notice: $kernel_compat might differ from $compat_alivelinks
|
|
# For example, this can happen temporarily during join-cluster & co
|
|
if (!$kernel_compat) {
|
|
lprint "NEW alivelinks active: purging OLD alivelinks\n" if $verbose;
|
|
$glob = "$mars/$glob-*";
|
|
} elsif ($compat_alivelinks && $kernel_strategy_version < 3) {
|
|
# This should happen only after true downgrades.
|
|
# Notice that newer kernels are writing _both_ variants
|
|
# during compat mode.
|
|
lprint "OLD alivelinks active: purging NEW alivelinks\n" if $verbose;
|
|
$glob = "$mars/actual-$host/$glob*";
|
|
$keep_time = $keep_backups unless $force;
|
|
} else {
|
|
$glob = "";
|
|
}
|
|
if ($glob) {
|
|
foreach my $leftlink (lamport_glob($glob)) {
|
|
if ($keep_time) {
|
|
my $stamp = get_link_stamp($leftlink);
|
|
next unless $stamp + 3600 * $keep_time < $start_time;
|
|
}
|
|
_create_delete($leftlink);
|
|
}
|
|
}
|
|
}
|
|
foreach my $leftlink (lamport_glob("$mars/backup*")) {
|
|
my $stamp = get_link_stamp($leftlink);
|
|
next unless $stamp + 3600 * $keep_backups < $start_time;
|
|
lprint " unlink '$leftlink'\n" if $verbose;
|
|
system("rm -rf $leftlink");
|
|
}
|
|
# remove any left-over probe dirs
|
|
my $probe_glob = "$mars/probe-*";
|
|
foreach my $path (glob($probe_glob)) {
|
|
my $age = get_link_stamp($path);
|
|
next unless ($age < $min_time);
|
|
system("rm -rf $path");
|
|
}
|
|
}
|
|
|
|
sub link_purge_res {
|
|
my ($cmd, $res) = @_;
|
|
link_purge_global($cmd);
|
|
my $start_time = mars_time();
|
|
my $min_time = _get_min_time($start_time, $res);
|
|
# new deletion method
|
|
my $clean_glob = "$mars/resource-$res/{,*/}{.*,*}";
|
|
_purge_del_glob($clean_glob, $min_time);
|
|
# old deletion method
|
|
my $del_glob = "$mars/resource-$res/{,*/}{.tmp,.deleted,delete,work}-*";
|
|
my $msg_glob = "$mars/resource-$res/actual-*/msg-*";
|
|
_compat_purge_deleted($del_glob, $msg_glob);
|
|
}
|
|
|
|
sub logdelete_res {
|
|
my ($cmd, $res) = @_;
|
|
lprint "removing left-over $res logfiles...\n" if $verbose;
|
|
my $start_time = mars_time();
|
|
my @paths = lamport_glob("$mars/resource-$res/log-*") or ldie "cannot find any logfiles\n";
|
|
@paths = sort alphanum_cmp @paths;
|
|
my ($min_deletable, $max_deletable) = _get_deletable_logfiles(@_);
|
|
lprint "min deletable logfile number: $min_deletable\n" if $verbose;
|
|
lprint "min non-deletable logfile number: $max_deletable\n" if $verbose;
|
|
if ($min_deletable >= $max_deletable) {
|
|
lprint "no logfiles are deletable.\n" if $verbose;
|
|
return;
|
|
}
|
|
if ($cmd eq "log-delete-one") {
|
|
$max_deletable = $min_deletable + 1; # delete only the first one
|
|
}
|
|
|
|
my $nr = 0;
|
|
my $first = shift(@paths);
|
|
for (;;) {
|
|
last unless $first;
|
|
my $next = shift(@paths);
|
|
# never delete the very last logfile
|
|
last unless $next;
|
|
# safeguard: only delete logfiles having a minium age
|
|
last if !$force && is_link_recent($first);
|
|
$nr = $first;
|
|
$nr =~ s/^.*log-([0-9]+)-.+$/$1/;
|
|
next unless $nr < $max_deletable;
|
|
|
|
lprint "chosen '$first' for deletion\n" if $verbose;
|
|
|
|
_create_delete($first);
|
|
$first = $next;
|
|
}
|
|
# Determine whether a parallel join-resource is ongoing
|
|
my $transient_join = 0;
|
|
foreach my $path (lamport_glob("$mars/resource-$res/device-*")) {
|
|
my $val = get_link($path, 1);
|
|
next unless $val;
|
|
my $stamp = get_link_stamp($path);
|
|
if ($val eq "(none)") {
|
|
lwarn "Transient join-resource detected at '$path'\n";
|
|
$transient_join = 1;
|
|
# Remove any transient links after a while
|
|
if ($stamp < 10 ) {
|
|
finish_links();
|
|
set_link($val, $path);
|
|
finish_links($stamp + 1);
|
|
} elsif ($stamp < 20) {
|
|
_create_delete($path);
|
|
}
|
|
next;
|
|
}
|
|
next if is_member($res, $host);
|
|
next unless is_guest($res, $host);
|
|
# remove any inactive guests after backup retention period
|
|
my $device_on_path = "$mars/resource-$res/actual-$host/if-on";
|
|
my $device_on_val = get_link($device_on_path, 1);
|
|
my $device_on_stamp = get_link_stamp($device_on_path);
|
|
$device_on_stamp = $stamp if $stamp > $device_on_stamp;
|
|
# is the guest inactive?
|
|
if (defined($device_on_val) &&
|
|
$device_on_val ne "" &&
|
|
!$device_on_val &&
|
|
$device_on_stamp > 20 &&
|
|
$device_on_stamp + $keep_backups < $start_time) {
|
|
lprint "Purging guest '$res'\n";
|
|
#_create_delete($path);
|
|
}
|
|
}
|
|
my @versionlinks = lamport_glob("$mars/resource-$res/version-*");
|
|
# When join-resource is humming, or during split-brain,
|
|
# don't remove versionlinks as far as possible.
|
|
if (scalar(@versionlinks) < $max_deletions / 8 &&
|
|
($transient_join ||
|
|
!detect_splitbrain($res, 1))) {
|
|
lwarn "Keeping some versionlinks\n";
|
|
return unless $force;
|
|
}
|
|
lprint "Removing left-over versionlinks...\n" if $verbose;
|
|
foreach my $versionlink (@versionlinks) {
|
|
my $nrv = $versionlink;
|
|
$nrv =~ s/^.*\/version-([0-9]+)-.+$/$1/;
|
|
# we need at least one more version link than logfiles for consistency checks
|
|
next unless $nrv < $max_deletable - 1;
|
|
_create_delete($versionlink);
|
|
}
|
|
|
|
# remove outdated split-cluster remains
|
|
my %peers;
|
|
foreach my $path (lamport_glob("$mars/ips/ip-*")) {
|
|
$path =~ m:/ip-(.*):;
|
|
my $peer = $1;
|
|
$peers{$peer} = 1;
|
|
}
|
|
foreach my $leftlink (lamport_glob("$mars/{,resource-$res/}todo-*/deleted-*")) {
|
|
$leftlink =~ m:/deleted-(.*):;
|
|
my $peer = $1;
|
|
next if $peers{$peer};
|
|
my $stamp = get_link_stamp($leftlink);
|
|
next unless $stamp + 3600 < $start_time;
|
|
lprint " unlink '$leftlink'\n" if $verbose;
|
|
unlink($leftlink);
|
|
}
|
|
}
|
|
|
|
sub cron_phase1 {
|
|
my ($cmd, $res) = @_;
|
|
$cron_mode = 1;
|
|
link_purge_res(@_);
|
|
logrotate_res(@_);
|
|
finish_links();
|
|
_trigger(3);
|
|
return 0;
|
|
}
|
|
|
|
sub cron_phase2 {
|
|
my ($cmd, $res) = @_;
|
|
$cron_mode = 1;
|
|
logdelete_res(@_);
|
|
return 0;
|
|
}
|
|
|
|
sub attach_res_phase0 {
|
|
my ($cmd, $res) = @_;
|
|
return 0 if $force;
|
|
my $detach = ($cmd eq "detach");
|
|
if ($detach) {
|
|
my $device_in_use = get_link("$mars/resource-$res/actual-$host/open-count", 1);
|
|
if ($device_in_use) {
|
|
my $want_path = "$mars/resource-$res/systemd-want";
|
|
my $want = get_link($want_path, 2);
|
|
if ($want) {
|
|
lprint "IMPORTANT: Relying on systemd for $cmd of resource '$res'\n";
|
|
my $path = "$mars/resource-$res/todo-$host/attach";
|
|
_switch($cmd, $res, $path, 0);
|
|
finish_links();
|
|
systemd_activate($cmd, $res, 0, 1);
|
|
return 0;
|
|
}
|
|
my $dev = device_name($res);
|
|
ldie "device '$dev' is in use\n";
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
# only for systemd: wait that primary device is no longer open
|
|
sub attach_res_phase0b {
|
|
my ($cmd, $res) = @_;
|
|
return 0 unless $cmd eq "detach";
|
|
return 0 unless systemd_present(@_);
|
|
check_status($cmd, $res, "open-count", 0, 1);
|
|
wait_cluster($cmd);
|
|
return 0;
|
|
}
|
|
|
|
sub attach_res_phase1 {
|
|
my ($cmd, $res) = @_;
|
|
my $detach = ($cmd eq "detach");
|
|
my $path = "$mars/resource-$res/todo-$host/attach";
|
|
_switch($cmd, $res, $path, !$detach);
|
|
return 0;
|
|
}
|
|
|
|
sub attach_res_phase2 {
|
|
my ($cmd, $res) = @_;
|
|
my $detach = ($cmd eq "detach");
|
|
return 0 if $force;
|
|
if (!is_module_loaded()) {
|
|
lwarn "Kernel module not loaded: $cmd will become effective after modprobe\n";
|
|
return 0;
|
|
}
|
|
check_status($cmd, $res, "is-attached", $detach ? 0 : 1, 1);
|
|
if ($detach) {
|
|
system("sync");
|
|
check_mars_device($cmd, $res, 1, 1);
|
|
check_status($cmd, $res, "is-replaying", 0, 1);
|
|
check_status($cmd, $res, "is-syncing", 0, 1);
|
|
system("sync");
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
sub fetch_global_res {
|
|
my ($cmd, $res) = @_;
|
|
my $pause = ($cmd =~ m/disconnect|pause/);
|
|
my @paths = lamport_glob("$mars/resource-$res/todo-*/");
|
|
for my $path (@paths) {
|
|
_switch($cmd, $res, "$path/connect", !$pause);
|
|
}
|
|
}
|
|
|
|
sub fetch_local_res {
|
|
my ($cmd, $res) = @_;
|
|
my $pause = ($cmd =~ m/disconnect|pause/);
|
|
my $path = "$mars/resource-$res/todo-$host/connect";
|
|
_switch($cmd, $res, $path, !$pause);
|
|
}
|
|
|
|
sub pause_sync_global_res {
|
|
my ($cmd, $res) = @_;
|
|
my $pause = ($cmd =~ m/pause/);
|
|
check_sync_startable(@_) if !$pause;
|
|
my @paths = lamport_glob("$mars/resource-$res/todo-*/");
|
|
for my $path (@paths) {
|
|
_switch($cmd, $res, "$path/sync", !$pause);
|
|
}
|
|
}
|
|
|
|
sub pause_sync_local_res {
|
|
my ($cmd, $res) = @_;
|
|
my $pause = ($cmd =~ m/pause/);
|
|
check_sync_startable(@_) if !$pause;
|
|
my $path = "$mars/resource-$res/todo-$host/sync";
|
|
_switch($cmd, $res, $path, !$pause);
|
|
}
|
|
|
|
sub pause_replay_global_res {
|
|
my ($cmd, $res) = @_;
|
|
my $pause = ($cmd =~ m/pause/);
|
|
my @paths = lamport_glob("$mars/resource-$res/todo-*/");
|
|
for my $path (@paths) {
|
|
_switch($cmd, $res, "$path/replay", !$pause);
|
|
}
|
|
}
|
|
|
|
sub pause_replay_local_res {
|
|
my ($cmd, $res) = @_;
|
|
my $pause = ($cmd =~ m/pause/);
|
|
my $path = "$mars/resource-$res/todo-$host/replay";
|
|
_switch($cmd, $res, $path, !$pause);
|
|
}
|
|
|
|
sub up_res_phase0 {
|
|
my ($cmd, $res) = @_;
|
|
my $down = ($cmd eq "down");
|
|
if ($down) {
|
|
attach_res_phase0("detach", $res);
|
|
} else {
|
|
attach_res_phase0("attach", $res);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
sub up_res_phase1 {
|
|
my ($cmd, $res) = @_;
|
|
my $down = ($cmd eq "down");
|
|
if ($down) {
|
|
pause_replay_local_res("pause-replay-local", $res);
|
|
pause_sync_local_res("pause-sync-local", $res);
|
|
fetch_local_res("pause-fetch", $res);
|
|
attach_res_phase1("detach", $res);
|
|
} else {
|
|
attach_res_phase1("attach", $res);
|
|
fetch_local_res("resume-fetch-local", $res);
|
|
# ignore ldie on sync, just do all the rest
|
|
eval {
|
|
pause_sync_local_res("resume-sync-local", $res);
|
|
};
|
|
pause_replay_local_res("resume-replay-local", $res);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
sub up_res_phase2 {
|
|
my ($cmd, $res) = @_;
|
|
my $down = ($cmd eq "down");
|
|
if ($down) {
|
|
attach_res_phase2("detach", $res);
|
|
} else {
|
|
attach_res_phase2("attach", $res);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
sub set_replay_res {
|
|
my ($cmd, $res, $new_nr) = @_;
|
|
if (!$new_nr || $new_nr <= 0) {
|
|
ldie "you must supply a numeric logfile number as third argument.\n";
|
|
}
|
|
check_not_primary($cmd, $res);
|
|
check_todo($cmd, $res, "replay", 0, 0);
|
|
my $replaylink = "$mars/resource-$res/replay-$host";
|
|
my $old_val = get_link($replaylink);
|
|
my $old_nr = $old_val;
|
|
$old_nr =~ s/log-([0-9]+)-.*/$1/;
|
|
ldie "old log number '$old_nr' is wrong\n" unless $old_nr > 0;
|
|
if ($new_nr > $old_nr) {
|
|
lwarn "you try to skip logfile numbers from $old_nr to $new_nr, are you sure?\n";
|
|
ldie "you would need --force if you really know what you are doing.\n" unless $force;
|
|
}
|
|
my $primary = _get_designated_primary($res);
|
|
_set_replaylink($cmd, $res, $new_nr, $primary);
|
|
}
|
|
|
|
sub fake_sync_phase1 {
|
|
my ($cmd, $res) = @_;
|
|
my $path = "$mars/resource-$res/todo-$host/sync";
|
|
_switch($cmd, $res, $path, 0);
|
|
return 0;
|
|
}
|
|
|
|
sub fake_sync_phase2 {
|
|
my ($cmd, $res) = @_;
|
|
check_status($cmd, $res, "is-syncing", 0, 1);
|
|
return 0;
|
|
}
|
|
|
|
sub fake_sync_phase3 {
|
|
my ($cmd, $res) = @_;
|
|
my $size = get_link("$mars/resource-$res/size");
|
|
my $target = "$mars/resource-$res/syncstatus-$host";
|
|
set_link($size, $target);
|
|
return 0;
|
|
}
|
|
|
|
sub _primary_res {
|
|
my ($res, $new, $old) = @_;
|
|
my $pri = "$mars/resource-$res/primary";
|
|
finish_links();
|
|
my $old_stamp = get_link_stamp($pri);
|
|
set_link($new, $pri);
|
|
if ($new eq "(none)") {
|
|
# Allow overriding of secondaries in a partitioned cluster
|
|
# by using a weaker timestamp.
|
|
# When somebody issues a (more or less) _concurrent_ "primary" command
|
|
# during a network partition, let the "primary" win over the "secondary".
|
|
# Notice: the definition of "concurrent" in the sense of Lamport
|
|
# (where _any_ communication may take "arbitrarily" long)
|
|
# may be non-intuitive to humans in the presence of a network outage.
|
|
finish_links($old_stamp + 1) if $old_stamp;
|
|
} else {
|
|
my $last_lnk = "$mars/resource-$res/userspace/last-primary";
|
|
my $prev_lnk = "$mars/resource-$res/userspace/prev-primary";
|
|
system("mv -f $last_lnk $prev_lnk");
|
|
set_link($new, $last_lnk);
|
|
}
|
|
lprint "designated primary changed from '$old' to '$new'\n";
|
|
}
|
|
|
|
# check whether primary/secondary switching is possible at all
|
|
sub primary_phase0 {
|
|
my ($cmd, $res) = @_;
|
|
ldie "cannot switch primary: mars kernel module is not loaded\n" unless ($cmd eq "secondary" || -d "/proc/sys/mars");
|
|
if ($force) {
|
|
check_todo($cmd, $res, "fetch", 0, 0);
|
|
}
|
|
my $old = _get_designated_primary($res, -1);
|
|
lprint "Current designated primary: $old\n";
|
|
if ($cmd eq "primary") {
|
|
if ($host ne $old) {
|
|
lprint "Allowing handover in cases of sync: ignore_sync=$ignore_sync\n" if $ignore_sync;
|
|
check_sync_finished($res, $host, $ignore_sync);
|
|
# also check that other secondaries won't loose their sync primary
|
|
my @names = lamport_glob("$mars/resource-$res/data-*");
|
|
# for k <= 2 replicas, the previous check must have been sufficient
|
|
if (scalar(@names) > 2) {
|
|
my $allow_anyway = ($force || $ignore_sync);
|
|
lprint "Allowing handover in cases of sync: force=$force ignore_sync=$ignore_sync\n" if $allow_anyway;
|
|
foreach my $name (@names) {
|
|
$name =~ m:/data-(.+):;
|
|
my $peer = $1;
|
|
next if ($peer eq $old || $peer eq $host);
|
|
check_sync_finished($res, $peer, $allow_anyway);
|
|
}
|
|
}
|
|
}
|
|
check_todo($cmd, $res, "attach", 1, 0);
|
|
check_todo($cmd, $res, "fetch", 1, 0) if !$force;
|
|
check_todo($cmd, $res, "replay", 1, 0);
|
|
# check that no logfile replay errors exist.
|
|
my $replay_error = get_link("$mars/resource-$res/actual-$host/replay-code", 2);
|
|
if (defined($replay_error) && $replay_error ne "" && int($replay_error) < 0) {
|
|
lwarn "Logfile replay / recovery stopped with error code $replay_error.\n";
|
|
ldie "Won't switch to avoid unnoticed data loss. You may however do a 'primary --force'.\n" unless $force;
|
|
}
|
|
}
|
|
my $want_path = "$mars/resource-$res/systemd-want";
|
|
my $want = get_link($want_path, 2);
|
|
if ($want) {
|
|
my $new;
|
|
my $oper;
|
|
if ($cmd eq "primary") {
|
|
$new = $host;
|
|
$oper = "start";
|
|
} else {
|
|
$new = "(none)";
|
|
$oper = "stop";
|
|
}
|
|
set_link($new, $want_path);
|
|
my $unit_path = "$mars/resource-$res/systemd-$oper-unit";
|
|
my $unit = get_link($unit_path, 2);
|
|
if ($unit) {
|
|
if ($old ne "(none)") {
|
|
report_systemd_status($cmd, $res, 0, "stop", $old);
|
|
}
|
|
lprint "IMPORTANT: Relying on systemd for $oper of unit '$unit'\n";
|
|
lprint "IMPORTANT: unit '$unit' wanted at '$new'\n";
|
|
finish_links();
|
|
_trigger(3);
|
|
return 0;
|
|
}
|
|
}
|
|
return 0 if ($old eq $host and $cmd eq "primary");
|
|
return 0 if $old eq "(none)";
|
|
my $open_count_path = "$mars/resource-$res/actual-$old/open-count";
|
|
my $device_in_use = get_link($open_count_path, 1);
|
|
if ($device_in_use) {
|
|
my $dev = device_name($res, $old);
|
|
lwarn "device '$dev' for resource '$res' is $device_in_use times in use on primary host '$old'\n";
|
|
ldie "first you must umount/close the device (on host '$old')\n" unless $force;
|
|
lwarn "First you SHOULD umount/close the device (on host '$old'), but you ignore this recommendation by giving the --force option.\n";
|
|
if (is_link_recent($open_count_path)) {
|
|
lwarn "You are forcing a SPLIT BRAIN via --force right now. Do you know that this is an ERRONEOUS state? Do you really know what you are doing?\n";
|
|
} else {
|
|
lwarn "You may produce a SPLIT BRAIN via --force because the peer host '$old' is currently not reachable.\n";
|
|
}
|
|
}
|
|
lprint "all preconditions OK for resource '$res'\n";
|
|
return 0;
|
|
}
|
|
|
|
# only for primary --force: wait until fetch has actually stopped
|
|
sub primary_phase0a {
|
|
my ($cmd, $res) = @_;
|
|
my $new = $host;
|
|
if (!$force && $cmd =~ m/primary/) {
|
|
lprint "Prepare new primary '$new' handover\n";
|
|
_switch($cmd, $res, "$mars/resource-$res/todo-$new/fetch", 1);
|
|
}
|
|
finish_links();
|
|
return 0 unless $force;
|
|
wait_cond($cmd, $res, "is-fetch-off");
|
|
return 0;
|
|
}
|
|
|
|
# only for systemd: wait that primary device is no longer open
|
|
sub primary_phase0b {
|
|
my ($cmd, $res) = @_;
|
|
return 0 unless systemd_present(@_);
|
|
# only relevant for true handover
|
|
my $old = _get_designated_primary($res, -1);
|
|
return 0 if ($old eq $host || $old eq "(none)");
|
|
# ignore primary --force
|
|
my $connect_path = "$mars/resource-$res/todo-$host/connect";
|
|
my $connect = get_link($connect_path, 1);
|
|
return 0 if (!defined($connect) || !$connect);
|
|
# Notice: this is a workaround for a problem which is
|
|
# outside of our scope. For example, a remote umount will
|
|
# fail when any (delayed) process has any filehandle open.
|
|
# As long as the umount was unsuccessful, we _cannot_ proceed
|
|
# with planned handover.
|
|
# As a workaround, we constantly trigger the remote systemd
|
|
# in the _hope_ that the umount will succeed, and the
|
|
# open-count will then go down to zero, hopefully somewhen.
|
|
my $watch = "$mars/resource-$res/systemd-want";
|
|
my $action = "";
|
|
if (link_exists($watch)) {
|
|
$action = "system(\"touch -h $watch\");";
|
|
my $response_path = "$mars/resource-$res/userspace/systemd-status-stop-$old";
|
|
$action .= "\$action_status = get_link(\"$response_path\");";
|
|
my $msg = "systemctl stop on peer $old: status=\$action_status\n";
|
|
$action .= "ldie \"$msg\" if \$action_status;";
|
|
}
|
|
# try to compensate failures by systemd restart
|
|
my $compensation = undef;
|
|
if ($old ne "(none)") {
|
|
$compensation = "lprint \"Restarting '$res' on '$old'\n\"; ";
|
|
$compensation .= "_primary_res(\"$res\", \"$old\", \"(none)\"); ";
|
|
$compensation .= "set_systemd_want_phase1(\"$cmd\", \"$res\", \"$old\"); ";
|
|
$compensation .= "finish_links(); ";
|
|
$compensation .= "_trigger(3); ";
|
|
$compensation .= "lprint \"Triggered systemd at '$old'.\n\";";
|
|
}
|
|
check_status($cmd, $res, "open-count", 0, 1, undef, undef, $old, $action, $compensation);
|
|
return 0;
|
|
}
|
|
|
|
# when necessary, switch to secondary (intermediately)
|
|
sub primary_phase1 {
|
|
my ($cmd, $res) = @_;
|
|
return 0 if ($force and $cmd eq "primary");
|
|
my $old = _get_designated_primary($res, -1);
|
|
return 0 if ($old eq $host and $cmd eq "primary");
|
|
my $new = "(none)";
|
|
if (!$force and $cmd eq "primary") {
|
|
my $status = try_to_avoid_splitbrain($cmd, $res, $old);
|
|
return $status if $status;
|
|
}
|
|
return 0 if $old eq $new;
|
|
_primary_res($res, $new, $old);
|
|
return 0;
|
|
}
|
|
|
|
my $phase2_waited = 0;
|
|
|
|
sub primary_phase1b {
|
|
my ($cmd, $res) = @_;
|
|
$phase2_waited = 0;
|
|
finish_links();
|
|
return 0 if $force;
|
|
my $old = _get_designated_primary($res, -1);
|
|
my $status = check_primary_gone($cmd, $res, $old);
|
|
return $status if $status;
|
|
if (!$force and $cmd eq "primary") {
|
|
my $status = try_to_avoid_splitbrain($cmd, $res, $old);
|
|
return $status if $status;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
# when necessary, wait
|
|
sub primary_phase2 {
|
|
my ($cmd, $res) = @_;
|
|
return 0 if $force;
|
|
return 0 unless $cmd eq "primary";
|
|
wait_cluster($cmd) if !$phase2_waited++;
|
|
my $old = _get_designated_primary($res, -1);
|
|
return check_primary_gone($cmd, $res, $old);
|
|
}
|
|
|
|
sub primary_phase2b {
|
|
my ($cmd, $res) = @_;
|
|
return 0 if $force;
|
|
if (systemd_present(@_)) {
|
|
my $old = _get_designated_primary($res, -1);
|
|
return try_to_avoid_splitbrain($cmd, $res, $old);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
# when necessary, switch to primary
|
|
sub primary_phase3 {
|
|
my ($cmd, $res) = @_;
|
|
return 0 unless $cmd eq "primary";
|
|
my $old = _get_designated_primary($res, -1);
|
|
my $new = $host;
|
|
_primary_res($res, $new, $old);
|
|
return 0;
|
|
}
|
|
|
|
sub primary_phase3b {
|
|
finish_links();
|
|
return 0;
|
|
}
|
|
|
|
# wait for device to appear / disappear
|
|
sub primary_phase4 {
|
|
my ($cmd, $res) = @_;
|
|
if($cmd eq "secondary") {
|
|
check_mars_device($cmd, $res, 1, 1);
|
|
return 0;
|
|
}
|
|
my $ok = detect_splitbrain($res, 1);
|
|
if (!$ok) {
|
|
lwarn "\n";
|
|
lwarn "Sorry, in split brain situations I can only set the _designated_\n";
|
|
lwarn "primary, but I cannot _guarantee_ that becoming the\n";
|
|
lwarn "_actual_ primary is always possible.\n";
|
|
lwarn "You SHOULD resolve the split brain ASAP (e.g. by leave-resource\n";
|
|
lwarn "or invalidate etc).\n";
|
|
lwarn "\n";
|
|
lwarn "If you already tried to resolve the split brain manually, but\n";
|
|
lwarn "this message does not disappear, the reason could be some\n";
|
|
lwarn "hindering left-overs/remains from the former split brain.\n";
|
|
lwarn "ONLY in such a case, try log-purge-all --force.\n";
|
|
lwarn "\n";
|
|
return 0;
|
|
}
|
|
check_mars_device($cmd, $res, 1, 0);
|
|
# new switch semantics, when nothing has failed before: up
|
|
up_res_phase1(@_);
|
|
return 0;
|
|
}
|
|
|
|
sub primary_phase5 {
|
|
return 0 unless systemd_present(@_);
|
|
return set_systemd_want_phase3(@_);
|
|
}
|
|
|
|
sub wait_umount_res {
|
|
my ($cmd, $res) = @_;
|
|
my $path = "$mars/resource-$res/actual-$host/open-count";
|
|
while (1) {
|
|
my $sum = get_link($path);
|
|
last if !$sum;
|
|
lprint "device for resource '$res' is $sum times in use on $host\n";
|
|
sleep_timeout(1);
|
|
}
|
|
lprint "OK, device for resource '$res' is not in use.\n";
|
|
}
|
|
|
|
sub invalidate_res_phase0 {
|
|
my ($cmd, $res) = @_;
|
|
check_not_primary($cmd, $res);
|
|
my $primary = _get_designated_primary($res);
|
|
ldie "for operation '$cmd', some other designated primary must exist (currently there is none)\n" if $primary eq "(none)";
|
|
ldie "Cannot invalidate the designated primary host '$primary'\n" if $primary eq $host;
|
|
return 0;
|
|
}
|
|
|
|
sub invalidate_res_phase1 {
|
|
my ($cmd, $res) = @_;
|
|
_switch($cmd, $res, "$mars/resource-$res/todo-$host/attach", 0);
|
|
_switch($cmd, $res, "$mars/resource-$res/todo-$host/sync", 0);
|
|
_switch($cmd, $res, "$mars/resource-$res/todo-$host/fetch", 0);
|
|
_switch($cmd, $res, "$mars/resource-$res/todo-$host/replay", 0);
|
|
return 0;
|
|
}
|
|
|
|
sub invalidate_res_phase2 {
|
|
my ($cmd, $res) = @_;
|
|
if (!$force) {
|
|
check_status($cmd, $res, "is-syncing", 0, 1);
|
|
check_status($cmd, $res, "is-fetching", 0, 1);
|
|
check_status($cmd, $res, "is-replaying", 0, 1);
|
|
check_status($cmd, $res, "is-attached", 0, 1);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
sub invalidate_res_phase3 {
|
|
my ($cmd, $res) = @_;
|
|
my $dst = "$mars/resource-$res/syncstatus-$host";
|
|
my $primary = _get_designated_primary($res);
|
|
ldie "Cannot execute 'invalidate' because noone is designated as primary.\n" if (!$primary || $primary eq "(none)");
|
|
ldie "Cannot invalidate the designated primary host '$primary'\n" if $primary eq $host;
|
|
my $replay = get_link("$mars/resource-$res/replay-$primary");
|
|
$replay =~ m/^log-([0-9]+)-/ or ldie "replay link '$replay' is not parsable\n";
|
|
my $replay_nr = $1;
|
|
|
|
set_link("0", $dst);
|
|
finish_links(); # opportunity for errors => don't continue
|
|
for my $vers_path (lamport_glob("$mars/resource-$res/version-*-$host")) {
|
|
$vers_path =~ m:/version-([0-9]+):;
|
|
my $this_nr = $1;
|
|
_create_delete($vers_path) if $this_nr >= $replay_nr;
|
|
}
|
|
_create_delete("$mars/resource-$res/replay-$host");
|
|
finish_links();
|
|
return 0;
|
|
}
|
|
|
|
sub invalidate_res_phase4 {
|
|
my ($cmd, $res) = @_;
|
|
_wait_delete();
|
|
return 0;
|
|
}
|
|
|
|
sub invalidate_res_phase5 {
|
|
my ($cmd, $res) = @_;
|
|
$force = 0; # this would be too dangerous
|
|
log_purge_res(@_);
|
|
err_purge_res(@_);
|
|
return 0;
|
|
}
|
|
|
|
sub invalidate_res_phase6 {
|
|
my ($cmd, $res) = @_;
|
|
_wait_delete();
|
|
return 0;
|
|
}
|
|
|
|
sub invalidate_res_phase7 {
|
|
my ($cmd, $res) = @_;
|
|
my $dst = "$mars/resource-$res/syncstatus-$host";
|
|
my $primary = _get_designated_primary($res);
|
|
ldie "Cannot execute 'invalidate' because noone is designated as primary.\n" if (!$primary || $primary eq "(none)");
|
|
ldie "Cannot invalidate the designated primary host '$primary'\n" if $primary eq $host;
|
|
my $replay = get_link("$mars/resource-$res/replay-$primary");
|
|
$replay =~ m/^log-([0-9]+)-/ or ldie "replay link '$replay' is not parsable\n";
|
|
my $replay_nr = $1;
|
|
|
|
_set_replaylink($cmd, $res, $replay_nr, $primary, "");
|
|
finish_links();
|
|
return 0;
|
|
}
|
|
|
|
sub invalidate_res_phase8 {
|
|
my ($cmd, $res) = @_;
|
|
_wait_delete();
|
|
return 0;
|
|
}
|
|
|
|
sub invalidate_res_phase9 {
|
|
my ($cmd, $res) = @_;
|
|
my $dst = "$mars/resource-$res/syncstatus-$host";
|
|
my $primary = _get_designated_primary($res);
|
|
ldie "Cannot execute 'invalidate' because noone is designated as primary.\n" if (!$primary || $primary eq "(none)");
|
|
ldie "Cannot invalidate the designated primary host '$primary'\n" if $primary eq $host;
|
|
|
|
_switch($cmd, $res, "$mars/resource-$res/todo-$host/attach", 1);
|
|
_switch($cmd, $res, "$mars/resource-$res/todo-$host/fetch", 1);
|
|
_switch($cmd, $res, "$mars/resource-$res/todo-$host/replay", 1);
|
|
finish_links();
|
|
my $emergency_path = "$mars/resource-$res/actual-$primary/has-emergency";
|
|
my $emergency = get_link($emergency_path, 1);
|
|
if ($emergency) {
|
|
lwarn "Primary '$primary' is in emergency mode. Cannot start sync.\n";
|
|
} else {
|
|
_switch($cmd, $res, "$mars/resource-$res/todo-$host/sync", 1);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
my %resize_device_size;
|
|
my %resize_old_size;
|
|
my %resize_new_size;
|
|
|
|
sub get_possible_size {
|
|
my ($cmd, $res) = @_;
|
|
my @actsizes = lamport_glob("$mars/resource-$res/actsize-*");
|
|
lwarn "resource $res has no actsize-* symlinks\n" unless @actsizes;
|
|
my $possible_size = 0;
|
|
foreach my $actsize (@actsizes) {
|
|
my $this_size = get_link($actsize);
|
|
if (!$possible_size || $this_size < $possible_size) {
|
|
$possible_size = $this_size;
|
|
}
|
|
}
|
|
return $possible_size;
|
|
}
|
|
|
|
sub resize_phase0 {
|
|
my ($cmd, $res, $size_arg) = @_;
|
|
ldie "mars kernel module is not loaded. This is needed for communication with some other hosts!\n" if !is_module_loaded();
|
|
my $new_size = 0;
|
|
if ($size_arg) {
|
|
$new_size = get_size($size_arg);
|
|
ldie "optional size argument '$new_size' must be numeric and positive\n" unless scalar($new_size) > 0;
|
|
lprint "new size: $new_size bytes\n";
|
|
}
|
|
check_primary($cmd, $res, 1);
|
|
my $my_size = get_link("$mars/resource-$res/actsize-$host");
|
|
my $lnk = "$mars/resource-$res/size";
|
|
my $old_size = get_link($lnk);
|
|
lprint "old_size=$old_size\n";
|
|
my $possible_size = get_possible_size($cmd, $res);
|
|
lprint "possible_size=$possible_size\n";
|
|
$new_size = $possible_size if !$new_size;
|
|
lprint "new_size=$new_size\n";
|
|
ldie "new size $new_size is higher than the possible size (minimum of all volumes) $possible_size" if $new_size > $possible_size; # no override with --force possible
|
|
# disallow decreasing
|
|
ldie "only increases of the size are possible!\n" if $new_size < $old_size;
|
|
my $waste = $my_size - $new_size;
|
|
lwarn "You are wasting $waste bytes locally\n" if $my_size > $new_size;
|
|
# remember values
|
|
$resize_device_size{$res} = _get_mars_size(@_);
|
|
$resize_old_size{$res} = $old_size;
|
|
lwarn "internal mismatch between actual device size and resource size: $resize_device_size{$res} != $resize_old_size{$res}\n" unless $resize_device_size{$res} == $resize_old_size{$res};
|
|
$resize_new_size{$res} = $new_size;
|
|
return 0;
|
|
}
|
|
|
|
sub resize_phase1 {
|
|
my ($cmd, $res) = @_;
|
|
my $old_size = $resize_old_size{$res} or ldie "bad internal size value\n";
|
|
my $new_size = $resize_new_size{$res} or ldie "bad internal size value\n";
|
|
# for safety, check again
|
|
ldie "only increases of the size are possible!\n" if $new_size < $old_size;
|
|
check_primary($cmd, $res, 1);
|
|
# Mark the primary data / its size as authoritative
|
|
my $act_lnk = "$mars/resource-$res/syncstatus-$host";
|
|
set_link($new_size, $act_lnk);
|
|
finish_links(); # Chance for errors to pop up
|
|
# Now set the new resource size
|
|
my $lnk = "$mars/resource-$res/size";
|
|
set_link($new_size, $lnk);
|
|
finish_links();
|
|
return 0;
|
|
}
|
|
|
|
sub resize_phase2 {
|
|
my ($cmd, $res) = @_;
|
|
my $old_size = $resize_old_size{$res} or ldie "bad internal size value\n";
|
|
my $new_size = $resize_new_size{$res} or ldie "bad internal size value\n";
|
|
for (;;) {
|
|
my $new_device_size = _get_mars_size(@_);
|
|
if ($new_device_size == $resize_new_size{$res}) {
|
|
lprint "Device size is now $new_device_size.\n";
|
|
last;
|
|
}
|
|
lprint "Device size $new_device_size has not yet reached the new size $resize_new_size{$res}.\n";
|
|
if ($new_device_size != $resize_device_size{$res}) {
|
|
lwarn "The size has changed, but did not reach the correct value.";
|
|
lwarn "Assuming some rounding problems (which may occur at some device types)\n";
|
|
last;
|
|
}
|
|
sleep_timeout();
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
# Remark: this is historic, and should not be used anymore.
|
|
# However, it likely needs to be kept for DRBD similarity.
|
|
sub role_cmd {
|
|
my ($cmd, $res, $peer) = @_;
|
|
$peer = $host unless (defined($peer) && $peer);
|
|
my $is_primary = is_actual_primary($cmd, $res, $peer);
|
|
my $todo_primary = _get_designated_primary($res);
|
|
my $msg = "I am actually ";
|
|
$msg .= $is_primary ? "primary" : "secondary";
|
|
if ($todo_primary ne $peer) {
|
|
$msg .= " and $todo_primary should be primary";
|
|
}
|
|
lprint $msg . "\n";
|
|
}
|
|
|
|
# Deprecated. Check whether it can be removed.
|
|
sub mars_state_cmd {
|
|
my ($cmd, $res, $peer) = @_;
|
|
$peer = $host unless (defined($peer) && $peer);
|
|
my $is_primary = is_actual_primary($cmd, $res, $peer);
|
|
my $todo_primary = _get_designated_primary($res);
|
|
if ($is_primary) {
|
|
lprint "is_primary\n";
|
|
return;
|
|
}
|
|
if ($todo_primary eq $peer) {
|
|
lprint "becoming_primary\n";
|
|
return;
|
|
}
|
|
|
|
# secondary without ambitions to become primary
|
|
|
|
my $size = get_link("$mars/resource-$res/size");
|
|
my $syncstatus = get_link("$mars/resource-$res/syncstatus-$peer");
|
|
if ($syncstatus != $size) {
|
|
lprint "secondary inconsistent ($syncstatus bytes of $size)\n";
|
|
return;
|
|
}
|
|
if ($todo_primary eq "(none)") {
|
|
lprint "secondary\n";
|
|
return;
|
|
}
|
|
my $primary_replay = get_link("$mars/resource-$res/replay-$todo_primary");
|
|
my $peer_replay = get_link("$mars/resource-$res/replay-$peer");
|
|
if ($primary_replay eq $peer_replay) {
|
|
lprint "secondary uptodate\n";
|
|
return;
|
|
}
|
|
lprint "secondary outdated ($peer_replay instead of $primary_replay)\n";
|
|
}
|
|
|
|
sub cat_cmd {
|
|
my $cmd = shift;
|
|
foreach my $path (@_) {
|
|
_get_text($path, undef, 1);
|
|
}
|
|
}
|
|
|
|
sub mars_info_cmd {
|
|
my ($cmd, $res) = @_;
|
|
my $info = "$mars/resource-$res/logstatus-$host.status";
|
|
cat_cmd($cmd, $info);
|
|
}
|
|
|
|
sub show_cmd {
|
|
my ($cmd, $res) = @_;
|
|
$res = "*" if !$res || $res eq "all";
|
|
$res = "{$res}" if $res =~ m/,/;
|
|
my $glob = "$mars/{ips/ip-$host,alive-$host,emergency-$host,rest-space-$host,resource-$res/{device,primary,size,actsize-$host,syncstatus-$host,replay-$host,actual-$host/*,todo-$host/*}}";
|
|
foreach my $link (lamport_glob($glob)) {
|
|
next unless link_exists($link);
|
|
my $res = get_link($link);
|
|
my $short = $link;
|
|
$short =~ s:^$mars/::;
|
|
lprint "$short=$res\n";
|
|
}
|
|
}
|
|
|
|
sub show_errors_cmd {
|
|
my ($cmd, $res) = @_;
|
|
my $text = get_error_text(@_);
|
|
if ($text) {
|
|
lprint $text;
|
|
ldie "resource $res has some (old) problems.\n";
|
|
} else {
|
|
lprint "no errors/warnings are reported.\n";
|
|
}
|
|
}
|
|
|
|
sub version {
|
|
lprint "$0 Version: $marsadm_version $Id\n";
|
|
#lprint "my IP is $ip\n";
|
|
exit 0;
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# pretty printing
|
|
|
|
sub seconds2human {
|
|
my $seconds = shift;
|
|
return "--:--:--" unless (defined($seconds) && $seconds ne "" && $seconds >= 0);
|
|
return sprintf("%02d:%02d:%02d", $seconds / 3600, ($seconds % 3600) / 60, $seconds % 60);
|
|
}
|
|
|
|
sub number2human {
|
|
my ($number, $unit, $max) = @_;
|
|
$max = $number if !defined($max);
|
|
my $k = 1024.0;
|
|
my $flip_over = 9.99;
|
|
if (!defined($unit) || !$unit) {
|
|
if ($max >= $k * $k * $k * $k * $flip_over) {
|
|
$unit = "T";
|
|
} elsif ($max >= $k * $k * $k * $flip_over) {
|
|
$unit = "G";
|
|
} elsif ($max >= $k * $k) {
|
|
$unit = "M";
|
|
} elsif ($max >= $k) {
|
|
$unit = "K";
|
|
} else {
|
|
$unit = "";
|
|
}
|
|
}
|
|
my $i = "i";
|
|
if ($unit =~ m/^[a-z]/) {
|
|
$i = "";
|
|
$k = 1000.0;
|
|
}
|
|
$_ = $unit;
|
|
SWITCH: {
|
|
if (/t/i) {
|
|
$number /= $k * $k * $k * $k;
|
|
$unit = "T${i}B";
|
|
last SWITCH;
|
|
}
|
|
if (/g/i) {
|
|
$number /= $k * $k * $k;
|
|
$unit = "G${i}B";
|
|
last SWITCH;
|
|
}
|
|
if (/m/i) {
|
|
$number /= $k * $k;
|
|
$unit = "M${i}B";
|
|
last SWITCH;
|
|
}
|
|
if (/k/i) {
|
|
$number /= $k;
|
|
$unit = "K${i}B";
|
|
last SWITCH;
|
|
}
|
|
$unit = "B";
|
|
}
|
|
if ($unit eq "B") {
|
|
$number = int($number + 0.1);
|
|
} else {
|
|
$number = sprintf("%.3f", $number);
|
|
}
|
|
return "$number $unit";
|
|
}
|
|
|
|
sub progress_bar {
|
|
my ($length, $min, $mid, $max) = @_;
|
|
$min = 0 if $min < 0;
|
|
$mid = $min if $mid < $min;
|
|
$max = $mid if $max < $mid;
|
|
$max = 1 if $max < 1;
|
|
my $pos1 = $length * $min / $max;
|
|
my $count = $pos1 - 1;
|
|
$count = 0 if $count < 0;
|
|
my $bar = '=' x $count;
|
|
if ($pos1 < $length) {
|
|
$bar .= ">";
|
|
$pos1++;
|
|
}
|
|
my $pos2 = $length * $mid / $max;
|
|
if ($pos1 < $pos2) {
|
|
$bar .= ':' x ($pos2 - $pos1);
|
|
}
|
|
if ($pos2 < $length) {
|
|
$bar .= '.' x ($length - $pos2);
|
|
}
|
|
return "[$bar]";
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# macro evaluation
|
|
|
|
sub make_numeric {
|
|
my $number = shift;
|
|
return 0 if (!defined($number) || $number eq "");
|
|
return $number;
|
|
}
|
|
|
|
sub set_args {
|
|
my $outer_env = shift;
|
|
my $inner_env = shift;
|
|
$$inner_env{"callstack"} .= "," if $$inner_env{"callstack"};
|
|
$$inner_env{"callstack"} .= ${_[0]};
|
|
my $index = 0;
|
|
while (defined(my $next = shift)) {
|
|
my $arg = parse_macro($next, $outer_env);
|
|
$$inner_env{$index++} = $arg;
|
|
}
|
|
# clear all other number variables to avoid confusion
|
|
while (defined($$inner_env{$index})) {
|
|
undef $$inner_env{$index++};
|
|
}
|
|
}
|
|
|
|
# evaluate a primitive macro
|
|
sub eval_fn {
|
|
my $env = shift;
|
|
my $fn = shift;
|
|
# optionally allow primitive macros without prefix primitive- to be substituted
|
|
if ($fn !~ s/^primitive[-_]?//) {
|
|
my $macro = get_macro($fn, 1);
|
|
if ($macro) {
|
|
set_args($env, $env, $fn, @_);
|
|
return parse_macro($macro, $env);
|
|
}
|
|
}
|
|
my $arg1 = shift;
|
|
$_ = $fn;
|
|
SWITCH: {
|
|
if (/^$/) { # variable
|
|
my $varname;
|
|
# prefix *crement operators
|
|
if ($arg1 =~ m/^([-+]{2})$/) {
|
|
my $op = $arg1;
|
|
$varname = parse_macro(shift, $env);
|
|
if ($op =~ m/^\+/) {
|
|
$$env{$varname}++;
|
|
} else {
|
|
$$env{$varname}--;
|
|
}
|
|
} else {
|
|
$varname = parse_macro($arg1, $env);
|
|
}
|
|
my $result = "";
|
|
if (defined($$env{$varname})) {
|
|
$result = $$env{$varname};
|
|
}
|
|
# postfix *crement operators
|
|
if (defined(${_[0]}) && ${_[0]} =~ m/^([-+]{2})$/) {
|
|
my $op = shift;
|
|
if ($op =~ m/^\+/) {
|
|
$$env{$varname}++;
|
|
} else {
|
|
$$env{$varname}--;
|
|
}
|
|
}
|
|
# provisionary light-weight arrays based on CSV format
|
|
if (defined(my $delim = shift) && defined(my $index = shift)) {
|
|
$delim = parse_macro($delim, $env);
|
|
$index = parse_macro($index, $env);
|
|
my @list = split($delim, $result);
|
|
# get last element when denoted
|
|
$index = scalar(@list) - 1 if ($index eq "" || $index eq "*");
|
|
$result = $list[$index];
|
|
}
|
|
return $result;
|
|
}
|
|
if (/^let$/) { # assignment
|
|
my $varname = parse_macro($arg1, $env);
|
|
my $arg2 = shift;
|
|
my $value = parse_macro($arg2, $env);
|
|
$$env{$varname} = $value;
|
|
return "";
|
|
}
|
|
if (/^append$/) { # .=
|
|
my $varname = parse_macro($arg1, $env);
|
|
my $arg2 = shift;
|
|
my $value = parse_macro($arg2, $env);
|
|
$$env{$varname} .= $value;
|
|
return "";
|
|
}
|
|
if (/^set$/) { # provisionary light-weight arrays based on CSV format
|
|
my $varname = parse_macro($arg1, $env);
|
|
my $delim = shift;
|
|
$delim = parse_macro($delim, $env);
|
|
my $index = shift;
|
|
$index = parse_macro($index, $env);
|
|
my @list = split($delim, $$env{$varname});
|
|
# append to list when denoted
|
|
$index = scalar(@list) if ($index eq "" || $index eq "*");
|
|
my $value = shift;
|
|
$value = parse_macro($value, $env);
|
|
$list[$index] = $value;
|
|
$$env{$varname} = join($delim, @list);
|
|
return "";
|
|
}
|
|
if (/^dump[-_]?vars$/) { # write to stderr
|
|
foreach my $key (sort alphanum_cmp keys(%$env)) {
|
|
next if $key =~ m/^__.*__$/;
|
|
my $val = $$env{$key};
|
|
lprint_stderr "$key='$val'\n";
|
|
}
|
|
return "";
|
|
}
|
|
if (/^([-+*\/%&|^]|>>|<<|min|max)$/) { # arithmetic / associative operators
|
|
my $op = $1;
|
|
my $number = make_numeric(parse_macro($arg1, $env));
|
|
while (defined(my $next = shift)) {
|
|
my $operand = make_numeric(parse_macro($next, $env));
|
|
$_ = $op;
|
|
if (/^\+$/) { $number += $operand; next; }
|
|
if (/^-$/) { $number -= $operand; next; }
|
|
if (/^\*$/) { $number *= $operand; next; }
|
|
if (/^\/$/) { $number /= $operand; next; }
|
|
if (/^%$/) { $number %= $operand; next; }
|
|
if (/^&$/) { $number &= $operand; next; }
|
|
if (/^\|$/) { $number |= $operand; next; }
|
|
if (/^\^$/) { $number ^= $operand; next; }
|
|
if (/^<<$/) { $number <<= $operand; next; }
|
|
if (/^>>$/) { $number >>= $operand; next; }
|
|
if (/^min$/) { $number = $operand if $number < $operand; next; }
|
|
if (/^max$/) { $number = $operand if $number > $operand; next; }
|
|
ldie "bad arithmetic operator '$op'";
|
|
}
|
|
return $number;
|
|
}
|
|
if (/^([<>]=?|[!=]=)$/) { # numeric comparisons
|
|
my $op = $1;
|
|
my $n1 = make_numeric(parse_macro($arg1, $env));
|
|
my $arg2 = shift;
|
|
my $n2 = make_numeric(parse_macro($arg2, $env));
|
|
$_ = $op;
|
|
if (/^<$/) { return $n1 < $n2; }
|
|
if (/^>$/) { return $n1 > $n2; }
|
|
if (/^<=$/) { return $n1 <= $n2; }
|
|
if (/^>=$/) { return $n1 >= $n2; }
|
|
if (/^==$/) { return $n1 == $n2; }
|
|
if (/^!=$/) { return $n1 != $n2; }
|
|
ldie "bad comparison operator '$op'";
|
|
}
|
|
if (/^(lt|gt|le|ge|eq|ne|match|=~)$/) { # binary string operators
|
|
my $op = $1;
|
|
$op = "=~" if $op eq "match";
|
|
my $n1 = parse_macro($arg1, $env);
|
|
my $arg2 = shift;
|
|
my $n2 = parse_macro($arg2, $env);
|
|
$_ = $op;
|
|
if (/^lt$/) { return $n1 lt $n2; }
|
|
if (/^gt$/) { return $n1 gt $n2; }
|
|
if (/^le$/) { return $n1 le $n2; }
|
|
if (/^ge$/) { return $n1 ge $n2; }
|
|
if (/^eq$/) { return $n1 eq $n2; }
|
|
if (/^ne$/) { return $n1 ne $n2; }
|
|
if (/^=~$/) {
|
|
my $opts = "m";
|
|
my $arg3 = shift;
|
|
$opts = parse_macro($arg3, $env) if defined($arg3);
|
|
# unfortunately standard regex operators don't seem to accept variable options, so we use eval()
|
|
my $result = eval("\$n1 =~ m{$n2}$opts");
|
|
return "" unless defined($result);
|
|
return $result;
|
|
}
|
|
ldie "bad binary operator '$op'";
|
|
}
|
|
if (/^(&&|\|\||and|or)$/) { # shortcut operators
|
|
my $op = $1;
|
|
$op = "&&" if $op eq "and";
|
|
$op = "||" if $op eq "or";
|
|
my $number = parse_macro($arg1, $env);
|
|
while (defined(my $next = shift)) {
|
|
$_ = $op;
|
|
if (/^&&$/) { return 0 if !$number; }
|
|
if (/^\|\|$/) { return 1 if $number; }
|
|
my $operand = parse_macro($next, $env);
|
|
$_ = $op;
|
|
if (/^&&$/) { $number &= $operand; next; }
|
|
if (/^\|\|$/) { $number |= $operand; next; }
|
|
ldie "bad shortcut operator '$op'";
|
|
}
|
|
return $number;
|
|
}
|
|
if (/^([~!]|not)$/) { # unary operators
|
|
my $op = $1;
|
|
$op = "!" if $op eq "not";
|
|
my $number = parse_macro($arg1, $env);
|
|
$_ = $op;
|
|
if (/^~$/) { return ~$number; }
|
|
if (/^!$/) { return !$number; }
|
|
ldie "bad unary operator '$op'";
|
|
}
|
|
# string functions
|
|
if (/^toupper$/) {
|
|
my $txt = parse_macro($arg1, $env);
|
|
return uc($txt);
|
|
}
|
|
if (/^tolower$/) {
|
|
my $txt = parse_macro($arg1, $env);
|
|
return lc($txt);
|
|
}
|
|
if (/^length$/) { # string length
|
|
my $txt = parse_macro($arg1, $env);
|
|
return length($txt);
|
|
}
|
|
if (/^subst$/) { # regex substitution operator
|
|
my $txt = parse_macro($arg1, $env);
|
|
my $arg2 = shift;
|
|
my $regex = parse_macro($arg2, $env);
|
|
my $arg3 = shift;
|
|
my $subst = parse_macro($arg3, $env);
|
|
my $opts = "m";
|
|
my $arg4 = shift;
|
|
$opts = parse_macro($arg4, $env) if defined($arg4);
|
|
# unfortunately standard regex operators don't seem to accept variable options, so we use eval()
|
|
eval("\$txt =~ s{$regex}{$subst}$opts");
|
|
return $txt;
|
|
}
|
|
if (/s?printf$/) { # sprintf()
|
|
my $fmt = $arg1; # exception: don't evaluate the format string!
|
|
my @list = ();
|
|
while (defined(my $next = shift)) {
|
|
my $operand = parse_macro($next, $env);
|
|
push @list, $operand;
|
|
}
|
|
return sprintf($fmt, @list);
|
|
}
|
|
if (/^human[-_]?numbers?$/) { # convert numbers to readable format
|
|
my $name = $_;
|
|
my $unit = parse_macro($arg1, $env);
|
|
my $arg2 = shift;
|
|
my $delim_numbers = parse_macro($arg2, $env);
|
|
$delim_numbers = "/" if $delim_numbers eq "";
|
|
my $arg3 = shift;
|
|
my $delim_unit = parse_macro($arg3, $env);
|
|
$delim_unit = " " if $delim_unit eq "";
|
|
my $max = 0;
|
|
my @list = ();
|
|
while (defined(my $next = shift)) {
|
|
my $number = make_numeric(parse_macro($next, $env));
|
|
push @list, $number;
|
|
$max = $number if $number > $max;
|
|
}
|
|
lwarn "macro $name: no number arguments given\n" unless @list;
|
|
my $conv_unit = "";
|
|
my @results = ();
|
|
foreach my $number (@list) {
|
|
my $conv_number = number2human($number, $unit, $max);
|
|
$conv_number =~ s/ *([a-z].*)//i;
|
|
$conv_unit = $1;
|
|
push @results, $conv_number;
|
|
}
|
|
return join($delim_numbers, @results) . "$delim_unit$conv_unit";
|
|
}
|
|
if (/^human[-_]?seconds$/) { # convert numbers to readable format
|
|
# don't use make_numeric() here in order to allow the result '--:--:--'
|
|
my $number = parse_macro($arg1, $env);
|
|
return seconds2human($number);
|
|
}
|
|
if (/^progress$/) { # progress bar
|
|
my $length = make_numeric(parse_macro($arg1, $env));
|
|
$length = 20 unless ($length && $length > 0);
|
|
my $arg2 = shift;
|
|
if (!defined($arg2)) { # use default values
|
|
my @vector = split(':', eval_fn($env, "summary-vector", ":"));
|
|
return progress_bar($length, @vector);
|
|
}
|
|
my $min = make_numeric(parse_macro($arg2, $env));
|
|
my $arg3 = shift;
|
|
my $mid = make_numeric(parse_macro($arg3, $env));
|
|
my $arg4 = shift;
|
|
my $max = make_numeric(parse_macro($arg4, $env));
|
|
return progress_bar($length, $min, $mid, $max);
|
|
}
|
|
if (/^errno[-_]?text$/) {
|
|
my $code = parse_macro($arg1, $env);
|
|
return "" unless defined($code) && $code != 0;
|
|
$code = -$code if $code < 0;
|
|
return __conv_errno("", $code);
|
|
}
|
|
if (/^get[-_]?log[-_]?status/) {
|
|
return get_error_text($$env{"cmd"}, $$env{"res"});
|
|
}
|
|
if (/^get[-_]?resource[-_]?(fat|err|wrn)([-_]?count)?/) {
|
|
my $what = $1;
|
|
my $do_count = $2;
|
|
my %assoc = ("fat" => 4, "err" => 3, "wrn" => 2);
|
|
my $glob = $$env{"resdir"} . "/$assoc{$what}.*.status";
|
|
return _get_text($glob, undef, 0, $do_count);
|
|
}
|
|
if (/^warn/) {
|
|
my $txt = parse_macro($arg1, $env);
|
|
lwarn $txt;
|
|
return "";
|
|
}
|
|
if (/^die$/) {
|
|
my $txt = parse_macro($arg1, $env);
|
|
ldie $txt;
|
|
return "";
|
|
}
|
|
if (/^is-module-loaded$/) {
|
|
my $path = "/proc/sys/mars";
|
|
my $result = -d $path;
|
|
return defined($result) && $result;
|
|
}
|
|
if (/^is-(member|guest)$/) {
|
|
my $type = $1;
|
|
$arg1 = parse_macro($arg1, $env);
|
|
$arg1 = $$env{"res"} unless $arg1;
|
|
my $arg2 = shift;
|
|
$arg2 = parse_macro($arg2, $env);
|
|
$arg2 = $$env{"host"} unless $arg2;
|
|
my $result;
|
|
if ($type eq "guest") {
|
|
$result = is_guest($arg1, $arg2);
|
|
} else {
|
|
$result = is_member($arg1, $arg2);
|
|
}
|
|
return $result ? 1 : 0;
|
|
}
|
|
# list objects
|
|
if (/^(count[-_]?)?(cluster|resource|guest)[-_]?members$/) {
|
|
my $old = $_;
|
|
$_ =~ s/members/peers/;
|
|
lwarn "deprecated: please use macro '$_' instead of '$old'\n";
|
|
}
|
|
if (/^(count[-_]?)?(cluster|resource|guest)[-_]?peers$/) {
|
|
my $do_count = $1;
|
|
my $type = $2;
|
|
my @peers;
|
|
if ($type eq "cluster") {
|
|
@peers = get_total_peers();
|
|
} elsif ($type eq "guest") {
|
|
@peers = get_guest_peers($$env{"res"});
|
|
} else {
|
|
@peers = get_member_peers($$env{"res"});
|
|
}
|
|
return scalar(@peers) if defined($do_count);
|
|
my $list = "";
|
|
foreach my $peer (@peers) {
|
|
$list .= "$peer\n";
|
|
}
|
|
return $list;
|
|
}
|
|
if (/^(count[-_]?)?(my|all)[-_]?(resources|members|guests)$/) {
|
|
my $do_count = $1;
|
|
my $what = $2;
|
|
my $type = $3;
|
|
my $peer = "";
|
|
if ($what eq "my") {
|
|
$peer = parse_macro($arg1, $env);
|
|
$peer = $$env{"host"} unless $peer;
|
|
}
|
|
my @list;
|
|
if ($type eq "guests") {
|
|
@list = get_guest_resources($peer);
|
|
} elsif ($type eq "members") {
|
|
@list = get_member_resources($peer);
|
|
} else {
|
|
@list = get_total_resources($peer);
|
|
}
|
|
return scalar(@list) if defined($do_count);
|
|
my $list = "";
|
|
foreach my $item (@list) {
|
|
$list .= "$item\n";
|
|
}
|
|
return $list;
|
|
}
|
|
# low-level symlink access
|
|
if (/^readlink$/) {
|
|
my $path = parse_macro($arg1, $env);
|
|
return get_link($path, 1);
|
|
}
|
|
if (/^setlink$/) {
|
|
my $src = parse_macro($arg1, $env);
|
|
my $arg2 = shift;
|
|
my $dst = parse_macro($arg2, $env);
|
|
set_link($src, $dst);
|
|
return "";
|
|
}
|
|
# high-level state access
|
|
if (/^(get|todo|actual)[-_]?primary$/) {
|
|
my $op = $1;
|
|
my $primary;
|
|
if ($op eq "actual") {
|
|
lwarn "DEPRECATED: you are trying to uniquely identify an actual primary hostname, but this is conceptually wrong because in split-brain situations there may exist multiple ones. Use view-is-primary instead. That would be safe.\n" unless $$env{cmd} =~ m/-1and1$/;
|
|
$primary = _get_actual_primary($$env{"res"});
|
|
} else {
|
|
$primary = _get_designated_primary($$env{"res"});
|
|
}
|
|
$primary = "" if (!defined($primary) || $primary eq "(none)");
|
|
$primary = ($primary eq $$env{"host"}) if $op eq "todo";
|
|
return $primary;
|
|
}
|
|
if (/^todo[-_]?secondary$/) {
|
|
my $val = eval_fn($env, "get-primary", $arg1);
|
|
return $val eq "(none)" ? 1 : 0;
|
|
}
|
|
if (/^todo[-_]?(attach|sync|fetch|replay)?$/) {
|
|
my $what = $1;
|
|
$what = parse_macro($arg1, $env) unless defined($what);
|
|
my $lnk = $$env{"resdir"} . "/todo-" . $$env{"host"} . "/$what";
|
|
$lnk = correct_path($lnk);
|
|
return get_link($lnk, 1);
|
|
}
|
|
if (/^get[-_]?msg$/) {
|
|
my $what = parse_macro($arg1, $env);
|
|
my $lnk = $$env{"resdir"} . "/actual-" . $$env{"host"} . "/msg-$what";
|
|
return get_link($lnk, 1);
|
|
}
|
|
if (/^(all|the)[-_](pretty[-_]?)?(global[-_]?)?((?:err|wrn|inf)[-_])?(msg|count)$/) {
|
|
my $shorten = ($1 eq "the");
|
|
my $pretty = $2;
|
|
my $global = $3;
|
|
my $specific = $4;
|
|
my $type = $5;
|
|
$specific = "" unless defined($specific);
|
|
my $glob = "$mars";
|
|
$glob = $$env{"resdir"} if (!defined($global) && $$env{"res"});
|
|
$glob .= "/actual-" . $$env{"host"} . "/msg-$specific*";
|
|
my $result = "";
|
|
my $count = 0;
|
|
foreach my $msg_path (lamport_glob($glob)) {
|
|
my $val = get_link($msg_path, 1);
|
|
if ($shorten) {
|
|
# skip uninteresting messages
|
|
next if $val =~ m/^OK/;
|
|
# skip _transient_ error messages
|
|
if ($msg_path =~ m/-err-/ && $val =~ m/^([0-9]+\.[0-9]{9})/) {
|
|
my $stamp = $1;
|
|
if ($stamp) {
|
|
my $delta = $timeout > 0 ? $timeout : 30;
|
|
next if $stamp + $delta > time();
|
|
}
|
|
}
|
|
}
|
|
$val = _replace_timestamps($val, $shorten) if defined($pretty);
|
|
my $key = $msg_path;
|
|
$key =~ s:^.*/msg-::;
|
|
$result .= "$key: $val\n";
|
|
$count++;
|
|
}
|
|
return $count if $type eq "count";
|
|
return $result;
|
|
}
|
|
if (/^potential[-_]?features$/) {
|
|
my $all_flags = ~0x0;
|
|
my $result = featuresflags2txt($all_flags, 1);
|
|
return $result;
|
|
}
|
|
if (/^(implemented|usable)[-_]?(features|compressions|digests)$/) {
|
|
my $op = $1;
|
|
my $restrict = $2;
|
|
$op = "features" if $op eq "implemented";
|
|
my $str = get_alive_link($op, $$env{"host"}, 2);
|
|
my $flags;
|
|
if ($str =~ m/,(0x[0-9a-f]*)/) {
|
|
$flags = $1;
|
|
}
|
|
$flags = 0x0 unless (defined($flags) && $flags ne "");
|
|
$flags = hex($flags) if $flags =~ m/^0x/;
|
|
if ($restrict =~ m/compressions/) {
|
|
$flags &= $compress_features;
|
|
} elsif ($restrict =~ m/digests/) {
|
|
$flags &= $chksum_features;
|
|
}
|
|
my $result = featuresflags2txt($flags, 1);
|
|
return $result;
|
|
}
|
|
if (/^(enabled-(log|net)-compressions|disabled-(log|net)-digests)$/) {
|
|
my $op = $1;
|
|
my $path = "$mars/defaults/$op";
|
|
my $flags = get_link($path, 2);
|
|
$flags = 0x0 unless (defined($flags) && $flags ne "");
|
|
$flags = hex($flags) if $flags =~ m/^0x/;
|
|
#NYI
|
|
#$path = "$mars/resource-" . $$env{"res"} . "/defaults/$op";
|
|
#my $res_flags = get_link($path, 2);
|
|
#if (defined($res_flags) && $res_flags ne "") {
|
|
#$res_flags = hex($res_flags) if $res_flags =~ m/^0x/;
|
|
#if ($op =~ m/enabled/) {
|
|
#$flags |= $res_flags;
|
|
#} else {
|
|
#$flags = $res_flags;
|
|
#}
|
|
#}
|
|
if ($op =~ m/compressions/) {
|
|
$flags &= $compress_features;
|
|
} elsif ($op =~ m/digests/) {
|
|
$flags &= $chksum_features;
|
|
}
|
|
my $unusable = $flags & ~$kernel_flags_version;
|
|
if ($unusable > 0) {
|
|
my $txt = featuresflags2txt($unusable);
|
|
lwarn "features '$txt' are not compiled into the kernel module\n";
|
|
}
|
|
my $result = featuresflags2txt($flags);
|
|
return $result;
|
|
}
|
|
if (/^used-((log|net)-(compression|digest))$/) {
|
|
my $op = $1;
|
|
my $flags = get_alive_link("used-$op", $$env{"host"}, 2);
|
|
$flags = 0x0 unless (defined($flags) && $flags ne "");
|
|
$flags = hex($flags) if $flags =~ m/^0x/;
|
|
if ($op =~ m/compression/) {
|
|
$flags &= $compress_features;
|
|
} elsif ($op =~ m/digest/) {
|
|
$flags &= $chksum_features;
|
|
}
|
|
my $result = featuresflags2txt($flags);
|
|
return $result;
|
|
}
|
|
if (/^(tree|features|)[-_]?version$/) {
|
|
my $op = $1;
|
|
my $result = get_alive_link($op, $$env{"host"}, 2);
|
|
if ($result =~ m/^([^,]*,[^,]*,)(0x[0-9a-z]+),(0x[0-9a-z]+)/p) {
|
|
my $res1 = $1;
|
|
my $res2 = $2;
|
|
my $res3 = $3;
|
|
my $rest = $POSTMATCH;
|
|
$result = $res1 .
|
|
featuresflags2txt($res2) .
|
|
"," .
|
|
featuresflags2txt($res3) .
|
|
$rest;
|
|
}
|
|
return $result;
|
|
}
|
|
if (/^is[-_]?alive$/) {
|
|
my $peer = parse_macro($arg1, $env);
|
|
$peer = _get_designated_primary($$env{"res"}) unless $peer;
|
|
$peer = $$env{"host"} unless $peer;
|
|
my $stamp = get_alive_link("time", $peer, 1);
|
|
return is_recent($stamp, $$env{"window"});
|
|
}
|
|
if (/^alive[-_]?timestamp$/) {
|
|
my $peer = parse_macro($arg1, $env);
|
|
$peer = _get_designated_primary($$env{"res"}) unless $peer;
|
|
return -1 if !$peer || $peer eq "(none)";
|
|
my $result = get_alive_link("time", $peer, 2);
|
|
return $result if $result;
|
|
return get_alive_stamp("alive", $peer);
|
|
}
|
|
if (/^is[-_]?orphan$/) {
|
|
my $peer = parse_macro($arg1, $env);
|
|
$peer = $$env{"host"} unless $peer;
|
|
return 0 if eval_fn($env, "is-primary", $peer);
|
|
my $replay = get_link($$env{"resdir"} . "/replay-$peer", 1);
|
|
$replay =~ m/^(log-[^,]+),([0-9]*)/;
|
|
my $logfile = $$env{"resdir"} . "/" . $1;
|
|
my $logpos = $2;
|
|
if (! -r $logfile) {
|
|
return 1;
|
|
}
|
|
my @stat = stat($logfile);
|
|
if (!@stat) {
|
|
return 1;
|
|
}
|
|
my $size= $stat[7];
|
|
if ($size < $logpos) {
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
if (/^is[-_]?(almost[-_]?)?consistent$/) {
|
|
my $almost = $1;
|
|
# has sync finished?
|
|
my $syncrest = make_numeric(eval_fn($env, "sync-rest", ""));
|
|
return 0 if $syncrest > 0;
|
|
if (!$almost && eval_fn($env, "is-primary", "")) {
|
|
# is the replay link indicating that something is not yet applied / dirty?
|
|
my $replay = get_link($$env{"resdir"} . "/replay-" . $$env{"host"}, 1);
|
|
$replay =~ m:,[0-9]+,([0-9]+):;
|
|
my $rest = $1;
|
|
return 0 if $rest > 0;
|
|
}
|
|
# are all logfiles applied which had accumulated during sync?
|
|
my $replay = get_link($$env{"resdir"} . "/replay-" . $$env{"host"}, 1);
|
|
return 0 unless $replay;
|
|
my $syncpos = get_link($$env{"resdir"} . "/syncpos-" . $$env{"host"}, 2);
|
|
if (defined($syncpos) && $syncpos) {
|
|
my $cmp = compare_replaylinks($syncpos, $replay);
|
|
return 0 if $cmp > 0;
|
|
}
|
|
return 1;
|
|
}
|
|
if (/^get[-_]?disk$/) {
|
|
my $lnk = $$env{"resdir"} . "/data-" . $$env{"host"};
|
|
my $result = get_link($lnk, 1);
|
|
$result = "" unless defined($result);
|
|
return $result;
|
|
}
|
|
if (/^get[-_]?device$/) {
|
|
my $result = device_name($$env{"res"}, $$env{"host"});
|
|
return $result;
|
|
}
|
|
if (/^disk[-_]?error$/) {
|
|
my $lnk = $$env{"resdir"} . "/actual-" . $$env{"host"} . "/disk-error";
|
|
my $result = get_link($lnk, 2);
|
|
return $result;
|
|
}
|
|
if (/^disk[-_]?present$/) {
|
|
my $lnk = $$env{"resdir"} . "/data-" . $$env{"host"};
|
|
my $result = get_link($lnk, 1);
|
|
$result = "" unless defined($result);
|
|
if ($result) {
|
|
# TODO: make this remotely retrievable via status link
|
|
$result = -b $result;
|
|
$result = "0" unless defined($result);
|
|
}
|
|
return $result;
|
|
}
|
|
if (/^device[-_]?present$/) {
|
|
my $result = device_exists($$env{"res"}, $$env{"host"});
|
|
return $result;
|
|
}
|
|
# deprecated (irregular names)
|
|
if (/^present[-_]?(disk|device)$/) {
|
|
my $what = $1;
|
|
return eval_fn($env, "$what-present", $arg1);
|
|
}
|
|
if (/^(device)[-_]?(opened|nrflying|error|completion-stamp|completion-age)$/) {
|
|
my $what = $1;
|
|
my $op = $2;
|
|
my $peer = $$env{"host"};
|
|
my %transl =
|
|
(
|
|
"opened" => "open-count",
|
|
"nrflying" => "if-flying",
|
|
"error" => "if-state",
|
|
"completion-stamp" => "if-completion-stamp",
|
|
"completion-age" => "if-completion-stamp",
|
|
);
|
|
if ($what eq "device") {
|
|
my $other = eval_fn($env, "get-device", $arg1);
|
|
if ($other =~ m/\@(.*)/) {
|
|
$peer = $1;
|
|
}
|
|
}
|
|
my $lnk = $$env{"resdir"} . "/actual-$peer/" . $transl{$op};
|
|
my $result = get_link($lnk, 2);
|
|
return 0 unless $result;
|
|
if ($op =~ m/-age/) {
|
|
$result = mars_time() - $result;
|
|
$result = 0 if $result < 0;
|
|
}
|
|
return $result;
|
|
}
|
|
if (/^is[-_]?split([-_]?brain)?$/) {
|
|
my $split = detect_splitbrain($$env{"res"}, 0);
|
|
return $split ? 0 : 1;
|
|
}
|
|
if (/^is[-_]?(attach|sync|fetch|replay|primary|secondary|emergency)$/) {
|
|
my $what = $1;
|
|
my $is = "is";
|
|
$is = "has" if $what eq "emergency";
|
|
my $peer = parse_macro($arg1, $env);
|
|
$peer = $$env{"host"} unless $peer;
|
|
my $lnk = $$env{"resdir"} . "/actual-$peer/$is-$what";
|
|
$lnk = correct_path($lnk);
|
|
my $val = get_link($lnk, 1);
|
|
$val = $val ? 0 : 1 if $what eq "secondary";
|
|
return $val;
|
|
}
|
|
if (/^nr[-_]?(attach|sync|fetch|replay|primary|secondary)$/) {
|
|
my $what = $1;
|
|
my $is = "is";
|
|
$is = "has" if $what eq "emergency";
|
|
my $nr = 0;
|
|
foreach my $peer (get_member_peers($$env{"res"})) {
|
|
my $lnk = $$env{"resdir"} . "/actual-$peer/$is-$what";
|
|
$lnk = correct_path($lnk);
|
|
my $val = get_link($lnk, 1);
|
|
$val = !$val if $what eq "secondary";
|
|
$nr++ if $val;
|
|
}
|
|
return $nr;
|
|
}
|
|
if (/^does$/) {
|
|
my $what = parse_macro($arg1, $env);
|
|
my $is = "is";
|
|
$is = "has" if $what eq "emergency";
|
|
my $lnk = $$env{"resdir"} . "/actual-" . $$env{"host"} . "/$is-$what";
|
|
$lnk = correct_path($lnk);
|
|
return get_link($lnk, 1);
|
|
}
|
|
if (/^(tree|rest-space)$/) {
|
|
my $what = $1;
|
|
return get_alive_link($what, $$env{"host"}, 1)
|
|
}
|
|
if (/^systemd[-_]?unit$/) {
|
|
return _get_systemd_unit($$env{"cmd"}, $$env{"res"});
|
|
}
|
|
if (/^(uuid)$/) {
|
|
my $what = $1;
|
|
my $lnk = "$mars/$what";
|
|
$lnk = correct_path($lnk);
|
|
return get_link($lnk, 1);
|
|
}
|
|
if (/^replay[-_]?code$/) {
|
|
my $lnk = $$env{"resdir"} . "/actual-" . $$env{"host"} . "/replay-code";
|
|
return get_link($lnk, 2);
|
|
}
|
|
if (/^(device|sync|fetch|replay|work)[-_]?(rate|ops[-_]?rate|amount[-_]?rate|remain)$/) {
|
|
my $what = $1;
|
|
my $select = $2;
|
|
if ($what eq "work") {
|
|
my $val1 = eval_fn($env, "fetch-$select", "");
|
|
my $val2 = eval_fn($env, "replay-$select", "");
|
|
return "" if (!defined($val1) || $val1 eq "");
|
|
return "" if (!defined($val2) || $val2 eq "");
|
|
$val1 = make_numeric($val1);
|
|
$val2 = make_numeric($val2);
|
|
return $val1 + $val2 if $select eq "remain";
|
|
# take the maximum rate
|
|
return $val1 if $val1 > $val2;
|
|
return $val2;
|
|
}
|
|
my $name = $select;
|
|
$name = "amount_rate" if ($name eq "rate" || $name eq "remain");
|
|
$name =~ s/-/_/;
|
|
my %names =
|
|
(
|
|
"device" => "if",
|
|
"sync" => "sync",
|
|
"fetch" => "file",
|
|
"replay" => "replay",
|
|
);
|
|
$name =~ s/_/-$names{$what}_/;
|
|
my $lnk = $$env{"resdir"} . "/actual-" . $$env{"host"} . "/$name";
|
|
my $rate = get_link($lnk, 2);
|
|
# deprecated: compatibility with old version
|
|
if (!defined($rate) || $rate eq "") {
|
|
$lnk =~ s:/amount-:/:;
|
|
$rate = get_link($lnk, 2);
|
|
}
|
|
if ($select eq "remain") {
|
|
my $rest = make_numeric(eval_fn($env, "$what-rest", ""));
|
|
return 0 if $rest <= 0;
|
|
return -1 if (!defined($rate) || $rate eq "" || $rate <= 0);
|
|
return $rest / 1024 / $rate;
|
|
}
|
|
if ($select =~ /rate/) {
|
|
return 0 if (!defined($rate) || !$rate || $rate <= 0);
|
|
return $rate if $select =~ /^ops-/;
|
|
return $rate * 1024;
|
|
}
|
|
ldie "unknown macro $_\n";
|
|
}
|
|
if (/^sync[-_]?size$/) {
|
|
my $lnk = $$env{"resdir"} . "/size";
|
|
return get_link($lnk, 1);
|
|
}
|
|
if (/^sync[-_]?pos$/) {
|
|
my $lnk = $$env{"resdir"} . "/syncstatus-" . $$env{"host"};
|
|
return get_link($lnk, 1);
|
|
}
|
|
if (/^(replay)[-_]?(lognr|basenr)$/) {
|
|
my $op = $2;
|
|
my ($sum, $firstpos, $logpos, $level) = get_amount($$env{"resdir"}, $$env{"host"}, -1, 1);
|
|
return $firstpos if $op eq "lognr";
|
|
return $logpos;
|
|
}
|
|
if (/^(fetch)[-_]?(lognr)$/) {
|
|
my ($sum, $firstpos, $logpos, $level) = get_amount($$env{"resdir"}, $$env{"host"}, 1, 1);
|
|
return $logpos;
|
|
}
|
|
if (/^(work)[-_]?(lognr)$/) {
|
|
my ($sum, $firstpos, $logpos, $level) = get_amount($$env{"resdir"}, $$env{"host"}, 1, 0);
|
|
return $logpos;
|
|
}
|
|
if (/^replay[-_]?logcount$/) {
|
|
return eval_fn($env, "replay-lognr", "") - eval_fn($env, "replay-basenr", "");
|
|
}
|
|
if (/^(fetch|work)[-_]?logcount$/) {
|
|
my $what = $1;
|
|
return eval_fn($env, "$what-lognr", "") - eval_fn($env, "replay-lognr", "");
|
|
}
|
|
if (/^writeback[-_]?rest$/) {
|
|
my ($sum, $firstpos, $logpos, $level) = get_amount($$env{"resdir"}, $$env{"host"}, 1, 0);
|
|
return $sum;
|
|
}
|
|
if (/^(replay|work)[-_]?(pos)$/) {
|
|
my $what = $1;
|
|
my $op = $2;
|
|
my ($sum, $firstpos, $logpos, $level) = get_amount($$env{"resdir"}, $$env{"host"}, -1, 0);
|
|
return $sum;
|
|
}
|
|
if (/^(replay[-_]?size)|(fetch[-_]?pos)$/) {
|
|
my ($sum0) = get_amount($$env{"resdir"}, $$env{"host"}, -1, 0);
|
|
my ($sum, $firstpos, $logpos, $level) = get_amount($$env{"resdir"}, $$env{"host"}, 1, 1);
|
|
return $sum0 + $sum;
|
|
}
|
|
if (/^(fetch|work)[-_]?(size)$/) {
|
|
my $what = $1;
|
|
my ($sum0) = get_amount($$env{"resdir"}, $$env{"host"}, -1, 0);
|
|
my ($sum, $firstpos, $logpos, $level) = get_amount($$env{"resdir"}, $$env{"host"}, 1, 0);
|
|
return $sum0 + $sum;
|
|
}
|
|
if (/^(sync|fetch|replay|work)[-_]?(rest|(?:almost[-_]?|threshold[-_]?)?reached|percent|permille|vector)$/) {
|
|
my $what = $1;
|
|
my $op = $2;
|
|
my $size = make_numeric(eval_fn($env, "$what-size", ""));
|
|
my $pos = make_numeric(eval_fn($env, "$what-pos", ""));
|
|
my $result = 0;
|
|
if ($op eq "rest") {
|
|
$result = $size - $pos if $pos < $size;
|
|
} elsif ($op =~ m/^almost/) {
|
|
my $limit = make_numeric(parse_macro($arg1, $env)) if $arg1 ne "";
|
|
$limit = 990 if $limit <= 0;
|
|
$result = 1 if int($pos / $limit) >= int($size / 1000);
|
|
} elsif ($op =~ m/^threshold/) {
|
|
my $my_threshold = make_numeric($$env{"threshold"});
|
|
$arg1 = parse_macro($arg1, $env);
|
|
$my_threshold = make_numeric(get_size($arg1)) if $arg1 ne "";
|
|
$result = 1 if $pos + $my_threshold >= $size;
|
|
} elsif ($op eq "reached") {
|
|
$result = 1 if $pos >= $size;
|
|
} elsif ($op eq "percent") {
|
|
$result = 100;
|
|
$result = 100.5 * $pos / $size if $size > 0;
|
|
$result = 100 if $result >= 100.0;
|
|
} elsif ($op eq "permille") {
|
|
$result = 1000;
|
|
$result = 1000.5 * $pos / $size if $size > 0;
|
|
$result = 1000 if $result >= 1000.0;
|
|
} elsif ($op eq "vector") {
|
|
my $delim = parse_macro($arg1, $env);
|
|
$delim = ":" unless $delim;
|
|
$result = "$pos$delim$size";
|
|
} else {
|
|
ldie "unknown operation '$op'\n";
|
|
}
|
|
return $result;
|
|
}
|
|
if (/^summary[-_]?vector$/) {
|
|
my $pos1 = make_numeric(eval_fn($env, "replay-pos", ""));
|
|
my $pos2 = make_numeric(eval_fn($env, "fetch-pos", ""));
|
|
my $size = make_numeric(eval_fn($env, "fetch-size", ""));
|
|
my $delim = parse_macro($arg1, $env);
|
|
$delim = ":" unless $delim;
|
|
return "$pos1$delim$pos2$delim$size";
|
|
}
|
|
if (/^deletable[-_]?size$/) {
|
|
my ($min, $max) = _get_deletable_logfiles($_, $$env{"res"});
|
|
my $sum = 0;
|
|
foreach my $path (lamport_glob("$mars/resource-" . $$env{"res"} . "/log-*")) {
|
|
$path =~ m/\/log-([0-9]+)-/;
|
|
my $nr = $1;
|
|
next if $nr < $min or $nr >= $max;
|
|
my @stat = stat($path);
|
|
$sum += $stat[7];
|
|
}
|
|
return $sum;
|
|
}
|
|
if (/^occupied[-_]?size$/) {
|
|
my $sum = 0;
|
|
foreach my $path (lamport_glob("$mars/resource-" . $$env{"res"} . "/log-*")) {
|
|
my @stat = stat($path);
|
|
$sum += $stat[7];
|
|
}
|
|
return $sum;
|
|
}
|
|
if (/^(disk|resource|device)[-_]?size$/) {
|
|
my $what = $1;
|
|
my $path = "$mars/resource-" . $$env{"res"};
|
|
if ($what eq "device" && eval_fn($env, "device-present", "")) {
|
|
return _get_mars_size($$env{"cmd"}, $$env{"res"});
|
|
} elsif ($what eq "disk"){
|
|
my $peer = parse_macro($arg1, $env);
|
|
$peer = $$env{"host"} unless $peer;
|
|
$path .= "/actsize-$peer";
|
|
} else {
|
|
$path .= "/size";
|
|
}
|
|
return get_link($path, 1);
|
|
}
|
|
if (/^resource[-_]?possible[-_]?size$/) {
|
|
return get_possible_size($$env{"cmd"}, $$env{"res"});
|
|
}
|
|
# time handling and pausing
|
|
if (/^time$/) {
|
|
return mars_time();
|
|
}
|
|
if (/^real[-_]?time$/) {
|
|
my $time = time();
|
|
return $time;
|
|
}
|
|
if (/^replay[-_]?timestamp$/) {
|
|
$arg1 = parse_macro($arg1, $env);
|
|
$arg1 = $$env{"host"} unless $arg1;
|
|
my $replay = $$env{"resdir"} . "/replay-$arg1";
|
|
return get_link_stamp($replay);
|
|
}
|
|
if (/^fetch[-_]?timestamp$/) {
|
|
$arg1 = parse_macro($arg1, $env);
|
|
$arg1 = $$env{"host"} unless $arg1;
|
|
my $fetch_glob = $$env{"resdir"} . "/version-*-$arg1";
|
|
my @paths = sort alphanum_cmp lamport_glob($fetch_glob);
|
|
return -1 if !@paths;
|
|
my $fetch = pop(@paths);
|
|
return get_link_stamp($fetch);
|
|
}
|
|
if (/^work[-_]?timestamp$/) {
|
|
my $time1 = make_numeric(eval_fn($env, "fetch-timestamp", $arg1));
|
|
my $time2 = make_numeric(eval_fn($env, "replay-timestamp", $arg1));
|
|
# use the "best" of both...
|
|
return $time1 if $time1 > $time2;
|
|
return $time2;
|
|
}
|
|
if (/^(fetch|replay|work|alive)[-_]?age$/) {
|
|
my $what = $1;
|
|
my $time = make_numeric(eval_fn($env, "$what-timestamp", $arg1));
|
|
return -1 if $time <= 0;
|
|
return int(mars_time()) - $time;
|
|
}
|
|
if (/^(fetch|replay|work|alive)[-_]?lag$/) {
|
|
my $what = $1;
|
|
$arg1 = $$env{"host"} unless $arg1;
|
|
my $arg2 = shift;
|
|
my $primary = parse_macro($arg2, $env);
|
|
if (!$primary) {
|
|
$primary = _get_designated_primary($$env{"res"});
|
|
}
|
|
return -1 if !$primary || $primary eq "(none)";
|
|
my $time1 = make_numeric(eval_fn($env, "$what-timestamp", $arg1));
|
|
my $time2 = make_numeric(eval_fn($env, "$what-timestamp", $primary));
|
|
return -1 if ($time1 <= 0 || $time2 <= 0);
|
|
return 0 if $time1 < $time2;
|
|
return $time1 - $time2;
|
|
}
|
|
if (/^sleep$/) {
|
|
my $time = parse_macro($arg1, $env);
|
|
sleep($time);
|
|
return "";
|
|
}
|
|
if (/^timeout$/) {
|
|
my $time = parse_macro($arg1, $env);
|
|
sleep_timeout($time);
|
|
return "";
|
|
}
|
|
if (/^wait[-_]?((?:todo|is)[-_](?:attach|sync|fetch|replay|primary|secondary)[-_](?:on|off))$/) {
|
|
my $specific = $1;
|
|
$specific =~ s/_/-/g;
|
|
wait_cond($$env{"cmd"}, $$env{"res"}, $specific);
|
|
return "";
|
|
}
|
|
if (/^wait(?:[-_]?resource)?$/) {
|
|
my $specific = parse_macro($arg1, $env);
|
|
wait_cond($$env{"cmd"}, $$env{"res"}, $specific);
|
|
return "";
|
|
}
|
|
if (/^wait[-_]?cluster$/) {
|
|
my $specific = parse_macro($arg1, $env);
|
|
wait_cluster($$env{"cmd"}, $$env{"res"}, $specific);
|
|
return "";
|
|
}
|
|
# generic flow control and loops
|
|
if (/^(get|foreach)[-_]?glob$/) {
|
|
my $op = $1;
|
|
my $paths = parse_macro($arg1, $env);
|
|
my $arg2 = shift;
|
|
my $varname = parse_macro($arg2, $env);
|
|
my $arg3 = shift;
|
|
my @list = lamport_glob($paths);
|
|
my $result = "";
|
|
if ($op eq "get") {
|
|
my $delim = parse_macro($arg3, $env);
|
|
foreach my $path (@list) {
|
|
$result .= $delim if $result;
|
|
$result .= $path;
|
|
}
|
|
} else { # foreach
|
|
foreach my $path (@list) {
|
|
$$env{$varname} = $path;
|
|
$result .= parse_macro($arg3, $env);
|
|
}
|
|
}
|
|
return $result;
|
|
}
|
|
if (/^(if|unless)$/) {
|
|
my $op = $1;
|
|
my $cond = parse_macro($arg1, $env);
|
|
$cond = !$cond if $op eq "unless";
|
|
my $arg2 = shift;
|
|
if ($cond) {
|
|
ldie "undefined $op-part\n" unless defined($arg2);
|
|
return parse_macro($arg2, $env);
|
|
} elsif (defined(my $arg3 = shift)) {
|
|
return parse_macro($arg3, $env);
|
|
}
|
|
return "";
|
|
}
|
|
if (/^else?(if|unless)$/) {
|
|
my $op = $1;
|
|
unshift @_, $arg1;
|
|
while (defined(my $arg1 = shift)) {
|
|
if (defined(my $arg2 = shift)) {
|
|
my $cond = parse_macro($arg1, $env);
|
|
$cond = !$cond if $op eq "unless";
|
|
if ($cond) {
|
|
return parse_macro($arg2, $env);
|
|
}
|
|
} else { # odd number of arguments is treated as final "else"
|
|
return parse_macro($arg1, $env);
|
|
}
|
|
}
|
|
return "";
|
|
}
|
|
if (/^while$/) {
|
|
my $arg2 = shift;
|
|
my $result = "";
|
|
while (parse_macro($arg1, $env)) {
|
|
$result .= parse_macro($arg2, $env);
|
|
next if _control_macro($env, "__next__");
|
|
last if _control_macro($env, "__last__");
|
|
}
|
|
return $result;
|
|
}
|
|
if (/^until$/) {
|
|
my $arg2 = shift;
|
|
my $result = "";
|
|
until (parse_macro($arg1, $env)) {
|
|
$result .= parse_macro($arg2, $env);
|
|
next if _control_macro($env, "__next__");
|
|
last if _control_macro($env, "__last__");
|
|
}
|
|
return $result;
|
|
}
|
|
if (/^for$/) {
|
|
my ($arg2, $arg3, $arg4) = (shift, shift, shift);
|
|
my $result = "";
|
|
for (parse_macro($arg1, $env); parse_macro($arg2, $env); parse_macro($arg3, $env)) {
|
|
$result .= parse_macro($arg4, $env);
|
|
next if _control_macro($env, "__next__");
|
|
last if _control_macro($env, "__last__");
|
|
}
|
|
return $result;
|
|
}
|
|
if (/^foreach$/) {
|
|
my $varname = parse_macro($arg1, $env);
|
|
my $arg2 = shift;
|
|
my $txt = parse_macro($arg2, $env);
|
|
my $arg3 = shift;
|
|
my $delim = parse_macro($arg3, $env);
|
|
my $arg4 = shift;
|
|
my $result = "";
|
|
foreach my $value (split($delim, $txt)) {
|
|
$$env{$varname} = $value;
|
|
$result .= parse_macro($arg4, $env);
|
|
next if _control_macro($env, "__next__");
|
|
last if _control_macro($env, "__last__");
|
|
}
|
|
return $result;
|
|
}
|
|
if (/^protect$/) { # don't evaluate argument, take verbatim
|
|
return $arg1;
|
|
}
|
|
if (/^eval$/) { # evaluate given number of times
|
|
my $count = parse_macro($arg1, $env);
|
|
my $arg2 = shift;
|
|
while ($count-- > 0) {
|
|
$arg2 = parse_macro($arg2, $env);
|
|
}
|
|
return $arg2;
|
|
}
|
|
if (/^eval[-_]?down$/) { # evaluate until result is stable
|
|
for (;;) {
|
|
my $old = $arg1;
|
|
$arg1 = parse_macro($arg1, $env);
|
|
last if $arg1 eq $old;
|
|
}
|
|
return $arg1;
|
|
}
|
|
if (/^tmp$/) { # evaluate once in a temporary scope
|
|
my %copy_env = %$env;
|
|
my $result = parse_macro($arg1, \%copy_env);
|
|
return $result;
|
|
}
|
|
if (/^(include|call)$/) {
|
|
my $op = $1;
|
|
my $name = parse_macro($arg1, $env);
|
|
my $txt = get_macro($name);
|
|
if ($op eq "call") { # run in new sub-scope
|
|
my %copy_env = %$env;
|
|
set_args($env, \%copy_env, $name, @_);
|
|
return parse_macro($txt, \%copy_env);
|
|
}
|
|
# 'include' runs in the same scope
|
|
set_args($env, $env, $name, @_);
|
|
return parse_macro($txt, $env);
|
|
}
|
|
if (/^callstack$/) {
|
|
return $$env{"callstack"};
|
|
}
|
|
if (/^(abort|return|stop-eval)$/) {
|
|
my $op = $1;
|
|
$$env{$op} = 1;
|
|
return "";
|
|
}
|
|
if (/^(next|last)$/) {
|
|
my $op = $1;
|
|
$$env{$op} = 1;
|
|
$$env{"__return__"} = 1;
|
|
return "";
|
|
}
|
|
ldie "call to unknown macro '$fn'\n";
|
|
}
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# macro parsing
|
|
|
|
sub _control_macro {
|
|
my $env = shift;
|
|
my $control = shift;
|
|
my $result = $$env{$control};
|
|
$$env{$control} = 0;
|
|
return $result;
|
|
}
|
|
|
|
sub parse_macro {
|
|
my ($text, $env) = @_;
|
|
$text = "" unless defined($text);
|
|
my $old_callstack = $$env{"callstack"};
|
|
my $result = "";
|
|
while ($text =~ m/\\(.)|$match_fn/sp) {
|
|
my $escape = $1;
|
|
my $fn = $2;
|
|
my @args = ($3);
|
|
my $pre = $PREMATCH;
|
|
my $post = $POSTMATCH;
|
|
if (defined($escape)) {
|
|
$result .= $pre;
|
|
$text = $post;
|
|
$_ = $escape;
|
|
if (/[tnrfbae]/) {
|
|
eval "\$result .= \"\\$escape\"";
|
|
next;
|
|
}
|
|
if (/[a-zA-Z]/) {
|
|
lwarn "control sequence '\\$escape' is reserved for future use\n";
|
|
}
|
|
$result .= $escape;
|
|
next;
|
|
}
|
|
return "" if _control_macro($env, "__abort__");
|
|
return $result if _control_macro($env, "__return__");
|
|
return $result . $text if _control_macro($env, "__stop_eval__");
|
|
$result .= $pre;
|
|
$text = $post;
|
|
while ($text =~ m/\A\{($match_inner)\}/sp ) {
|
|
push @args, $1;
|
|
$text = $POSTMATCH;
|
|
}
|
|
my $new = eval_fn($env, $fn, (@args));
|
|
ldie "undefined result from evaluation of primitive macro '$fn'\n" unless defined($new);
|
|
$$env{"callstack"} = $old_callstack;
|
|
$result .= $new;
|
|
}
|
|
return "" if _control_macro($env, "__abort__");
|
|
return $result if _control_macro($env, "__return__");
|
|
return $result . $text if _control_macro($env, "__stop_eval__");
|
|
return $result . $text;
|
|
}
|
|
|
|
sub make_env {
|
|
my ($cmd, $res, $text) = (shift, shift, shift);
|
|
$text =~ s{$match_comment}{}sg;
|
|
my $ip = _get_ip($host);
|
|
my %start_env =
|
|
(
|
|
"cmd" => $cmd,
|
|
"res" => $res,
|
|
"resdir" => "$mars/resource-$res",
|
|
"mars" => $mars,
|
|
"host" => $host,
|
|
"real_host" => $real_host,
|
|
"ip" => $ip,
|
|
"timeout" => $timeout,
|
|
"threshold" => $threshold,
|
|
"window" => $window,
|
|
"force" => $force,
|
|
"dry-run" => $dry_run,
|
|
"verbose" => $verbose,
|
|
"callstack" => "",
|
|
# internal, deliberately not documented
|
|
"__abort__" => 0,
|
|
"__return__" => 0,
|
|
"__stop_eval__" => 0,
|
|
"__next__" => 0,
|
|
"__last__" => 0,
|
|
);
|
|
set_args(\%start_env, \%start_env, $cmd, @_);
|
|
return ($text, \%start_env);
|
|
}
|
|
|
|
sub eval_macro {
|
|
my ($text, $start_env) = make_env(@_);
|
|
return parse_macro($text, $start_env);
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# macro commands
|
|
|
|
my $macro = "";
|
|
|
|
my %complex_macros =
|
|
(
|
|
"default"
|
|
=> ""
|
|
. "%elsif{%is-member{}}{"
|
|
. "%call{device-info}"
|
|
. " %{res} [%count-resource-peers{%{res}}/%count-cluster-peers{}]"
|
|
. " %include{diskstate} %include{replstate} %include{flags} %include{role} %include{primarynode} %include{commstate}\n"
|
|
. "%if{%>{%-{%disk-size{}}{%resource-size{}}}{%{threshold}}}{"
|
|
. " Hint: you are wasting %human-numbers{}{ }{ }{%-{%disk-size{}}{%resource-size{}}} on disk %get-disk{}\n"
|
|
. "}"
|
|
. "%if{%>{%resource-possible-size{}}{%resource-size{}}}{"
|
|
. " Hint: marsadm resize would increase resource %{res} by %human-numbers{}{ }{ }{%-{%resource-possible-size{}}{%resource-size{}}}\n"
|
|
. "}"
|
|
. "%if{%and{%not{%is-primary{}}}{%is-attach{}}}{"
|
|
. "%if{%not{%sync-reached{}}}{"
|
|
. "%include{syncinfo}"
|
|
. "}"
|
|
. "%if{%not{%work-reached{}}}{"
|
|
. "%include{replinfo}"
|
|
. "}"
|
|
. "}"
|
|
. "%call{resource-errors}"
|
|
. "}{"
|
|
. "%call{device-info}"
|
|
. "}",
|
|
|
|
"default-resource"
|
|
=> "%if{%{res}}{"
|
|
. "%{res} %human-numbers{}{ }{ }{%resource-size{}} "
|
|
. "[%count-resource-peers{%{res}}/%count-cluster-peers{}]"
|
|
. "}",
|
|
|
|
"default-global"
|
|
=> "%call{comminfo}",
|
|
|
|
"default-header"
|
|
=> "%if{%usable-compressions{}}{"
|
|
. "used-log-compression=\"%used-log-compression{}\"\n"
|
|
. "%if{%{verbose}}{"
|
|
. " usable: \"%usable-compressions{}\"\n"
|
|
. " enabled: \"%enabled-log-compressions{}\"\n"
|
|
. "}"
|
|
. "used-net-compression=\"%used-net-compression{}\"\n"
|
|
. "%if{%{verbose}}{"
|
|
. " usable: \"%usable-compressions{}\"\n"
|
|
. " enabled: \"%enabled-net-compressions{}\"\n"
|
|
. "}"
|
|
. "}"
|
|
. "%if{%usable-digests{}}{"
|
|
. "used-log-digest=\"%used-log-digest{}\"\n"
|
|
. "%if{%{verbose}}{"
|
|
. " usable: \"%usable-digests{}\"\n"
|
|
. " disabled: \"%disabled-log-digests{}\"\n"
|
|
. "}"
|
|
. "used-net-digest=\"%used-log-digest{}\"\n"
|
|
. "%if{%{verbose}}{"
|
|
. " usable: \"%usable-digests{}\"\n"
|
|
. " disabled: \"%disabled-net-digests{}\"\n"
|
|
. "}"
|
|
. "}",
|
|
|
|
"default-footer"
|
|
=> "",
|
|
|
|
"1and1"
|
|
=> "%if{%{res}}{"
|
|
. " %{res} %include{diskstate-1and1} %include{replstate-1and1} %include{flags-1and1} %include{role-1and1} %include{primarynode-1and1}\n"
|
|
. "%if{%and{%is-attach{}}{%not{%sync-reached{}}}}{"
|
|
. "%include{syncinfo-1and1}"
|
|
. "}"
|
|
. "%if{%and{%is-attach{}}{%not{%work-threshold-reached{}}}}{"
|
|
. "%include{replinfo-1and1}"
|
|
. "}"
|
|
. "%call{resource-errors-1and1}"
|
|
. "}{"
|
|
. "%the-pretty-global-msg{}"
|
|
. "}",
|
|
|
|
"device-stats"
|
|
=> ""
|
|
. "%if{%device-opened{}}{"
|
|
. "Opened, "
|
|
. "%if{%device-error{}}{"
|
|
. "ERROR %device-error{} %errno-text{%device-error{}}, "
|
|
. "}"
|
|
. "%device-ops-rate{} IOPS"
|
|
. "%if{%device-nrflying{}}{"
|
|
. ", %device-nrflying{} Flying"
|
|
. "%if{%>{%device-completion-age{}}{%{window}}}{"
|
|
. ", HANGING age: %human-seconds{%device-completion-age{}}"
|
|
. "}"
|
|
. "}"
|
|
. "}{"
|
|
. "Closed"
|
|
. "}",
|
|
|
|
"device-info"
|
|
=> "%if{%device-present{}}{"
|
|
. " LocalDevice %get-device{}"
|
|
. ""
|
|
. " [%call{device-stats}]\n"
|
|
. "}",
|
|
|
|
"diskstate"
|
|
=> "%if{%disk-error{}}{"
|
|
. "DISK_ERROR %disk-error{} %errno-text{%disk-error{}} "
|
|
. "}"
|
|
. "%elsif{%not{%get-disk{}}}{"
|
|
. "NotJoined"
|
|
. "}{%not{%disk-present{}}}{"
|
|
. "NotPresent"
|
|
. "}{%not{%todo-attach{}}}{"
|
|
. "%if{%is-attach{}}{"
|
|
. "Detaching"
|
|
. "}{"
|
|
. "Detached"
|
|
. "}"
|
|
. "}{%and{%replay-code{}}{%<{%replay-code{}}{0}}}{"
|
|
. "DefectiveLog[%errno-text{%replay-code{}}]"
|
|
. "}{%is-orphan{}}{"
|
|
. "Orphan"
|
|
. "}{%not{%is-attach{}}}{"
|
|
. "NoAttach"
|
|
. "}{%not{%is-consistent{}}}{"
|
|
. "%elsif{%not{%todo-primary{}}}{"
|
|
. "InConsistent"
|
|
. "}{%is-primary{}}{"
|
|
. "WriteBack[%human-numbers{}{ }{ }{%writeback-rest{}}]"
|
|
. "}{"
|
|
. "Recovery"
|
|
. "}"
|
|
. "}{%not{%work-reached{}}}{"
|
|
. "%elsif{%not{%todo-primary{}}}{"
|
|
. "OutDated[%call{outdated-flags}]"
|
|
. "}{%is-primary{}}{"
|
|
. "WriteBack[%human-numbers{}{ }{ }{%writeback-rest{}}]"
|
|
. "}{"
|
|
. "Recovery"
|
|
. "}"
|
|
. "}{%and{%ne{%get-primary{}}{}}{%tmp{%let{host}{%get-primary{}}%is-emergency{}}}}{"
|
|
. "EmergencyMode"
|
|
. "}{"
|
|
. "UpToDate"
|
|
. "}",
|
|
|
|
"diskstate-1and1"
|
|
=> "%if{%disk-present{}}{"
|
|
. "%if{%does{attach}}{"
|
|
. "%if{%is-almost-consistent{}}{"
|
|
. "%if{%work-reached{}}{"
|
|
. "Uptodate"
|
|
. "}{"
|
|
. "Outdated[%call{outdated-flags-1and1}]"
|
|
. "}"
|
|
. "}{Inconsistent}"
|
|
. "}{Detached}"
|
|
. "}{Detached}",
|
|
|
|
"outdated-flags"
|
|
=> "%if{%fetch-reached{}}{}{F}%if{%replay-reached{}}{}{R}",
|
|
|
|
"outdated-flags-1and1"
|
|
=> "%if{%fetch-reached{}}{}{F}%if{%replay-reached{}}{}{R}",
|
|
|
|
"replstate"
|
|
=> "%elsif{%not{%is-module-loaded{}}}{"
|
|
. "ModuleNotLoaded"
|
|
. "}{%not{%is-alive{%{host}}}}{"
|
|
. "UnResponsive"
|
|
. "}{%not{%get-disk{}}}{"
|
|
. "NotJoined"
|
|
. "}{%not{%todo-attach{}}}{"
|
|
. "NotStarted"
|
|
. "}{%todo-primary{}}{" # I am designated primary
|
|
. "%elsif{%is-emergency{}}{"
|
|
. "EmergencyMode"
|
|
. "}{%is-primary{}}{"
|
|
. "Replicating"
|
|
. "}{"
|
|
. "NotYetPrimary"
|
|
. "}"
|
|
. "}{" # Secondary
|
|
. "%elsif{%and{%sync-rest{}}{%not{%todo{sync}}}}{"
|
|
. "PausedSync"
|
|
. "}{%is-sync{}}{"
|
|
. "Syncing"
|
|
. "}{%not{%todo{fetch}}}{"
|
|
. "PausedFetch"
|
|
. "}{%not{%todo{replay}}}{"
|
|
. "PausedReplay"
|
|
. "}{%not{%get-primary{}}}{"
|
|
. "NoPrimaryDesignated"
|
|
. "}{%not{%is-alive{}}}{"
|
|
. "PrimaryUnreachable"
|
|
. "}{%is-orphan{}}{"
|
|
. "Orphan"
|
|
. "}{"
|
|
. "Replaying"
|
|
. "}"
|
|
. "}",
|
|
|
|
"replstate-1and1"
|
|
=> "%if{%disk-present{}}{"
|
|
. "%if{%is-primary{}}{"
|
|
. "Replicating"
|
|
. "}{"
|
|
. "%if{%is-alive{}}{"
|
|
. "%if{%and{%not{%sync-reached{}}}{%not{%todo{sync}}}}{"
|
|
. "PausedSync"
|
|
. "}{"
|
|
. "%if{%does{sync}}{"
|
|
. "Syncing"
|
|
. "}{"
|
|
. "%unless{%and{%todo-fetch{}}{%todo-replay{}}}{"
|
|
. "PausedReplay"
|
|
. "}{Replaying}"
|
|
. "}"
|
|
. "}"
|
|
. "}{PrimaryUnreachable}"
|
|
. "}"
|
|
. "}{NotJoined}",
|
|
|
|
"flags"
|
|
=> "%if{%disk-present{}}{%if{%device-present{}}{D}{d}}{-}"
|
|
. "%if{%is-consistent{}}{C}{%if{%disk-present{}}{c}{-}}"
|
|
. "%if{%does{attach}}{%if{%todo{attach}}{A}{a}}{%if{%todo{attach}}{a}{-}}"
|
|
. "%if{%sync-reached{}}{S}{%if{%todo{sync}}{s}{-}}"
|
|
. "%if{%fetch-reached{}}{F}{%if{%todo{fetch}}{f}{-}}"
|
|
. "%if{%replay-reached{}}{R}{%if{%todo{replay}}{r}{-}}",
|
|
|
|
"flags-1and1"
|
|
=> "-%if{%todo{sync}}{S}{-}%if{%todo{fetch}}{F}{-}%if{%todo{replay}}{R}{-}-",
|
|
|
|
"todo-role"
|
|
=> "%if{%disk-present{}}{"
|
|
. "%if{%todo-primary{}}{"
|
|
. "Primary"
|
|
. "}{"
|
|
. "Secondary"
|
|
. "}"
|
|
. "}{None}",
|
|
|
|
"role"
|
|
=> "%if{%disk-present{}}{"
|
|
. "%if{%todo-primary{}}{"
|
|
. "%if{%is-primary{}}{"
|
|
. "%if{%>{%nr-primary{}}{1}}{"
|
|
. "Forced"
|
|
. "}"
|
|
. "Primary"
|
|
. "}{"
|
|
. "NotYetPrimary"
|
|
. "}"
|
|
. "}{"
|
|
. "%if{%is-primary{}}{"
|
|
. "RemainsPrimary"
|
|
. "}{"
|
|
. "Secondary"
|
|
. "}"
|
|
. "}"
|
|
. "}{None}",
|
|
|
|
"role-1and1"
|
|
=> "%if{%disk-present{}}{"
|
|
. "%if{%is-primary{}}{"
|
|
. "Primary"
|
|
. "}{"
|
|
. "Secondary"
|
|
. "}"
|
|
. "}{Secondary}",
|
|
|
|
"primarynode"
|
|
=> "%if{%todo-primary{}}{"
|
|
. "%{host}"
|
|
. "}{"
|
|
. "%get-primary{}"
|
|
. "}",
|
|
|
|
"primarynode-1and1"
|
|
=> "%if{%disk-present{}}{"
|
|
. "%if{%is-primary{}}{"
|
|
. "%{host}"
|
|
. "}{"
|
|
. "%if{%actual-primary{}}{"
|
|
. "%actual-primary{}"
|
|
. "}{-}"
|
|
. "}"
|
|
. "}{-}",
|
|
|
|
"commstate"
|
|
=> "%let{comm}{%alive-age{}}"
|
|
. "%if{%>={%{comm}}{%{window}}}{"
|
|
. "%human-seconds{%{comm}}"
|
|
. "}",
|
|
|
|
"syncinfo"
|
|
=> "%let{amount}{%human-numbers{}{ }{ }{%sync-pos{}}{%sync-size{}}}"
|
|
. "%let{rate}{%human-numbers{}{ }{ }{%sync-rate{}}}"
|
|
. "%sprintf{ syncing: %s %.2f%% (%d/%d)%s rate: %.2f %s/sec remaining: %s hrs\n}"
|
|
. "{%progress{20}{%sync-pos{}}{0}{%sync-size{}}}"
|
|
. "{%sync-percent{}}"
|
|
. "{%{amount}{ }{0}}"
|
|
. "{%{amount}{ }{1}}"
|
|
. "{%{amount}{ }{2}}"
|
|
. "{%{rate}{ }{0}}"
|
|
. "{%{rate}{ }{1}}"
|
|
. "{%human-seconds{%sync-remain{}}}"
|
|
. "%call{sync-line}",
|
|
|
|
"syncinfo-1and1"
|
|
=> "%let{amount}{%human-numbers{}{ }{ }{%sync-pos{}}{%sync-size{}}}"
|
|
. "%let{rate}{%human-numbers{}{ }{ }{%sync-rate{}}}"
|
|
. "%sprintf{ syncing: %s %.2f%% (%d/%d)%s rate: %.2f %s/sec remaining: %s hrs\n}"
|
|
. "{%progress{20}{%sync-pos{}}{0}{%sync-size{}}}"
|
|
. "{%sync-percent{}}"
|
|
. "{%{amount}{ }{0}}"
|
|
. "{%{amount}{ }{1}}"
|
|
. "{%{amount}{ }{2}}"
|
|
. "{%{rate}{ }{0}}"
|
|
. "{%{rate}{ }{1}}"
|
|
. "{%human-seconds{%sync-remain{}}}"
|
|
. "%call{sync-line-1and1}",
|
|
|
|
"replinfo"
|
|
=> "%let{amount}{%human-numbers{}{ }{ }{%replay-pos{}}{%fetch-size{}}}"
|
|
. "%let{logs}{%replay-lognr{}}"
|
|
. "%let{l2}{%fetch-lognr{}}"
|
|
. "%let{l3}{%work-lognr{}}"
|
|
. "%if{%>{%{l2}}{%{logs}}}{%append{logs}{::%{l2}}}"
|
|
. "%if{%>{%{l3}}{%{l2}}}{%append{logs}{..%{l3}}}"
|
|
. "%sprintf{ replaying: %s %.2f%% (%d/%d)%s logs: [%s]\n}"
|
|
. "{%progress{20}{%replay-pos{}}{%fetch-pos{}}{%fetch-size{}}}"
|
|
. "{%work-percent{}}"
|
|
. "{%{amount}{ }{0}}"
|
|
. "{%{amount}{ }{1}}"
|
|
. "{%{amount}{ }{2}}"
|
|
. "{%{logs}}"
|
|
. "%call{fetch-line}"
|
|
. "%call{replay-line}",
|
|
|
|
"replinfo-1and1"
|
|
=> "%let{amount}{%human-numbers{}{ }{ }{%replay-pos{}}{%fetch-size{}}}"
|
|
. "%sprintf{ replaying: %s %.2f%% (%d/%d)%s logs: [%d..%d]\n}"
|
|
. "{%progress{20}{%replay-pos{}}{%fetch-pos{}}{%fetch-size{}}}"
|
|
. "{%work-percent{}}"
|
|
. "{%{amount}{ }{0}}"
|
|
. "{%{amount}{ }{1}}"
|
|
. "{%{amount}{ }{2}}"
|
|
. "{%replay-lognr{}}"
|
|
. "{%fetch-lognr{}}"
|
|
. "%call{fetch-line-1and1}"
|
|
. "%call{replay-line-1and1}",
|
|
|
|
"sync-line"
|
|
=> "%let{amount}{%human-numbers{}{}{}{%sync-pos{}}{%sync-size{}}}"
|
|
. "%let{rate}{%human-numbers{}{}{}{%sync-rate{}}}"
|
|
. "%let{remain}{%human-seconds{%sync-remain{}}}"
|
|
. " > sync: %{amount} rate: %{rate}/sec remaining: %{remain} hrs\n",
|
|
|
|
"sync-line-1and1"
|
|
=> "%let{amount}{%human-numbers{}{}{}{%sync-pos{}}{%sync-size{}}}"
|
|
. "%let{rate}{%human-numbers{}{}{}{%sync-rate{}}}"
|
|
. "%let{remain}{%human-seconds{%sync-remain{}}}"
|
|
. " > sync: %{amount} rate: %{rate}/sec remaining: %{remain} hrs\n",
|
|
|
|
"fetch-line"
|
|
=> "%let{amount}{%human-numbers{}{}{}{%fetch-rest{}}}"
|
|
. "%let{rate}{%human-numbers{}{}{}{%fetch-rate{}}}"
|
|
. "%let{remain}{%human-seconds{%fetch-remain{}}}"
|
|
. "%let{age}{%if{%and{%fetch-remain{}}{%>={%fetch-age{}}{%{window}}}}{ age: %human-seconds{%fetch-age{}}}}"
|
|
. "%let{lag}{%if{%and{%fetch-remain{}}{%>={%fetch-lag{}}{%{window}}}}{ lag: %human-seconds{%fetch-lag{}}}}"
|
|
. " > fetch: %{amount}%{age}%{lag} rate: %{rate}/s remaining: %{remain}\n",
|
|
|
|
"fetch-line-1and1"
|
|
=> "%let{amount}{%human-numbers{}{}{}{%fetch-rest{}}}"
|
|
. "%let{rate}{%human-numbers{}{}{}{%fetch-rate{}}}"
|
|
. "%let{remain}{%human-seconds{%fetch-remain{}}}"
|
|
. " > fetch: %{amount} rate: %{rate}/sec remaining: %{remain} hrs\n",
|
|
|
|
"replay-line"
|
|
=> "%let{amount}{%human-numbers{}{}{}{%replay-rest{}}}"
|
|
. "%let{rate}{%human-numbers{}{}{}{%replay-rate{}}}"
|
|
. "%let{remain}{%human-seconds{%replay-remain{}}}"
|
|
. "%let{age}{%if{%and{%replay-remain{}}{%>={%replay-age{}}{%{window}}}}{ age: %human-seconds{%replay-age{}}}}"
|
|
. " > replay: %{amount}%{age} rate: %{rate}/s remaining: %{remain}\n",
|
|
|
|
"replay-line-1and1"
|
|
=> "%let{amount}{%human-numbers{}{}{}{%replay-rest{}}}"
|
|
. "%let{rate}{%human-numbers{}{}{}{%replay-rate{}}}"
|
|
. "%let{remain}{%human-seconds{%replay-remain{}}}"
|
|
. " > replay: %{amount} rate: %{rate}/sec remaining: %{remain} hrs\n",
|
|
|
|
"resource-errors"
|
|
=> "%let{fat-count}{%get-resource-fat-count{}}"
|
|
. "%if{%{fat-count}}{"
|
|
. "FATALS FILE (%{fat-count})"
|
|
. "%if{%{verbose}}{"
|
|
. ":\n%get-resource-fat{}"
|
|
. "}{"
|
|
. ": available with --verbose\n"
|
|
. "}"
|
|
. "}"
|
|
|
|
. "%let{errs}{%the-err-count{}}"
|
|
. "%if{%{errs}}{"
|
|
. "ERRORS LNK (%{errs}):\n"
|
|
. "%the-pretty-err-msg{}"
|
|
. "}"
|
|
|
|
. "%let{err-count}{%get-resource-err-count{}}"
|
|
. "%if{%{err-count}}{"
|
|
. "ERRORS FILE (%{err-count})"
|
|
. "%if{%{verbose}}{"
|
|
. ":\n%get-resource-err{}"
|
|
. "}{"
|
|
. ": available with --verbose\n"
|
|
. "}"
|
|
. "}"
|
|
|
|
. "%let{wrns}{%the-wrn-count{}}"
|
|
. "%if{%{wrns}}{"
|
|
. "WARNINGS LNK (%{wrns}):\n"
|
|
. "%the-pretty-wrn-msg{}"
|
|
. "}"
|
|
|
|
. "%let{wrn-count}{%get-resource-wrn-count{}}"
|
|
. "%if{%and{%{verbose}}{%{wrn-count}}}{"
|
|
. "WARNINGS FILE (%{wrn-count})"
|
|
. "%if{%{verbose}}{"
|
|
. ":\n%get-resource-wrn{}"
|
|
. "}{"
|
|
. ": available with --verbose\n"
|
|
. "}"
|
|
. "}"
|
|
|
|
. "%let{infs}{%the-inf-count{}}"
|
|
. "%if{%and{%>={%{verbose}}{2}}{%{infs}}}{"
|
|
. "INFOS LNK (%{infs}):\n"
|
|
. "%the-pretty-inf-msg{}"
|
|
. "}"
|
|
|
|
. "%let{status_msg}{%get-log-status-{}}"
|
|
. "%if{%and{%>={%{verbose}}{2}}{%{status_msg}}}{"
|
|
. "STATUS FILE:\n%{status_msg}"
|
|
. "}",
|
|
|
|
|
|
"resource-errors-1and1"
|
|
=> "%let{fat-count}{%get-resource-fat-count{}}"
|
|
. "%if{%{fat-count}}{"
|
|
. "FATALS FILE (%{fat-count}):\n"
|
|
. "%get-resource-fat{}"
|
|
. "}"
|
|
|
|
. "%let{errs}{%the-err-count{}}"
|
|
. "%if{%{errs}}{"
|
|
. "ERRORS LNK (%{errs}):\n"
|
|
. "%the-pretty-err-msg{}"
|
|
. "}"
|
|
|
|
. "%let{err-count}{%get-resource-err-count{}}"
|
|
. "%if{%{err-count}}{"
|
|
. "ERRORS FILE (%{err-count}):\n"
|
|
. "%get-resource-err{}"
|
|
. "}"
|
|
|
|
. "%let{wrns}{%the-wrn-count{}}"
|
|
. "%if{%{wrns}}{"
|
|
. "WARNINGS LNK (%{wrns}):\n"
|
|
. "%the-pretty-wrn-msg{}"
|
|
. "}"
|
|
|
|
. "%let{wrn-count}{%get-resource-wrn-count{}}"
|
|
. "%if{%and{%{verbose}}{%{wrn-count}}}{"
|
|
. "WARNINGS FILE (%{wrn-count})"
|
|
. "%if{%{verbose}}{"
|
|
. ":\n%get-resource-wrn{}"
|
|
. "}{"
|
|
. ": available with --verbose\n"
|
|
. "}"
|
|
. "}"
|
|
|
|
. "%let{infs}{%the-inf-count{}}"
|
|
. "%if{%and{%{verbose}}{%{infs}}}{"
|
|
. "INFOS LNK (%{infs}):\n"
|
|
. "%the-pretty-inf-msg{}"
|
|
. "}"
|
|
|
|
. "%let{status_msg}{%get-log-status-{}}"
|
|
. "%if{%and{%{verbose}}{%{status_msg}}}{"
|
|
. "STATUS FILE:\n%{status_msg}"
|
|
. "}",
|
|
|
|
"comminfo"
|
|
=> "%let{txt}{%the-pretty-global-msg{}}"
|
|
. "%let{txt}{%subst{%{txt}}{^.*CONNECTED.*\$\n*}{}{mg}}"
|
|
. "%if{%{txt}}{"
|
|
. "++++++++++++ Global / Communication Status ++++++++++++\n"
|
|
. "%{txt}"
|
|
. "}"
|
|
. "%let{timeslip}{%-{%time{}}{%real-time{}}}"
|
|
. "%if{%>{%{timeslip}}{%{window}}}{"
|
|
. "++++++++++++ Lamport time slip: %human-seconds{%{timeslip}} ++++++++++++\n"
|
|
. "}",
|
|
|
|
# drbd similar ones
|
|
"state"
|
|
=> "NYI Please override macro \\%%{0}\\{\\}",
|
|
"cstate"
|
|
=> "NYI Please override macro \\%%{0}\\{\\}",
|
|
"dstate"
|
|
=> "NYI Please override macro \\%%{0}\\{\\}",
|
|
"status"
|
|
=> "NYI Please override macro \\%%{0}\\{\\}",
|
|
);
|
|
|
|
my %view_macros = %complex_macros;
|
|
|
|
# add some trivial macros to the command line interface
|
|
# FIXME: only at most one argument is allowed for now.
|
|
my %trivial_globs =
|
|
(
|
|
|
|
# intended for human use
|
|
"{all,the}-{pretty-,}{global-,}{{err,wrn,inf}-,}msg"
|
|
=> "",
|
|
"{is,todo,nr}-{attach,sync,fetch,replay,primary,secondary}"
|
|
=> "",
|
|
"is-{split-brain,consistent,emergency,orphan}"
|
|
=> "",
|
|
"is-{member,guest}"
|
|
=> "",
|
|
"rest-space"
|
|
=> "",
|
|
"get-{disk,device}"
|
|
=> "",
|
|
"present-{disk,device}"
|
|
=> "(deprecated, use *-present instead)",
|
|
"disk-error"
|
|
=> "",
|
|
"{disk,device}-present"
|
|
=> "",
|
|
"device-{opened,nrflying,error,completion-{stamp,age}}"
|
|
=> "",
|
|
"get-log-status"
|
|
=> "",
|
|
"get-resource-{fat,err,wrn}{,-count}"
|
|
=> "",
|
|
|
|
# intended for scripting
|
|
"{my,all}-{resources,members,guests}"
|
|
=> "",
|
|
"count-{my,all}-{resources,members,guests}"
|
|
=> "",
|
|
"{cluster,resource}-members"
|
|
=> "deprecated",
|
|
"count-{cluster,resource,guest}-members"
|
|
=> "deprecated",
|
|
"{cluster,resource,guest}-peers"
|
|
=> "",
|
|
"count-{cluster,resource,guest}-peers"
|
|
=> "",
|
|
"{disk,resource,device}-size"
|
|
=> "",
|
|
"resource-possible-size"
|
|
=> "",
|
|
"deletable-size"
|
|
=> "",
|
|
"occupied-size"
|
|
=> "",
|
|
"replay-code"
|
|
=> "When negative, this indidates that a replay/recovery error has occurred.",
|
|
"errno-text"
|
|
=> "Convert errno numbers (positive or negative) into human readable text.",
|
|
"{sync,fetch,replay,work,syncpos}-{size,pos}"
|
|
=> "",
|
|
"{sync,fetch,replay,work}-{rest,{almost-,threshold-,}reached,percent,permille,vector}"
|
|
=> "",
|
|
"writeback-rest"
|
|
=> "",
|
|
"{sync,fetch,replay}-{ops-rate,amount-rate,rate,remain}"
|
|
=> "",
|
|
"device-{ops-rate,amount-rate,rate}"
|
|
=> "",
|
|
"replay-basenr"
|
|
=> "",
|
|
"{fetch,replay,work}-{lognr,logcount}"
|
|
=> "",
|
|
"summary-vector",
|
|
=> "",
|
|
"{time,real-time}"
|
|
=> "",
|
|
"{alive,fetch,replay,work}-{timestamp,age,lag}"
|
|
=> "",
|
|
"{potential,implemented,usable}-features"
|
|
=> "",
|
|
"{implemented,usable}-{digests,compressions}"
|
|
=> "",
|
|
"enabled-{log|net}-compressions"
|
|
=> "",
|
|
"disabled-{log|net}-digests"
|
|
=> "",
|
|
"used-{log,net}-{digest,compression}"
|
|
=> "",
|
|
"{tree,features}-version"
|
|
=> "",
|
|
|
|
|
|
"{get,actual}-primary"
|
|
=> "",
|
|
"is-{alive}"
|
|
=> "",
|
|
"systemd-unit"
|
|
=> "",
|
|
"uuid"
|
|
=> "",
|
|
"tree"
|
|
=> "",
|
|
|
|
|
|
"wait-{is,todo}-{attach,sync,fetch,replay,primary,secondary}-{on,off}"
|
|
=> "",
|
|
);
|
|
|
|
my $glob = "";
|
|
foreach my $new_glob (sort alphanum_cmp keys(%trivial_globs)) {
|
|
$glob .= "," if $glob;
|
|
$glob .= $new_glob;
|
|
}
|
|
foreach my $name (lamport_glob("{$glob}")) {
|
|
$view_macros{$name} = "\%primitive-$name\{\%\{1}}";
|
|
}
|
|
|
|
sub _get_pre {
|
|
my ($rest, $add) = @_;
|
|
$rest =~ s{(\A.*\n)}{}sp;
|
|
$add = 0 if $1;
|
|
return length($rest) + $add;
|
|
}
|
|
|
|
sub _break_line {
|
|
my ($result, $add, $indent) = @_;
|
|
my $pre_len = _get_pre($result, $add);
|
|
if ($pre_len != $indent) {
|
|
$result .= "\\\n" . ' ' x $indent;
|
|
}
|
|
return $result;
|
|
}
|
|
|
|
sub _pretty_macro {
|
|
my ($text, $add, $indent) = @_;
|
|
$text =~ s/\\n/\n/gs;
|
|
my $result = "";
|
|
# look for function calls
|
|
while ($text =~ m/^($match_fn_head(?:{})*)/mp) {
|
|
$result .= $PREMATCH;
|
|
my $fn = $1;
|
|
$text = $POSTMATCH;
|
|
$result = _break_line($result, $add, $indent);
|
|
$add = 0;
|
|
$result .= $fn;
|
|
while ($text =~ m/\A\{/sp) {
|
|
# don't break simple / non-recursive / unbreakable arguments
|
|
if ($text =~ m/\A(\{(?:\s$match_inner|[^%{}]*)\})/sp) {
|
|
my $shortcut = $1;
|
|
$text = $POSTMATCH;
|
|
# make newlines non-verbatim
|
|
$shortcut =~ s{\n}{\\n}spg;
|
|
$result .= $shortcut;
|
|
next;
|
|
}
|
|
# break more complex arguments
|
|
#$result .= "{\\\n" . ' ' x $indent;
|
|
$result .= "{";
|
|
if ($text =~ m/\A\{([\s]$match_inner|)\}/sp) {
|
|
$result .= "$1}";
|
|
$text = $POSTMATCH;
|
|
} elsif ($text =~ m/\A\{([^\s]$match_inner)\}/sp ) {
|
|
my $arg = $1;
|
|
$text = $POSTMATCH;
|
|
my $sub_add = _get_pre($result, 0);
|
|
my $subst = _pretty_macro($arg, $sub_add, $indent + 2);
|
|
$result .= $subst;
|
|
$result = _break_line($result, 0, $indent);
|
|
$result .= "}";
|
|
} else {
|
|
ldie "wtf '$text'?\n";
|
|
}
|
|
}
|
|
}
|
|
$result .= $text;
|
|
return $result;
|
|
}
|
|
|
|
sub pretty_macro {
|
|
my $txt = _pretty_macro(@_);
|
|
# always add a trailing newline (for vi ;)
|
|
$txt .= "\\\n" unless $txt =~ m{\n\Z}sp;
|
|
return $txt;
|
|
}
|
|
|
|
get_global_versions();
|
|
|
|
if (defined($ARGV[0]) && $ARGV[0] =~ m/^dump-(all-)?macros$/) {
|
|
my %macros = %complex_macros;
|
|
%macros = %view_macros if defined($1);
|
|
foreach my $name (keys %macros) {
|
|
my $txt = $macros{$name};
|
|
$txt = pretty_macro($txt, 0, 0);
|
|
my $file = "$name.tpl";
|
|
if (-r $file) { # backup already existing files
|
|
for (my $i = 0; ; $i++) {
|
|
my $file_old = "$file.old$i";
|
|
unless (-r $file_old) {
|
|
rename($file, $file_old);
|
|
last;
|
|
}
|
|
}
|
|
}
|
|
open(OUT, ">", $file) or ldie "cannot create file '$file'\n";
|
|
print OUT $txt;
|
|
close(OUT);
|
|
}
|
|
exit(0);
|
|
}
|
|
|
|
sub get_macro {
|
|
my ($cmd, $tolerate) = @_;
|
|
if ($macro) {
|
|
my $orig = $macro;
|
|
$macro = ""; # consume once
|
|
return $orig;
|
|
}
|
|
foreach my $path (@MARS_PATH) {
|
|
my $file = "$path/$cmd.tpl";
|
|
if (-r $file) {
|
|
lprint_stderr "using macro file '$file'\n" if (defined($view_macros{$cmd}));
|
|
local $/; # slurp
|
|
open(IN, "<", $file) or next;
|
|
my $tpl = <IN>;
|
|
close(IN);
|
|
$tpl =~ s{$match_comment}{}sg;
|
|
$tpl =~ s{\\n}{\n}sg;
|
|
return $tpl;
|
|
}
|
|
}
|
|
return $view_macros{$cmd} if (defined($view_macros{$cmd}));
|
|
ldie "cannot find macro '$cmd'\n" unless defined($tolerate);
|
|
return "";
|
|
}
|
|
|
|
sub view_cmd {
|
|
my ($cmd, $res) = (shift, shift);
|
|
if ($cmd =~ s/^prettyprint-//) {
|
|
my $txt = get_macro($cmd);
|
|
print pretty_macro($txt, 0);
|
|
return;
|
|
}
|
|
$cmd =~ s/^view-?//;
|
|
$cmd = "default" unless $cmd;
|
|
my $tpl = get_macro($cmd);
|
|
my $result = eval_macro($cmd, $res, $tpl, @_);
|
|
if ($result ne "") { # add trailing newline if none exists
|
|
chomp $result;
|
|
$result .= "\n";
|
|
}
|
|
print $result;
|
|
}
|
|
|
|
##################################################################
|
|
|
|
# command table of all commands
|
|
|
|
my %cmd_table =
|
|
(
|
|
# new keywords
|
|
"create-uuid"
|
|
=> [
|
|
"verbose 3",
|
|
"usage: create-uuid (no parameters)",
|
|
"Deprecated.",
|
|
"This is only needed if you have a very old $mars/",
|
|
"directory structure from MARS version light0.1beta05",
|
|
"or earlier.",
|
|
\&create_uuid,
|
|
],
|
|
"create-cluster"
|
|
=> [
|
|
"usage: create-cluster (no parameters)",
|
|
"This must be called exactly once when creating a new cluster.",
|
|
"Don't call this again! Use join-cluster on the secondary nodes.",
|
|
"Please read the PDF manual for details.",
|
|
\&create_cluster,
|
|
],
|
|
"join-cluster"
|
|
=> [
|
|
"usage: join-cluster <hostname_of_primary>",
|
|
"Establishes a new cluster membership.",
|
|
"This must be called once on any new cluster member.",
|
|
"This is a prerequisite for join-resource.",
|
|
\&join_cluster,
|
|
],
|
|
"merge-cluster"
|
|
=> [
|
|
"usage: merge-cluster <hostname_of_other_cluster> [<host_ip>]",
|
|
"Precondition: the resource names of both clusters must be disjoint.",
|
|
"Create the union of two clusters, consisting of the",
|
|
"union of all machines, and the union of all resources.",
|
|
"The members of each resource are _not_ changed by this.",
|
|
"This is useful for creating a big \"virtual LVM cluster\" where",
|
|
"resources can be almost arbitrarily migrated between machines via",
|
|
"later join-resource / leave-resource operations.",
|
|
\&merge_cluster,
|
|
],
|
|
"merge-cluster-list"
|
|
=> [
|
|
"usage: merge-cluster-list",
|
|
"Determine the local list of resources.",
|
|
"Useful for checking or analysis of merge-cluster disjointness by hand.",
|
|
\&merge_cluster,
|
|
],
|
|
"merge-cluster-check"
|
|
=> [
|
|
"usage: merge-cluster-check <hostname_of_other_cluster>",
|
|
"Check whether the resources of both clusters are disjoint.",
|
|
"Useful for checking in advance whether merge-cluster would be",
|
|
"possible.",
|
|
\&merge_cluster,
|
|
],
|
|
"split-cluster"
|
|
=> [
|
|
"usage: split-cluster (no parameters)",
|
|
"NOT OFFICIALLY SUPPORTED - ONLY FOR EXPERTS.",
|
|
"RTFS = Read The Fucking Sourcecode.",
|
|
"Use this only if you know what you are doing.",
|
|
\&split_cluster,
|
|
],
|
|
"leave-cluster"
|
|
=> [
|
|
"usage: leave-cluster (no parameters)",
|
|
"This can be used for final deconstruction of a cluster member.",
|
|
"Prior to this, all resources must have been left",
|
|
"via leave-resource.",
|
|
"Notice: this will never destroy the cluster UID on the /mars/",
|
|
"filesystem.",
|
|
"Please read the PDF manual for details.",
|
|
\&leave_cluster,
|
|
],
|
|
"create-resource"
|
|
=> [
|
|
"usage: create-resource <resource_name> </dev/lv/mydata>",
|
|
"(further syntax variants are described in the PDF manual).",
|
|
"Create a new resource out of a pre-existing disk (backing",
|
|
"block device) /dev/lv/mydata (or similar).",
|
|
"The current node will start in primary role, thus",
|
|
"/dev/mars/<resource_name> will appear after a short time, initially",
|
|
"showing the same contents as the underlying disk /dev/lv/mydata.",
|
|
"It is good practice to name the resource <resource_name> and the",
|
|
"disk name identical.",
|
|
\&create_res,
|
|
],
|
|
"join-resource"
|
|
=> [
|
|
"usage: join-resource <resource_name> </dev/lv/mydata>",
|
|
"(further syntax variants are described in the PDF manual).",
|
|
"The resource <resource_name> must have been already created on",
|
|
"another cluster node, and the network must be healthy.",
|
|
"The contents of the local replica disk /dev/lv/mydata will be",
|
|
"overwritten by the initial fast full sync from the currently",
|
|
"designated primary node.",
|
|
"After the initial full sync has finished, the current host will",
|
|
"act in secondary role.",
|
|
"For details on size constraints etc, refer to the PDF manual.",
|
|
\&create_res,
|
|
],
|
|
"leave-resource"
|
|
=> [
|
|
"Precondition: the local host must be in secondary role.",
|
|
"Stop being a member of the resource, and thus stop all",
|
|
"replication activities. The status of the underlying disk",
|
|
"will remain in its current state (whatever it is).",
|
|
\&leave_res_phase0,
|
|
"check preconditions",
|
|
\&leave_res_phase1,
|
|
"switch state",
|
|
\&leave_res_phase2,
|
|
"purge logfiles",
|
|
\&leave_res_phase3,
|
|
"wait for deletions",
|
|
\&leave_res_phase4,
|
|
"trigger systemd",
|
|
],
|
|
"delete-resource"
|
|
=> [
|
|
"CAUTION! This is dangerous when the network is somehow",
|
|
"interrupted, or when damaged nodes are later re-surrected",
|
|
"in any way.\n",
|
|
"Precondition: the resource must no longer have any members",
|
|
"(see leave-resource).",
|
|
"This is only needed when you _insist_ on re-using a damaged",
|
|
"resource for re-creating a new one with exactly the same",
|
|
"old <resource_name>.",
|
|
"HINT: best practice is to not use this, but just create a _new_",
|
|
"resource with a new <resource_name> out of your local disks.",
|
|
"Please read the PDF manual on potential consequences.",
|
|
\&delete_res_phase1,
|
|
"set links",
|
|
\&delete_res_phase2,
|
|
"trigger systemd",
|
|
],
|
|
"set-systemd-unit"
|
|
=> [
|
|
"usage: set-systemd-unit <resource_name> <start_unit_name> [<stop_unit_name>]",
|
|
"This activates the systemd template engine of marsadm.",
|
|
"Please read mars-user-manual.pdf on this.",
|
|
"When <stop_unit_name> is omitted, it will be treated equal to",
|
|
"<start_unit_name>.",
|
|
"You may also use special keywords like DEFAULT, please read the manuals.",
|
|
"PARALLEL",
|
|
"FORK",
|
|
\&set_systemd_unit_phase1,
|
|
"set links",
|
|
\&set_systemd_unit_phase2,
|
|
"trigger systemd",
|
|
],
|
|
"get-systemd-unit"
|
|
=> [
|
|
"usage: get-systemd-unit <resource_name>",
|
|
"Show the system units (for start and stop), or empty when unset.",
|
|
\&get_systemd_unit,
|
|
],
|
|
"set-systemd-want"
|
|
=> [
|
|
"usage: set-systemd-want <resource_name> <host_name>",
|
|
"Override the current location where the complete systemd unit stack",
|
|
"should be started.",
|
|
"Useful for a _temporary_ stop of the systemd unit stack by supplying",
|
|
"the special hostname \"(none)\".",
|
|
"For a _permanent_ stop, use \"marsadm set-systemd-unit <resource>\"",
|
|
"instead.",
|
|
"PARALLEL",
|
|
"FORK",
|
|
\&set_systemd_want_phase1,
|
|
"set links",
|
|
\&set_systemd_want_phase2,
|
|
"trigger systemd",
|
|
"LOOP",
|
|
\&set_systemd_want_phase3,
|
|
"wait for effect",
|
|
],
|
|
"get-systemd-want"
|
|
=> [
|
|
"usage: get-systemd-want <resource_name>",
|
|
"Show the current hostname where the complete systemd unit stack",
|
|
"between start- and stop-unit should appear.",
|
|
"Reports empty when unset, or \"(none)\" when stopped.",
|
|
\&get_systemd_want,
|
|
],
|
|
"set-connect-pref-list"
|
|
=> [
|
|
"verbose 2",
|
|
"usage: set-connect-pref-list <resource_name> <host_list>",
|
|
"Provisionary command for internal use at 1&1. Will be replaced by",
|
|
"a better concept somewhen. The <host_list> must be comma-separated.",
|
|
\&set_connect_pref_list,
|
|
],
|
|
"get-connect-pref-list"
|
|
=> [
|
|
"verbose 2",
|
|
"Provisionary command for internal use at 1&1. Will be replaced by",
|
|
"a better concept somewhen.",
|
|
"Shows the outcome of set-connect-pref-list.",
|
|
\&set_connect_pref_list,
|
|
],
|
|
"set-global-enabled-log-compressions"
|
|
=> [
|
|
"usage: set-global-enabled-log-compressions <features>",
|
|
"Tell the whole cluster which compression features to use globally",
|
|
"for logfile compression. The effective value can be checked via",
|
|
"\"marsadm view-enabled-log-compressions\".",
|
|
"See \"marsadm view-potential-features\" and",
|
|
"\"marsadm --help\" for a list of compression feature names,",
|
|
"which must be separated by | symbols.",
|
|
\&set_defaults,
|
|
],
|
|
"set-global-enabled-net-compressions"
|
|
=> [
|
|
"usage: set-global-enabled-net-compressions <features>",
|
|
"Tell the whole cluster which compression features to use globally for",
|
|
"network transport compression. This is independent from log compression.",
|
|
"The effective value can be checked via",
|
|
"\"marsadm view-enabled-log-compressions\".",
|
|
"See \"marsadm view-potential-features\" and",
|
|
"\"marsadm --help\" for a list of compression feature names,",
|
|
"which must be separated by | symbols.",
|
|
\&set_defaults,
|
|
],
|
|
"set-global-disabled-log-digests"
|
|
=> [
|
|
"usage: set-global-disabled-log-digests <features>",
|
|
"Tell the whole cluster which checksumming digests to disable globally",
|
|
"for the payload in transaction logfiles.",
|
|
"The effective value can be checked via \"marsadm view-disabled-log-digests\".",
|
|
"See \"marsadm view-potential-features\" and",
|
|
"\"marsadm --help\" for a list of digest feature names,",
|
|
"which must be separated by | symbols.",
|
|
\&set_defaults,
|
|
],
|
|
"set-global-disabled-net-digests"
|
|
=> [
|
|
"usage: set-global-disabled-net-digests <features>",
|
|
"Tell the whole cluster which checksumming digests to disable globally",
|
|
"for cluster-wide data comparisons, like fast full-sync.",
|
|
"The effective value can be checked via \"marsadm view-disabled-net-digests\".",
|
|
"See \"marsadm view-potential-features\" and",
|
|
"\"marsadm --help\" for a list of digest feature names,",
|
|
"which must be separated by | symbols.",
|
|
\&set_defaults,
|
|
],
|
|
"set-disabled-digests"
|
|
=> [
|
|
"verbose 99", # NYI
|
|
"usage: set-disabled-digests <resource_name> <features>",
|
|
"Tell the whole cluster which checksumming digests to disable at the",
|
|
"resource <resource_name>, potentially overriding the global value",
|
|
"as set by set-global-disabled-digests.",
|
|
"The effective value can be checked via \"marsadm view-disabled-digests\".",
|
|
"See \"marsadm view-potential-features\" and",
|
|
"\"marsadm --help\" for a list of digest feature names,",
|
|
"which must be separated by | symbols.",
|
|
\&set_defaults,
|
|
],
|
|
"log-rotate"
|
|
=> [
|
|
"Only useful at the primary side.",
|
|
"Start writing transaction logs into a new transaction logfile.",
|
|
"This should be regularly called by a cron job or similar.",
|
|
"For regular maintainance cron jobs, please prefer 'marsadm cron'.",
|
|
"For details and best practices, please refer to the PDF manual.",
|
|
\&logrotate_res,
|
|
],
|
|
"log-delete-one"
|
|
=> [
|
|
"When possible, globally delete at most one old transaction logfile",
|
|
"which is known to be superfluous, i.e. all secondaries no longer",
|
|
"need to replay it.",
|
|
"Hint: use this only for testing and manual inspection.",
|
|
"For regular maintainance cron jobs, please prefer cron",
|
|
"or log-delete-all.",
|
|
\&logdelete_res,
|
|
],
|
|
"log-delete"
|
|
=> [
|
|
"When possible, globally delete all old transaction logfiles which",
|
|
"are known to be superflous, i.e. all secondaries no longer need",
|
|
"to replay them.",
|
|
"This must be regularly called by a cron job or similar, in order",
|
|
"to prevent overflow of the /mars/ directory.",
|
|
"For regular maintainance cron jobs, please prefer 'marsadm cron'.",
|
|
"For details and best practices, please refer to the PDF manual.",
|
|
\&logdelete_res,
|
|
],
|
|
"log-delete-all"
|
|
=> [
|
|
"Alias for log-delete",
|
|
\&logdelete_res,
|
|
],
|
|
"cron"
|
|
=> [
|
|
"usage: cron (no parameters)",
|
|
"Do all necessary regular housekeeping tasks.",
|
|
"This is equivalent to log-rotate all; sleep 7; log-delete-all all.",
|
|
[
|
|
\&link_purge_global,
|
|
],
|
|
\&cron_phase1,
|
|
"purge links and rotate logfiles",
|
|
"SLEEP",
|
|
\&cron_phase2,
|
|
"delete old logfiles",
|
|
],
|
|
"log-purge-all"
|
|
=> [
|
|
"This is potentially dangerous.",
|
|
"Use this only if you are really desperate in trying to resolve a",
|
|
"split brain. Use this only after reading the PDF manual!",
|
|
\&log_purge_res,
|
|
],
|
|
"err-purge-all"
|
|
=> [
|
|
"Remove any err message from the given resources.",
|
|
\&err_purge_res,
|
|
],
|
|
"link-purge-all"
|
|
=> [
|
|
"Remove any .deleted links.",
|
|
[
|
|
\&link_purge_global,
|
|
],
|
|
\&link_purge_res,
|
|
],
|
|
"fake-sync"
|
|
=> [
|
|
"verbose 1",
|
|
"Attention: this is potentially dangerous.",
|
|
"Only for experts.",
|
|
"Please read the PDF manual to understand the risks!",
|
|
\&fake_sync_phase1,
|
|
"switch sync off",
|
|
\&fake_sync_phase2,
|
|
"wait for sync off",
|
|
\&fake_sync_phase3,
|
|
"fake sync",
|
|
],
|
|
"set-link"
|
|
=> [
|
|
"verbose 1",
|
|
"usage: set-link <path> <value>",
|
|
"Only for experts.",
|
|
\&set_link_cmd,
|
|
],
|
|
"get-link"
|
|
=> [
|
|
"verbose 1",
|
|
"usage: get-link <path>",
|
|
"Only for experts.",
|
|
\&set_link_cmd,
|
|
],
|
|
"set-sync-limit-value"
|
|
=> [
|
|
"usage: set-sync-limit-value <new_value>",
|
|
"Set the maximum number of resources which should by syncing",
|
|
"concurrently.",
|
|
\&set_sync_limit_value,
|
|
],
|
|
"get-sync-limit-value"
|
|
=> [
|
|
"usage: get-sync-limit-value (no parameters)",
|
|
"For retrieval of the value set by set-sync-limit-value.",
|
|
\&set_sync_limit_value,
|
|
],
|
|
"delete-file"
|
|
=> [
|
|
"verbose 1",
|
|
"usage: delete-file <path>",
|
|
"VERY dangerous!",
|
|
"Only for experts.",
|
|
\&delete_file_cmd,
|
|
],
|
|
"set-emergency-limit"
|
|
=> [
|
|
"usage: set-emergency-limit <resource_name> <value>",
|
|
"Set a per-resource emergency limit for disk space in /mars.",
|
|
"See PDF manual for details.",
|
|
\&emergency_limit_res,
|
|
],
|
|
"get-emergency-limit"
|
|
=> [
|
|
"Counterpart of set-emergency-limit (per-resource emergency limit)",
|
|
\&emergency_limit_res,
|
|
],
|
|
"emergency-limit" => \&emergency_limit_res,
|
|
"cat"
|
|
=> [
|
|
"usage: cat <path>",
|
|
"Print internal debug output in human readable form.",
|
|
"Numerical timestamps and numerical error codes are replaced",
|
|
"by more readable means.",
|
|
"Example: marsadm cat /mars/5.total.status",
|
|
\&cat_cmd,
|
|
],
|
|
"show"
|
|
=> [
|
|
"verbose 3",
|
|
"Deprecated old low-level tool. Don't use. Use macros instead.",
|
|
\&show_cmd,
|
|
],
|
|
"show-errors"
|
|
=> [
|
|
"verbose 3",
|
|
"Deprecated old low-level tool. Don't use. Use macros instead.",
|
|
\&show_errors_cmd,
|
|
],
|
|
"show-state"
|
|
=> [
|
|
"verbose 3",
|
|
"Deprecated old low-level tool. Don't use. Use macros instead.",
|
|
\&mars_state_cmd,
|
|
],
|
|
"mars-state"
|
|
=> [
|
|
"verbose 3",
|
|
"Deprecated old low-level tool. Don't use. Use macros instead.",
|
|
\&mars_state_cmd,
|
|
],
|
|
"show-info"
|
|
=> [
|
|
"verbose 3",
|
|
"Deprecated old low-level tool. Don't use. Use macros instead.",
|
|
\&mars_info_cmd,
|
|
],
|
|
"mars-info" => \&mars_info_cmd, # deprecated
|
|
"pause-replay-local"
|
|
=> [
|
|
"Stop replaying transaction logfiles for now.",
|
|
"This is independent from any {pause,resume}-fetch operations.",
|
|
"This may be used for freezing the state of your replica for some",
|
|
"time, if you have enough space on /mars/.",
|
|
"Only useful on a secondary node.",
|
|
\&pause_replay_local_res,
|
|
],
|
|
"pause-replay-global"
|
|
=> [
|
|
"Like pause-replay-local, but affects all resource members",
|
|
"in the cluster (remotely).",
|
|
\&pause_replay_global_res,
|
|
],
|
|
"pause-replay"
|
|
=> [
|
|
"See pause-replay-local.",
|
|
\&pause_replay_local_res,
|
|
],
|
|
"resume-replay-local"
|
|
=> [
|
|
"Restart replaying transaction logfiles, when there is some",
|
|
"data left.",
|
|
"This is independent from any {pause,resume}-fetch operations.",
|
|
"This should be used for unfreezing the state of your local replica.",
|
|
"Only useful on a secondary node.",
|
|
\&pause_replay_local_res,
|
|
],
|
|
"resume-replay-global"
|
|
=> [
|
|
"Like resume-replay-local, but affects all resource members",
|
|
"in the cluster (remotely).",
|
|
\&pause_replay_global_res,
|
|
],
|
|
"resume-replay"
|
|
=> [
|
|
"See resume-replay-local.",
|
|
\&pause_replay_local_res,
|
|
],
|
|
"set-replay"
|
|
=> [
|
|
"verbose 1",
|
|
"VERY dangerous!",
|
|
"Only for experts.",
|
|
\&set_replay_res,
|
|
],
|
|
"wait-umount"
|
|
=> [
|
|
"Wait until /dev/mars/<resource_name> has disappeared in the",
|
|
"cluster (even remotely).",
|
|
"Useful on both primary and secondary nodes.",
|
|
\&wait_umount_res,
|
|
],
|
|
"wait-cluster"
|
|
=> [
|
|
"usage: wait-resource [<resource_name>]",
|
|
"Waits until a ping-pong communication has succeeded in the",
|
|
"whole cluster (or only the members of <resource_name>).",
|
|
"NOTICE: this is extremely useful for avoiding races when scripting",
|
|
"in a cluster.",
|
|
\&wait_cluster,
|
|
],
|
|
"wait-resource"
|
|
=> [
|
|
"usage: wait-resource <resource_name>",
|
|
" [[attach|fetch|replay|sync][-on|-off]]",
|
|
"Wait until the given condition is met on the resource, locally.",
|
|
\&wait_cond,
|
|
],
|
|
"update-cluster"
|
|
=> [
|
|
"usage: update-cluster [<resource_name>]",
|
|
"Fetch all the links from all joined cluster hosts.",
|
|
"Use this between create-resource and join-resource.",
|
|
"NOTICE: this is extremely useful for avoiding races when scripting",
|
|
"in a cluster.",
|
|
\&update_cluster,
|
|
],
|
|
"activate-guest"
|
|
=> [
|
|
"usage: activate-guest <resource_name>",
|
|
"Conditional update-cluster, so that <resource_name> will be locally",
|
|
"known at the local machine, and mark the resource as a guest.",
|
|
"Useful inbetween create-resource and join-resource.",
|
|
"A guest will receive any symlink updates much more frequently.",
|
|
"Prefer this over update-cluster when interested in a resource.",
|
|
\&activate_guest,
|
|
],
|
|
"deactivate-guest"
|
|
=> [
|
|
"usage: deactivate-guest <resource_name>",
|
|
"Precondition: the resource must not have local storage assigned.",
|
|
"Useful for cleaning up a pure guest relastionship.",
|
|
\&activate_guest,
|
|
],
|
|
|
|
# compatible keywords (or their derivatives)
|
|
"attach"
|
|
=> [
|
|
"Attaches the local disk (backing block device) to the resource.",
|
|
"The disk must have been previously configured at",
|
|
"{create,join}-resource.",
|
|
"When designated as a primary, /dev/mars/\$res will also appear.",
|
|
"This does not change the state of {fetch,replay}.",
|
|
"For a complete local startup of the resource, use 'marsadm up'.",
|
|
\&attach_res_phase0,
|
|
"check preconditions",
|
|
\&attach_res_phase1,
|
|
"switch state",
|
|
\&attach_res_phase2,
|
|
"wait for effect",
|
|
],
|
|
"detach"
|
|
=> [
|
|
"Detaches the local disk (backing block device) from the",
|
|
"MARS resource.",
|
|
"Caution! you may read data from the local disk afterwards,",
|
|
"but ensure that no data is written to it!",
|
|
"Otherwise, you are likely to produce harmful inconsistencies.",
|
|
"When running in primary role, /dev/mars/\$res will also disappear.",
|
|
"This does not change the state of {fetch,replay}.",
|
|
"For a complete local shutdown of the resource, use 'marsadm down'.",
|
|
\&attach_res_phase0,
|
|
"check preconditions",
|
|
"FORK",
|
|
\&attach_res_phase0b,
|
|
"wait for systemd device release",
|
|
\&attach_res_phase1,
|
|
"switch state",
|
|
\&attach_res_phase2,
|
|
"wait for effect",
|
|
],
|
|
"resume-fetch-local"
|
|
=> [
|
|
"Start fetching transaction logfiles from the current",
|
|
"designated primary node, if there is one.",
|
|
"This is independent from any {pause,resume}-replay operations.",
|
|
"Only useful on a secondary node.",
|
|
\&fetch_local_res,
|
|
],
|
|
"resume-fetch-global"
|
|
=> [
|
|
"Like resume-fetch-local, but affects all resource members",
|
|
"in the cluster (remotely).",
|
|
\&fetch_global_res,
|
|
],
|
|
"resume-fetch"
|
|
=> [
|
|
"See resume-fetch-local.",
|
|
\&fetch_local_res,
|
|
],
|
|
"pause-fetch-local"
|
|
=> [
|
|
"Stop fetching transaction logfiles from the current",
|
|
"designated primary.",
|
|
"This is independent from any {pause,resume}-replay operations.",
|
|
"Only useful on a secondary node.",
|
|
\&fetch_local_res,
|
|
],
|
|
"pause-fetch-global"
|
|
=> [
|
|
"Like pause-fetch-local, but affects all resource members",
|
|
"in the cluster (remotely).",
|
|
\&fetch_global_res,
|
|
],
|
|
"pause-fetch"
|
|
=> [
|
|
"See pause-fetch-local.",
|
|
\&fetch_local_res,
|
|
],
|
|
"connect-local"
|
|
=> [
|
|
"See resume-fetch-local.",
|
|
\&fetch_local_res,
|
|
],
|
|
"connect-global"
|
|
=> [
|
|
"Like resume-fetch-local, but affects all resource members",
|
|
"in the cluster (remotely).",
|
|
\&fetch_global_res,
|
|
],
|
|
"connect"
|
|
=> [
|
|
"See resume-fetch-local.",
|
|
\&fetch_local_res,
|
|
],
|
|
"disconnect-local"
|
|
=> [
|
|
"See pause-fetch-local.",
|
|
\&fetch_local_res,
|
|
],
|
|
"disconnect-global"
|
|
=> [
|
|
"Like pause-fetch-local, but affects all resource members",
|
|
"in the cluster (remotely).",
|
|
\&fetch_global_res,
|
|
],
|
|
"disconnect"
|
|
=> [
|
|
"See pause-fetch-local.",
|
|
\&fetch_local_res,
|
|
],
|
|
"syncer" => \&ignore_cmd,
|
|
"up"
|
|
=> [
|
|
"Shortcut for attach + resume-sync + resume-fetch + resume-replay.",
|
|
\&up_res_phase0,
|
|
"check preconditions",
|
|
\&up_res_phase1,
|
|
"switch state",
|
|
\&up_res_phase2,
|
|
"wait for effect",
|
|
],
|
|
"down"
|
|
=> [
|
|
"Shortcut for detach + pause-sync + pause-fetch + pause-replay.",
|
|
\&up_res_phase0,
|
|
"check preconditions",
|
|
"FORK",
|
|
\&attach_res_phase0b,
|
|
"wait for systemd device release",
|
|
\&up_res_phase1,
|
|
"switch state",
|
|
\&up_res_phase2,
|
|
"wait for effect",
|
|
],
|
|
"primary"
|
|
=> [
|
|
"Promote the resource into primary role.",
|
|
"This is necessary for /dev/mars/\$res to appear on the local host.",
|
|
"Notice: by concept there can be only _one_ designated primary",
|
|
"in a cluster at the same time.",
|
|
"The role change is automatically distributed to the other nodes",
|
|
"in the cluster, provided that the network is healthy.",
|
|
"The old primary node will _automatically_ go",
|
|
"into secondary role first. This is different from DRBD!",
|
|
"With MARS, you don't need an intermediate 'secondary' command",
|
|
"for switching roles.",
|
|
"It is usually better to _directly_ switch the primary roles",
|
|
"between both hosts.",
|
|
"When --force is not given, a planned handover is started:",
|
|
"the local host will only become actually primary _after_ the",
|
|
"old primary is gone, and all old transaction logs have been",
|
|
"fetched and replayed at the new designated priamry.",
|
|
"When --force is given, no handover is attempted. A a consequence,",
|
|
"a split brain situation is likely to emerge.",
|
|
"Thus, use --force only after an ordinary handover attempt has",
|
|
"failed, and when you don't care about the split brain.",
|
|
"For more details, please refer to the PDF manual.",
|
|
[
|
|
\&wait_cluster_noforce,
|
|
],
|
|
\&primary_phase0,
|
|
"check preconditions",
|
|
"FORK",
|
|
\&primary_phase0a,
|
|
"conditionally wait for fetch off",
|
|
\&primary_phase0b,
|
|
"wait for systemd",
|
|
"LOOP",
|
|
\&primary_phase1,
|
|
"leave primary state",
|
|
"LOOP",
|
|
\&primary_phase1b,
|
|
"trigger remote",
|
|
"LOOP",
|
|
\&primary_phase2,
|
|
"wait for cluster when necessary",
|
|
"LOOP",
|
|
\&primary_phase2b,
|
|
"avoid split brain",
|
|
\&primary_phase3,
|
|
"switch to primary",
|
|
\&primary_phase3b,
|
|
"trigger remote",
|
|
\&primary_phase4,
|
|
"wait for device",
|
|
"LOOP",
|
|
\&primary_phase5,
|
|
"trigger systemd",
|
|
],
|
|
"secondary"
|
|
=> [
|
|
"Promote all cluster members into secondary role, globally.",
|
|
"In contrast to DRBD, this is not needed as an intermediate step",
|
|
"for planned handover between an old and a new primary node.",
|
|
"The only reasonable usage is before the last leave-resource of the",
|
|
"last cluster member, immediately before leave-cluster is executed",
|
|
"for final deconstruction of the cluster.",
|
|
"In all other cases, please prefer 'primary' for direct handover",
|
|
"between cluster nodes.",
|
|
"Notice: 'secondary' sets the global designated primary node",
|
|
"to '(none)' which in turn prevents the execution of 'invalidate'",
|
|
"or 'join-resource' or 'resize' anywhere in the cluster.",
|
|
"Therefore, don't unnecessarily give 'secondary'!",
|
|
\&primary_phase0,
|
|
"check preconditions",
|
|
"FORK",
|
|
\&primary_phase1,
|
|
"leave primary state",
|
|
\&primary_phase1b,
|
|
"trigger remote",
|
|
\&primary_phase4,
|
|
"wait for effect",
|
|
"LOOP",
|
|
\&primary_phase5,
|
|
"trigger systemd",
|
|
],
|
|
"invalidate"
|
|
=> [
|
|
"Only useful on a secondary node.",
|
|
"Forces MARS to consider the local replica disk as being",
|
|
"inconsistent, and therefore starting a fast full-sync from",
|
|
"the currently designated primary node (which must exist;",
|
|
"therefore avoid the 'secondary' command).",
|
|
"This is usually needed for resolving emergency mode.",
|
|
"When having k=2 replicas, this can be also used for",
|
|
"quick-and-simple split-brain resolution.",
|
|
"In other cases, or when the split-brain is not resolved by",
|
|
"this command, please use the 'leave-resource' / 'join-resource'",
|
|
"method as described in the PDF manual (in the right order as",
|
|
"described there).",
|
|
\&invalidate_res_phase0,
|
|
"check preconditions",
|
|
\&invalidate_res_phase1,
|
|
"switch off everything",
|
|
\&invalidate_res_phase2,
|
|
"wait for everything switched off",
|
|
\&invalidate_res_phase3,
|
|
"start deletion of old version links",
|
|
\&invalidate_res_phase4,
|
|
"wait for effect",
|
|
\&invalidate_res_phase5,
|
|
"start purging old logfiles",
|
|
\&invalidate_res_phase6,
|
|
"wait for effect",
|
|
\&invalidate_res_phase7,
|
|
"set new replaylink",
|
|
\&invalidate_res_phase8,
|
|
"wait for effect",
|
|
\&invalidate_res_phase9,
|
|
"switch on everything again",
|
|
],
|
|
"invalidate-remote" => \&forbidden_cmd,
|
|
"resize"
|
|
=> [
|
|
"Prerequisite: all underlying disks (usually /dev/vg/\$res) must",
|
|
"have been already increased, e.g. at the LVM layer (cf. lvresize).",
|
|
"Causes MARS to re-examine all sizing constraints on all members of",
|
|
"the resource, and increase the global logical size of the resource",
|
|
"accordingly.",
|
|
"Shrinking is currently not yet implemented.",
|
|
"When successful, /dev/mars/\$res at the primary will be increased",
|
|
"in size. In addition, all secondaries will start an incremental",
|
|
"fast full-sync to get the enlarged parts from the primary.",
|
|
\&resize_phase0,
|
|
"check preconditions",
|
|
\&resize_phase1,
|
|
"set new size",
|
|
\&resize_phase2,
|
|
"wait for change",
|
|
],
|
|
"check-resize" => \&ignore_cmd,
|
|
"create-md" => \&senseless_cmd,
|
|
"get-gi" => \&ignore_cmd,
|
|
"show-gi" => \&ignore_cmd,
|
|
"dump-md" => \&senseless_cmd,
|
|
"outdate" => \&ignore_cmd,
|
|
"adjust" => \&ignore_cmd,
|
|
"wait-connect"
|
|
=> [
|
|
"usage: wait-connect [<resource_name>]",
|
|
"See wait-cluster.",
|
|
\&wait_cluster,
|
|
],
|
|
"role"
|
|
=> [
|
|
"verbose 3",
|
|
"Deprecated.",
|
|
"Please use the macro command 'view-role' instead.",
|
|
"For even better summary information, use plain 'view'.",
|
|
\&role_cmd,
|
|
],
|
|
"state"
|
|
=> [
|
|
"verbose 3",
|
|
"Deprecated.",
|
|
"Please use the macro command 'view-role' instead.",
|
|
"For even better summary information, use plain 'view'.",
|
|
\&role_cmd,
|
|
],
|
|
"cstate" => \&nyi_cmd,
|
|
"dstate" => \&nyi_cmd,
|
|
"status" => \&nyi_cmd,
|
|
"dump" => \&senseless_cmd,
|
|
"verify" => \&forbidden_cmd,
|
|
"pause-sync-local"
|
|
=> [
|
|
"Pause the initial data sync at current stage.",
|
|
"This has only an effect if a sync is actually running (i.e.",
|
|
"there is something to be actually synced).",
|
|
"Don't pause too long, because the local replica will remain",
|
|
"inconsistent during the pause.",
|
|
"Use this only for limited reduction of system load.",
|
|
"Only useful on a secondary node.",
|
|
\&pause_sync_local_res,
|
|
],
|
|
"pause-sync-global"
|
|
=> [
|
|
"Like pause-sync-local, but affects all resource members",
|
|
"in the cluster (remotely).",
|
|
\&pause_sync_global_res,
|
|
],
|
|
"pause-sync"
|
|
=> [
|
|
"See pause-sync-local.",
|
|
\&pause_sync_local_res,
|
|
],
|
|
"resume-sync-local"
|
|
=> [
|
|
"Resume any initial / incremental data sync at the stage where it",
|
|
"had been interrupted by pause-sync.",
|
|
"Only useful on a secondary node.",
|
|
\&pause_sync_local_res,
|
|
],
|
|
"resume-sync-global"
|
|
=> [
|
|
"Like resume-sync-local, but affects all resource members",
|
|
"in the cluster (remotely).",
|
|
\&pause_sync_global_res,
|
|
],
|
|
"resume-sync"
|
|
=> [
|
|
"See resume-sync-local.",
|
|
\&pause_sync_local_res,
|
|
],
|
|
"new-current-uuid" => \&senseless_cmd,
|
|
"hidden-commands" => \&ignore_cmd,
|
|
|
|
# lowlevel tools
|
|
"lowlevel-ls-host-ips"
|
|
=> [
|
|
"usage: lowlevel-ls-host-ips",
|
|
"List cluster member names and IP addresses.",
|
|
\&lowlevel_ls_host_ips,
|
|
],
|
|
"lowlevel-set-host-ip"
|
|
=> [
|
|
"usage: lowlevel-set-host-ip <hostname> [<new_ip>]",
|
|
"Set IP address <new_ip> for host.",
|
|
"When <new_ip> is not given, try to determine the old address",
|
|
"from the symlink tree, or from old backups.",
|
|
"Often, you want to set a new IP address in place of an old one.",
|
|
"Hint: you may also use the --ip-<hostname>=<new_ip> option.",
|
|
\&lowlevel_set_host_ip,
|
|
],
|
|
"lowlevel-delete-host"
|
|
=> [
|
|
"usage: lowlevel-delete-host <hostname>",
|
|
"Delete cluster member.",
|
|
\&lowlevel_delete_host,
|
|
],
|
|
|
|
# systemd interface
|
|
"systemd-trigger"
|
|
=> [
|
|
\&systemd_trigger_extern,
|
|
],
|
|
"systemd-trigger-extern"
|
|
=> [
|
|
\&systemd_trigger_extern,
|
|
],
|
|
);
|
|
|
|
|
|
sub helplist {
|
|
my $msg = shift;
|
|
print "ERROR: $msg" if ($msg);
|
|
print "
|
|
Thorough documentation is in mars-user-manual.pdf. Please use the PDF manual
|
|
as authoritative reference! Here is only a short summary of the most
|
|
important sub-commands / options:
|
|
|
|
marsadm [<global_options>] <command> [<resource_names> | all | <args> ]
|
|
marsadm [<global_options>] view[-<macroname>] [<resource_names> | all ]
|
|
|
|
<global_option> =
|
|
--force
|
|
Skip safety checks.
|
|
Use this only when you really know what you are doing!
|
|
Warning! This is dangerous! First try --dry-run.
|
|
Not combinable with 'all'.
|
|
--ignore-sync
|
|
Allow primary handover even when some sync is running somewhere.
|
|
This is less rude than --force because it checks for all else
|
|
preconditions.
|
|
--dry-run
|
|
Don't modify the symlink tree, but tell what would be done.
|
|
Use this before starting potentially harmful actions such as
|
|
'delete-resource'.
|
|
--verbose
|
|
Increase speakyness of some commands.
|
|
--parallel
|
|
Only resonable when combined with \"all\".
|
|
For each resource, fork() a sub-process running independently
|
|
from other resources. May seepd up handover a lot.
|
|
However, several cluster managers are known to have problems
|
|
with a high parallelism degree (up to deadlocks).
|
|
Only use this after thorough testing in combination with your
|
|
whole operation stack!
|
|
Turns off --singlestep.
|
|
--parallel=<number>
|
|
Like --parallel, but limit the parallelism degree to the given
|
|
number of parallel processes.
|
|
Turns off --singlestep.
|
|
--singlestep
|
|
Debugging aid for multi-phase commands.
|
|
Interactively step through the various phases of commands.
|
|
Turns off --parallel.
|
|
--error-injection-phase=<number>
|
|
Only for testing. NEVER use in production.
|
|
--delete-method=<code>
|
|
EXPERIMENTAL! Only for testing! This option will disappear again!
|
|
<code> == 0: Use new deletion method
|
|
<code> == 1: Use old deletion method
|
|
default is 1 for compatibility.
|
|
--logger=/path/to/usr/bin/logger
|
|
Use an alternative syslog messenger.
|
|
When empty, disable syslogging.
|
|
--max-deletions=<number>
|
|
When your network or your firewall rules are defective over a
|
|
longer time, too many deletion links may accumulate at
|
|
/mars/todo-global/delete-* and sibling locations.
|
|
This limit is preventing overflow of the filesystem as well
|
|
as overloading the worker threads.
|
|
--thresh-logfiles=<number>
|
|
--thresh-logsize=<number>
|
|
Prevention of too many small logfiles when secondaries are not
|
|
catching up. When more than thresh-logfiles are already present,
|
|
the next one is only created when the last one has at least
|
|
size thresh-logsize (in units of GB).
|
|
--timeout=<seconds>
|
|
Current default: $timeout
|
|
Abort safety checks and waiting loops after timeout with an error.
|
|
When giving 'all' as resource agument, this works for each
|
|
resource independently.
|
|
The special value -1 means \"infinite\".
|
|
--window=<seconds>
|
|
Current default: $window
|
|
Treat other cluster nodes as healthy when some communcation has
|
|
occured during the given time window.
|
|
--keep-backups=<hours>
|
|
link-purge-all and cron will delete old backup files and old
|
|
symlinks after this number of hours.
|
|
Current default: $keep_backups
|
|
--threshold=<bytes>
|
|
Some macros like 'fetch-threshold-reached' use this for determining
|
|
their sloppyness.
|
|
--systemd-enable=<0|1>
|
|
Enable / disable any systemd actions.
|
|
On by default.
|
|
--host=<hostname>
|
|
Act as if the command was running on cluster node <hostname>.
|
|
Warning! This is dangerous! First try --dry-run
|
|
--backup-dir=</absolute_path>
|
|
Only for experts.
|
|
Used by several special commands like merge-cluster, split-cluster
|
|
etc for creating backups of important data.
|
|
--ip-<peer>=<ip>
|
|
Override the IP address of <peer> from the symlink tree, or as determined
|
|
from old IP backups, or as determined from the list of network interfaces.
|
|
Usually you will need this only at 'create-cluster' or
|
|
'join-cluster' / 'merge-cluster' / 'split-cluster' for resolving
|
|
ambiguities, or for telling the IP address of yet unknown peers.
|
|
It is also useful at 'lowlevel-set-host-ip' for updating an
|
|
already existing IP address.
|
|
Hint: this option may be given multiple times for different <peer>
|
|
parts.
|
|
--ip=<ip>
|
|
Equivalent to --peer-\$host=<ip>
|
|
where \$host is usually the same as \$(hostname), but you may
|
|
use --host=<hostname> as an _earlier_ argument for overriding
|
|
the default <hostname>.
|
|
--ssh-port=<port_nr>
|
|
Override the default ssh port (22) for ssh and rsync.
|
|
Useful for running {join,merge}-cluster on non-standard ssh ports.
|
|
--no-ssh
|
|
Equivalent to --ssh-port=0
|
|
Disable ssh and rsync completely.
|
|
Dead peers / interrupted networks / firewalling may lead to (temporary)
|
|
hangs of ssh probes, which are used by default for backwards compatibility.
|
|
Hint: ssh_config options like ConnectTimeout may also help.
|
|
Use this to disable any probes, and no time loss.
|
|
--ssh-opts=\"<ssh_commandline_options>\"
|
|
Override the default ssh commandline options. Also used for rsync.
|
|
--macro=<text>
|
|
Handy for testing short macro evaluations at the command line.
|
|
|
|
<command> =
|
|
";
|
|
my $item_count = 0;
|
|
foreach my $cmdname (sort alphanum_cmp keys(%cmd_table)) {
|
|
my $list = $cmd_table{$cmdname};
|
|
next unless ref($list) eq "ARRAY";
|
|
my @copy = @$list;
|
|
my $line_count = 0;
|
|
my $verb_level = 0;
|
|
while (my $txt = shift @copy) {
|
|
last if ref($txt) eq "CODE";
|
|
next if ref($txt) eq "ARRAY";
|
|
my $skip_this = 0;
|
|
if ($txt =~ m/^verbose *([0-9]+)/) {
|
|
$verb_level = $1;
|
|
$skip_this = 1;
|
|
}
|
|
if ($verbose < 0) {
|
|
last if $verb_level != -$verbose;
|
|
} else {
|
|
last if $verb_level > $verbose;
|
|
}
|
|
next if $skip_this;
|
|
if (!$line_count) {
|
|
print "\n" if $item_count++;
|
|
print " $cmdname\n";
|
|
print " usage: $cmdname <resource_name>\n" if $txt !~ m/usage:/;
|
|
}
|
|
print " $txt\n";
|
|
$line_count++;
|
|
}
|
|
}
|
|
|
|
print "
|
|
<resource_names> = comma-separated list of resource names or \"all\" for all resources
|
|
";
|
|
|
|
print "
|
|
|
|
<macroname> = <complex_macroname> | <primitive_macroname>
|
|
|
|
<complex_macroname> =
|
|
";
|
|
foreach my $macroname (sort alphanum_cmp keys(%complex_macros)) {
|
|
print " $macroname\n";
|
|
}
|
|
|
|
print "
|
|
|
|
<primitive_macroname> =
|
|
";
|
|
foreach my $glob (sort alphanum_cmp keys(%trivial_globs)) {
|
|
my $txt = $trivial_globs{$glob};
|
|
print " $glob\n";
|
|
print " $txt\n" if $txt;
|
|
}
|
|
|
|
print "
|
|
|
|
<features> =
|
|
";
|
|
my $nr = 0;
|
|
foreach my $flagname (sort alphanum_cmp keys(%FEATURES_FLAGS)) {
|
|
print " |\n" if ($nr++);
|
|
print " $flagname";
|
|
}
|
|
print "\n";
|
|
|
|
exit 0;
|
|
}
|
|
|
|
my @args;
|
|
|
|
foreach my $arg (@ARGV) {
|
|
if ($arg eq "--force" || $arg eq "-f") {
|
|
$force++;
|
|
next;
|
|
} elsif ($arg eq "--ignore-sync") {
|
|
$ignore_sync++;
|
|
next;
|
|
} elsif ($arg eq "--dry-run" || $arg eq "-d") {
|
|
$dry_run++;
|
|
next;
|
|
} elsif ($arg =~ m/--max-deletions\s*=\s*(-?[0-9]+)/) {
|
|
$max_deletions = $1;
|
|
next;
|
|
} elsif ($arg =~ m/--thresh-logfiles\s*=\s*([0-9]+)/) {
|
|
$thresh_logfiles = $1;
|
|
next;
|
|
} elsif ($arg =~ m/--thresh-logsize\s*=\s*([0-9]+)/) {
|
|
$thresh_logsize = $1;
|
|
next;
|
|
} elsif ($arg =~ m/--parallel(\s*=\s*(-?[0-9]+)?)?/) {
|
|
$parallel = defined($2) ? $2 : 0;
|
|
$single_step = 0;
|
|
next;
|
|
} elsif ($arg eq "--singlestep") {
|
|
$single_step++;
|
|
$parallel = -1;
|
|
next;
|
|
} elsif ($arg =~ m/--error-injection-phase\s*=\s*([0-9]+)/) {
|
|
$inject_phase = $1;
|
|
next;
|
|
} elsif ($arg =~ m/--delete-method\s*=\s*([0-9]+)/) {
|
|
$compat_deletions = $1;
|
|
my $compat_path = "$mars/compat-deletions";
|
|
my $old = get_link($compat_path, 2);
|
|
if (!defined($old) || $old ne $compat_deletions) {
|
|
if (-d "/mars" && !$dry_run) {
|
|
lprint "Setting '$compat_path' to '$compat_deletions'\n";
|
|
set_link($compat_deletions, $compat_path);
|
|
finish_links();
|
|
} else {
|
|
lwarn "Cannot set '$compat_path' to '$compat_deletions'\n";
|
|
}
|
|
}
|
|
next;
|
|
} elsif ($arg =~ s/--verbose\s*=\s*(-?[0-9]+)/$1/) {
|
|
$verbose = $arg;
|
|
next;
|
|
} elsif ($arg eq "--verbose" || $arg eq "-v") {
|
|
$verbose++;
|
|
next;
|
|
} elsif ($arg =~ s/--logger\s*=\s*(.*)/$1/) {
|
|
$logger = $arg;
|
|
next;
|
|
} elsif ($arg =~ s/--timeout\s*=\s*([0-9]+)/$1/) {
|
|
$timeout = $arg;
|
|
next;
|
|
} elsif ($arg =~ s/--window\s*=\s*([0-9]+)/$1/) {
|
|
$window = $arg;
|
|
next;
|
|
} elsif ($arg =~ s/--keep-backups\s*=\s*([0-9]+)/$1/) {
|
|
$keep_backups = $arg;
|
|
next;
|
|
} elsif ($arg =~ s/--threshold\s*=\s*([0-9]+)/$1/) {
|
|
$threshold = get_size($arg);
|
|
next;
|
|
} elsif ($arg =~ s/--systemd-enable\s*=\s*([0-9]+)?/$1/) {
|
|
$systemd_enabled = $1;
|
|
$cm3_checked = 1;
|
|
next;
|
|
} elsif ($arg =~ s/--host\s*=\s*([-_A-Za-z0-9]+)/$1/) {
|
|
check_id($arg, 1);
|
|
if ($arg ne $host) {
|
|
lprint "ATTENTION: acting as if I were host '$arg'\n";
|
|
lwarn "some commands require local knowledge not available here.\n";
|
|
lwarn "thus something may fail or go wrong - use this at your risk!\n";
|
|
$host = $arg;
|
|
}
|
|
next;
|
|
} elsif ($arg =~ m/--backup-dir\s*=\s*(\/[^\s\\:;|<>]+)/) {
|
|
$backup_dir = $1;
|
|
system("mkdir -p $backup_dir") and ldie "Cannot create backup directory '$backup_dir'\n";
|
|
next;
|
|
} elsif ($arg =~ m/--ip(-(.*?))?\s*=\s*([0-9.:\[\]]+)/) {
|
|
my $peer = $2;
|
|
my $ip = $3;
|
|
$peer = $host unless $peer;
|
|
lprint_stderr "Using IP '$ip' from command line for '$peer'.\n";
|
|
$known_ips{$peer} = $ip;
|
|
next;
|
|
} elsif ($arg =~ m/--no-ssh/) {
|
|
$ssh_port = 0;
|
|
next;
|
|
} elsif ($arg =~ s/--ssh[-_]port\s*=\s*([0-9]+)/$1/) {
|
|
$ssh_port = $arg;
|
|
lprint_stderr "Using SSH port '$ssh_port' from command line.\n";
|
|
next;
|
|
} elsif ($arg =~ s/--ssh[-_]opts\s*=\s*(.*)/$1/) {
|
|
$ssh_opts = $arg;
|
|
lprint_stderr "Using SSH options '$ssh_opts' from command line.\n";
|
|
next;
|
|
} elsif ($arg =~ s/--macro\s*=\s*(.*)/$1/) {
|
|
$macro = $arg;
|
|
$macro =~ s/\\n/\n/mg;
|
|
next;
|
|
} elsif ($arg =~ m/^--help$/ || $arg =~ m/^-h$/) {
|
|
helplist;
|
|
next;
|
|
} elsif ($arg =~ m/^--version$/ || $arg =~ m/^-v$/) {
|
|
version;
|
|
next;
|
|
} elsif ($arg =~ m/^-(.*)/) {
|
|
ldie "unrecognized argument '-$1' (bad syntax)\n";
|
|
}
|
|
if ($arg =~ s/^force-//) {
|
|
$force++;
|
|
}
|
|
push @args, $arg;
|
|
}
|
|
|
|
my $cmd = shift @args || helplist "command argument is missing\n";
|
|
|
|
if ($cmd =~ m/^help$/ || $cmd =~ m/^h$/) {
|
|
helplist;
|
|
}
|
|
|
|
if ($cmd =~ m/^version$/ || $cmd =~ m/^v$/) {
|
|
version;
|
|
}
|
|
|
|
ldie "only root may use this tool\n" if $< != 0 && $cmd !~ m/^(cat|view.*|pretty.*)$/; # getpid() seems to be missing in perlfunc
|
|
helplist "unknown command $cmd\n" if (!exists $cmd_table{$cmd} && !$cmd =~ m/view/);
|
|
|
|
# setup syslogging
|
|
|
|
if ($cmd !~ m/^(version$|v$|view)/ && -x $logger) {
|
|
$notify = "(cmd: $cmd)";
|
|
my $print_id = $Id;
|
|
$print_id =~ s/\$|Id:| //g;
|
|
$print_id = substr($print_id, 0, 8);
|
|
llog "$print_id $host $0 @ARGV\n";
|
|
}
|
|
|
|
# checks
|
|
|
|
ldie "The $mars directory does not exist.\n" unless -d $mars;
|
|
|
|
my @mars_stat = stat($mars) or ldie "Cannot stat '$mars'\n";
|
|
# check uid
|
|
if ($mars_stat[4]) {
|
|
lwarn "Directory $mars has wrong owner uid $mars_stat[4].\n";
|
|
chown(0, 0, $mars) or ldie "Cannot chown $mars.\n";
|
|
lwarn "Fixed by setting to 0.\n";
|
|
}
|
|
# check permissions
|
|
my $perms = ($mars_stat[2] & 07777);
|
|
if ($perms != 0700) {
|
|
my $txt = sprintf("Directory $mars has wrong permissions 0%o.\n", $perms);
|
|
lwarn($txt);
|
|
chmod(0700, $mars) or ldie "Cannot chmod $mars.\n";
|
|
lwarn "Fixed to mode 0700 for security reasons.\n";
|
|
}
|
|
|
|
# lowlevel tools
|
|
if ($cmd =~ m/^lowlevel-/) {
|
|
my $func = $cmd_table{$cmd};
|
|
if ($func && ref($func) eq "ARRAY") {
|
|
lwarn "EXPERTS ONLY -- risky lowlevel command '$cmd'\n";
|
|
my @list = @$func;
|
|
while (@list) {
|
|
my $memb_func = shift @list;
|
|
next unless ref($memb_func) eq "CODE";
|
|
&{$memb_func}($cmd, @args);
|
|
}
|
|
finish_links();
|
|
exit(0);
|
|
} else {
|
|
ldie "Internal error: command table is wrong for '$cmd'";
|
|
}
|
|
}
|
|
|
|
if ($cmd !~ m/(create|join)-cluster|create-uuid|systemd|cat|view|pretty/) {
|
|
my $uuid = get_link("$mars/uuid", 1);
|
|
ldie "No valid cluster UUID $mars/uuid found. You need {create,join}-cluster first (or create-uuid).\n" unless $uuid;
|
|
}
|
|
|
|
my $res = "";
|
|
if ($cmd =~ "show|cron") {
|
|
$res = shift @args || "all";
|
|
} elsif ($cmd =~ m/^(view|pretty)/) {
|
|
$res = shift @args || "";
|
|
} elsif ($cmd =~ m/^set-.*-list$/) {
|
|
$res = shift @args || helplist "comma-separated list argument is missing\n";
|
|
} elsif ($cmd =~ m/^set-.*-value$/) {
|
|
$res = shift @args || helplist "numeric argument is missing\n";
|
|
ldie "argument '$res' isn't numeric\n" unless $res =~ m/^[0-9.]+$/;
|
|
} elsif ($cmd =~ m/^set-global-/) {
|
|
$res = "";
|
|
} elsif ($cmd =~ m/^set-/) {
|
|
$res = shift @args || helplist "resource argument is missing\n";
|
|
check_id_list($res);
|
|
} elsif ($cmd =~ m/^(join|merge)-cluster$/) {
|
|
$res = shift @args || helplist "peer argument is missing\n";
|
|
} elsif (!($cmd =~ m/^(create|split|leave|wait|update)-cluster|merge-cluster-list|create-uuid|cat|[a-z]+-file|trigger/)) {
|
|
$res = shift @args || helplist "resource argument is missing\n";
|
|
check_id_list($res);
|
|
}
|
|
|
|
|
|
lwarn "Using FORCE option -- hopefully you know what you are doing!\n" if $force;
|
|
|
|
my %checked_res;
|
|
|
|
sub do_one_res {
|
|
my $func = shift;
|
|
my ($cmd, $res) = @_;
|
|
if ($inject_phase && $phase_nr == $inject_phase) {
|
|
ldie "ERROR INJECTION cmd='$cmd' res='$res' phase='$phase_nr'\n";
|
|
}
|
|
if ($cmd =~ m/^cat|^set-global-|-file$|-list$|-link$|-value$/) { # no resource argument
|
|
} elsif (!$checked_res{"$cmd$res"}) {
|
|
$res = check_res($res) unless (!$res || $cmd =~ m/^(join|create|merge|leave|wait)-cluster|(create|join)-resource|show/);
|
|
check_res_member($cmd, $res) unless (!$res || $cmd =~ m/^(join|create|delete)-(cluster|resource)|^(merge|leave|wait)-cluster|activate-guest|-purge-|^show|^view/);
|
|
detect_splitbrain($res, 1);
|
|
$checked_res{"$cmd$res"} = 1;
|
|
}
|
|
call_hook(!$force, "pre", @_);
|
|
my $status = &{$func}(@_);
|
|
call_hook(!$force, "post", @_);
|
|
return $status;
|
|
}
|
|
|
|
sub expand_res_list {
|
|
my ($cmd, $res) = @_;
|
|
my @res_list=();
|
|
if ($res eq "all" && $cmd !~ m/show|cat|cluster|set-link|delete-file/) {
|
|
@res_list = get_any_resources($host);
|
|
} elsif ($res =~ m/,/) {
|
|
@res_list = split(",", $res);
|
|
}
|
|
# check for any systemd activations once
|
|
if ($parallel <= -999) {
|
|
$parallel = -998;
|
|
my $count = scalar(@res_list);
|
|
if ($count > 1) {
|
|
my $systemd_activated = 0;
|
|
foreach my $this_res (@res_list) {
|
|
if (systemd_present($cmd, $this_res)) {
|
|
$systemd_activated++;
|
|
last;
|
|
}
|
|
}
|
|
if ($systemd_activated) {
|
|
lprint "Systemd is activated, acting like --parallel=0\n" if $verbose;
|
|
$parallel = 0;
|
|
}
|
|
}
|
|
}
|
|
return sort alphanum_cmp @res_list;
|
|
}
|
|
|
|
sub do_all_res {
|
|
my $func = shift;
|
|
my $do_abort = shift;
|
|
my $cmd = shift;
|
|
my $res = shift;
|
|
my @res_list = expand_res_list($cmd, $res);
|
|
if (@res_list) {
|
|
ldie "Cannot combine command '$cmd' with 'all' existing resources - you must explicitly name a single new resource\n" if $cmd =~ m/create|join/;
|
|
my $any_success = 0;
|
|
my $any_fail = 0;
|
|
my $any_member = 0;
|
|
call_hook(!$force, "all-pre", $cmd, "all", @_) if $do_abort;
|
|
foreach $res (@res_list) {
|
|
$any_member++;
|
|
$res =~ s/^.*\/resource-(.*)$/$1/;
|
|
next if defined($skip_res{$res});
|
|
if ($verbose || $cmd !~ m/^log-/) {
|
|
my $tpl = get_macro("default-resource");
|
|
my $hint = eval_macro($cmd, $res, $tpl, @_);
|
|
my $type = "guest";
|
|
$type = "resource" if is_member($res, $host);
|
|
lprint "--------- $type $hint\n";
|
|
}
|
|
if (!$do_abort) {
|
|
# LOOP RETRY mode
|
|
# Retry when exit code is not 0
|
|
# catch internal ldie() via eval{} failure
|
|
my $status = 1;
|
|
my $has_died = 0;
|
|
eval {
|
|
$status = do_one_res($func, $cmd, $res, @_);
|
|
1;
|
|
} or
|
|
# eval{} has failed
|
|
(
|
|
$has_died = 1
|
|
);
|
|
$any_fail++ if $status;
|
|
# Only ldie() implies future skipping of this resource.
|
|
if ($has_died && !$skip_res{$res}) {
|
|
$any_fail++;
|
|
$skip_res{$res} = 1;
|
|
fail_action($cmd, $res);
|
|
}
|
|
next;
|
|
}
|
|
# NO LOOP RETRY
|
|
# Any non-zero exit code means to skip this resource in future.
|
|
# catch internal ldie() via eval{} failure
|
|
my $has_died = 0;
|
|
eval {
|
|
do_one_res($func, $cmd, $res, @_);
|
|
1;
|
|
} and
|
|
# eval{} has succeeded
|
|
$any_success = 1 or
|
|
# eval{} has failed
|
|
(
|
|
$has_died = 1
|
|
);
|
|
if ($has_died && !$skip_res{$res}) {
|
|
$skip_res{$res} = 1;
|
|
fail_action($cmd, $res);
|
|
}
|
|
}
|
|
return $any_fail unless $do_abort;
|
|
if (!$any_success) {
|
|
if (!$any_member) {
|
|
lprint "I am not member/guest of any resource\n";
|
|
return 1;
|
|
}
|
|
ldie "all resources have errors\n";
|
|
}
|
|
call_hook(!$force, "all-post", $cmd, "all", @_);
|
|
return !$any_success;
|
|
} elsif ($res eq "all") {
|
|
lwarn "resource qualifier 'all' does not match any resource or guest names\n";
|
|
return 0;
|
|
} elsif (!$do_abort) {
|
|
return do_one_res($func, $cmd, $res, @_);
|
|
} else {
|
|
my $has_died = 0;
|
|
my $status = 0;
|
|
eval {
|
|
$status = do_one_res($func, $cmd, $res, @_);
|
|
1;
|
|
} or
|
|
# eval{} has failed
|
|
(
|
|
$has_died = 1
|
|
);
|
|
if ($has_died && !$skip_res{$res}) {
|
|
fail_action($cmd, $res);
|
|
$status = 1;
|
|
}
|
|
return $status;
|
|
}
|
|
}
|
|
|
|
if ($cmd =~ m/^(view|pretty)/) {
|
|
lwarn "mars kernel module is not loaded\n" unless is_module_loaded();
|
|
if ($res eq "all" && $cmd =~ m/^view-?(.*)/) {
|
|
my $global_macro_name = $1 ? "$1-header" : "default-header";
|
|
my $global_macro = get_macro($global_macro_name, 1);
|
|
view_cmd($global_macro_name, "", @args) if $global_macro;
|
|
}
|
|
do_all_res(\&view_cmd, 1, $cmd, $res, @args);
|
|
if ($res eq "all" && $cmd =~ m/^view-?(.*)/) {
|
|
my $prefix = $1;
|
|
foreach my $suffix ("global", "footer") {
|
|
my $global_macro_name = $prefix ? "${prefix}-$suffix" : "default-$suffix";
|
|
my $global_macro = get_macro($global_macro_name, 1);
|
|
view_cmd($global_macro_name, "", @args) if $global_macro;
|
|
}
|
|
}
|
|
finish_links();
|
|
# report any dead peers
|
|
get_alive_links("all", "alive", "*", 1, 1);
|
|
exit($error_count);
|
|
}
|
|
|
|
my $func = $cmd_table{$cmd};
|
|
ldie "unknown command '$cmd'\n" unless $func;
|
|
|
|
my %kid_res;
|
|
|
|
sub wait_pid_list {
|
|
my @pid_list = @_;
|
|
foreach my $pid (@pid_list) {
|
|
my $check_pid = waitpid($pid, 0);
|
|
my $status = $?;
|
|
my $sub_res = $kid_res{$pid};
|
|
if ($status > 0) {
|
|
lwarn "RESOURCE $sub_res CHILD $pid terminated with status=$status\n";
|
|
$error_count++ if $status;
|
|
} elsif ($check_pid == $pid) {
|
|
lprint_stderr "RESOURCE $sub_res CHILD $pid terminated successfully\n";
|
|
} else {
|
|
lwarn "RESOURCE $sub_res CHILD $pid terminated with unknown state\n";
|
|
}
|
|
}
|
|
}
|
|
|
|
sub wait_any_pid {
|
|
my ($flags) = @_;
|
|
lprint_stderr "WAITING for termination of a child...\n";
|
|
my $pid = waitpid(-1, $flags);
|
|
my $status = $?;
|
|
if ($pid > 0) {
|
|
my $sub_res = $kid_res{$pid};
|
|
if ($status != 0) {
|
|
lwarn "RESOURCE $sub_res CHILD $pid terminated with status=$status\n";
|
|
$error_count++ if $status;
|
|
} else {
|
|
lprint_stderr "RESOURCE $sub_res CHILD $pid terminated successfully\n";
|
|
}
|
|
}
|
|
return $pid;
|
|
}
|
|
|
|
sub do_fork {
|
|
my @res_list = expand_res_list($cmd, $res);
|
|
my $child_count = 0;
|
|
# only fork() when beneficial
|
|
if (@res_list && scalar(@res_list) > 1) {
|
|
lprint_stderr "FORKING (error_count=$error_count)\n";
|
|
$error_count = 0;
|
|
foreach my $child_res (@res_list) {
|
|
# when necessary, limit the parallelism degree
|
|
if ($parallel > 0 && $child_count >= $parallel) {
|
|
my $done_pid = wait_any_pid(0);
|
|
if ($done_pid > 0) {
|
|
$child_count--;
|
|
delete $kid_res{$done_pid};
|
|
}
|
|
}
|
|
my $pid = fork();
|
|
ldie "Cannot fork()\n" unless defined($pid);
|
|
if ($pid) {
|
|
lprint_stderr "RESOURCE $child_res starting CHILD $pid\n";
|
|
$child_count++;
|
|
$kid_res{$pid} = $child_res;
|
|
} else {
|
|
# child: simply continue with new $res
|
|
$res = $child_res;
|
|
$child_count = 0;
|
|
$child_prefix = "CHILD $child_res: ";
|
|
if ($skip_res{$res}) {
|
|
ldie "SKIPPING\n";
|
|
}
|
|
lprint_stderr "STARTING\n";
|
|
last;
|
|
}
|
|
}
|
|
}
|
|
if ($child_count) {
|
|
my @wait_list = sort alphanum_cmp keys(%kid_res);
|
|
wait_pid_list(@wait_list);
|
|
lprint_stderr "EXIT $error_count\n";
|
|
exit($error_count);
|
|
}
|
|
}
|
|
|
|
if (ref($func) eq "ARRAY") {
|
|
my @list = @$func;
|
|
while (@list) {
|
|
my $do_loop = 0;
|
|
my $memb_func = shift @list;
|
|
# check whether fork() is possible
|
|
if ($memb_func) {
|
|
if ($memb_func eq "PARALLEL") {
|
|
if ($parallel < -99) {
|
|
lprint "Treating as --parallel=0\n" if $verbose;
|
|
$parallel = 0;
|
|
}
|
|
next;
|
|
}
|
|
if ($memb_func eq "FORK") {
|
|
$memb_func = shift @list;
|
|
# check whether fork() is requested
|
|
if ($parallel >= 0) {
|
|
do_fork();
|
|
}
|
|
}
|
|
# check whether busy-waiting loop is requested
|
|
if ($memb_func eq "LOOP") {
|
|
$memb_func = shift @list;
|
|
$do_loop++;
|
|
} elsif ($memb_func eq "SLEEP") {
|
|
$memb_func = shift @list;
|
|
sleep(7);
|
|
}
|
|
}
|
|
# nested arrays may be used for _global_ workers
|
|
if (ref($memb_func) eq "ARRAY") {
|
|
my @sub_list = @$memb_func;
|
|
foreach my $sub_func (@sub_list) {
|
|
next unless ref($sub_func) eq "CODE";
|
|
&{$sub_func}($cmd, $res, @args);
|
|
}
|
|
}
|
|
next unless ref($memb_func) eq "CODE";
|
|
my $headline = shift @list;
|
|
$phase_nr++;
|
|
lprint "-------- PHASE $phase_nr -------- $headline:\n" if defined($headline);
|
|
$headline = "" unless defined($headline);
|
|
if ($single_step) {
|
|
ldie "DEBUGGING: standard input is no tty\n" unless -t STDIN;
|
|
lprint "DEBUGGING: type RETURN for starting phase $phase_nr: ";
|
|
my $dummy = <STDIN>;
|
|
lprint "DEBUGGING: continue phase $phase_nr: $headline\n";
|
|
}
|
|
my $start_time = mars_time();
|
|
my $do_abort = !$do_loop;
|
|
my $status;
|
|
my $count = 0;
|
|
for (;;) {
|
|
$status = do_all_res($memb_func, $do_abort, $cmd, $res, @args);
|
|
last if !$do_loop;
|
|
last if (!defined($status) || !$status);
|
|
# we have a busy wait condition
|
|
$count++;
|
|
lprint "--- status='$status' check again LOOP $count\n" if $verbose;
|
|
sleep(1);
|
|
my $now = mars_time();
|
|
if ($now - $start_time > $timeout) {
|
|
lwarn "Condition '$headline' for resources '$res' not reached withing $timeout s\n";
|
|
last;
|
|
}
|
|
}
|
|
finish_links();
|
|
if (defined($status) && $status) {
|
|
lprint_stderr "STATUS='$status'\n" if $verbose;
|
|
last;
|
|
}
|
|
}
|
|
} elsif (ref($func) eq "CODE") {
|
|
do_all_res($func, 1, $cmd, $res, @args);
|
|
} else {
|
|
ldie "internal error: command table is wrong for '$cmd'";
|
|
}
|
|
|
|
finish_links();
|
|
|
|
# do not disturb stdout of informational commands
|
|
lprint_stderr "EXIT $error_count\n" if $verbose;
|
|
|
|
exit($error_count);
|