marsadm: safeguard overflow of deletion links

This commit is contained in:
Thomas Schoebel-Theuer 2017-09-08 08:06:41 +02:00
parent 1ad5de090d
commit 28d5b0e5d1
1 changed files with 22 additions and 0 deletions

View File

@ -34,6 +34,7 @@ umask 0077;
my $threshold = 10 * 1024 * 1024; my $threshold = 10 * 1024 * 1024;
my $window = 30; my $window = 30;
my $verbose = 0; my $verbose = 0;
my $max_deletions = 2000;
my $dry_run = 0; my $dry_run = 0;
my @MARS_PATH = $ENV{MARS_PATH} ? my @MARS_PATH = $ENV{MARS_PATH} ?
split(/:/, $ENV{MARS_PATH}) : split(/:/, $ENV{MARS_PATH}) :
@ -2119,6 +2120,18 @@ sub _get_delete_nr {
$path =~ m:/delete-0*([0-9]+):; $path =~ m:/delete-0*([0-9]+):;
$max_nr = $1 if (defined($1) && $1 > $max_nr); $max_nr = $1 if (defined($1) && $1 > $max_nr);
} }
my $nr_links = $max_nr - $min_nr;
if ($nr_links > $max_deletions / 2) {
$verbose = 1 if $verbose <= 0;
lwarn "Too many deletion links have accumulated into directory $basedir/.\n";
lwarn "Probably your networking / your firewall rules / another setup problem is causing this, and your monitoring does not notice it.\n";
lwarn "Please fix it.\n";
lwarn "When necessary, remove $basedir/delete-* link by hand.\n";
if ($nr_links > $max_deletions) {
lwarn "URGENT: please fix it.\n";
ldie "Aborting for safety reasons\n" unless $force;
}
}
$delete_nrs{$basedir} = [$min_nr, $max_nr]; $delete_nrs{$basedir} = [$min_nr, $max_nr];
return @{$delete_nrs{$basedir}}; return @{$delete_nrs{$basedir}};
} }
@ -5186,6 +5199,12 @@ marsadm [<global_options>] view[-<macroname>] [<resource_name> | all ]
--logger=/path/to/usr/bin/logger --logger=/path/to/usr/bin/logger
Use an alternative syslog messenger. Use an alternative syslog messenger.
When empty, disable syslogging. When empty, disable syslogging.
--max-deletions=<number>
When your network or your firewall rules are defective over a
longer time, too many deletion links may accumulate at
/mars/todo-global/delete-* and sibling locations.
This limit prevents overflow of the filesystem as well
as overloading of the worker threads.
--timeout=<seconds> --timeout=<seconds>
Abort safety checks after timeout with an error. Abort safety checks after timeout with an error.
When giving 'all' as resource argument, this works for each When giving 'all' as resource argument, this works for each
@ -5280,6 +5299,9 @@ foreach my $arg (@ARGV) {
} elsif ($arg eq "--dry-run" || $arg eq "-d") { } elsif ($arg eq "--dry-run" || $arg eq "-d") {
$dry_run++; $dry_run++;
next; next;
} elsif ($arg =~ m/--max-deletions\s*=\s*(-?[0-9]+)/) {
$max_deletions = $1;
next;
} elsif ($arg =~ s/--verbose\s*=\s*(-?[0-9]+)/$1/) { } elsif ($arg =~ s/--verbose\s*=\s*(-?[0-9]+)/$1/) {
$verbose = $arg; $verbose = $arg;
next; next;