mirror of https://github.com/schoebel/mars
monitoring: update doc, monitoring and mars-status
This commit is contained in:
parent
8429aaf773
commit
3ec20ba8e5
|
@ -3,11 +3,6 @@
|
|||
#
|
||||
# last update at now ...
|
||||
|
||||
### TODO:
|
||||
### - check em-mode
|
||||
### - check join/leave cluster/resource
|
||||
|
||||
|
||||
###
|
||||
use warnings;
|
||||
use strict;
|
||||
|
@ -20,23 +15,19 @@ use File::Basename;
|
|||
binmode STDOUT, ":utf8";
|
||||
|
||||
### defaults
|
||||
my $version = "0.072q";
|
||||
my $version = "0.073";
|
||||
my $alife_timeout = "30"; # sec for remote-nodes timeout
|
||||
my $is_tty = 0;
|
||||
my $mars_dir = '/mars';
|
||||
my $himself = `uname -n` or die "cannot determine my network node name\n";
|
||||
my $clearscreen = `clear`;
|
||||
my $StatusCode = 'UpToDate';
|
||||
my @StatusText = ();
|
||||
my $NodeStatusCode = 'UpToDate';
|
||||
my @NodeStatusText = ();
|
||||
my $MarsTreeVer = 0.1;
|
||||
chomp $himself;
|
||||
|
||||
### ARGV
|
||||
# parse the command-line options into a hash ref
|
||||
my $params = {};
|
||||
GetOptions( $params, 'help', 'h', 'version', 'v', 'monitor', 'role', 'cstate', 'dstate', 'resource=s', 'system', 'interval=i', 'history', 'ascii', 'debug' );
|
||||
GetOptions( $params, 'help', 'h', 'version', 'v', 'resource=s', 'system', 'interval=i', 'history', 'ascii', 'debug' );
|
||||
|
||||
#########################################################################################
|
||||
### terminal settings
|
||||
|
@ -66,21 +57,15 @@ sub display_help {
|
|||
print "$HelpText\n\n" if ($HelpText);
|
||||
print "Usage: mars-status [--help]\n";
|
||||
print "Usage: mars-status [--version]\n";
|
||||
print "Usage: mars-status (without specification of parameters, an abstract of all the informations spent)\n";
|
||||
print "Usage: mars-status (without specification of parameters, an abstract of all the information sent)\n";
|
||||
print "Usage: mars-status [--resource <RESNAME>] [--interval <seconds>] | [--history] | [--debug ] | [--system]\n";
|
||||
print "Usage: mars-status [--resource <RESNAME>] --monitor \n";
|
||||
print "Usage: mars-status --resource <RESNAME> [--role | --cstate | --dstate]\n";
|
||||
print " --resource : limits the display to the specified resource\n";
|
||||
print " --interval : refreshes the display every second xxx\n";
|
||||
print " --history : shows information about the log files, version numbers and their status\n";
|
||||
print " --ascii : display history in ascii code letters\n";
|
||||
print " --system : display mars-system informations\n";
|
||||
print " --monitor : indicator to use for monitoring on all state (by local node only !)\n";
|
||||
print " --role|--cstate|--dstate single state on lokal node\n";
|
||||
print " --debug : additional display debug messages\n\n";
|
||||
print "Usage small include rotate : mars-status --interval 2\n";
|
||||
print "Usage monitoring : mars-status --monitor\n";
|
||||
print "Usage monitoring drbd-linke : mars-status --cstate (or --dstate or --role)\n";
|
||||
print "Usage small include refresh : mars-status --interval 2\n";
|
||||
print "Usage full, include debug : mars-status --system --history --debug\n\n";
|
||||
exit;
|
||||
}
|
||||
|
@ -117,28 +102,10 @@ sub print_screen {
|
|||
my $Color = shift;
|
||||
my $Level = shift;
|
||||
|
||||
### default
|
||||
if ( !$params->{'monitor'} && !$Level ){
|
||||
$Color = 'FAINT' if (!$Color);
|
||||
print color "$Color" if ( $is_tty );
|
||||
print "$Text";
|
||||
print color 'reset' if ( $is_tty );
|
||||
|
||||
### monitor
|
||||
} elsif ( $params->{'monitor'} && $Level ) {
|
||||
if ( $params->{'role'} && $Level eq 'Rmonitor' ) {
|
||||
print "$Text\n";
|
||||
exit;
|
||||
} elsif ( $params->{'dstate'} && $Level eq 'Dmonitor' ) {
|
||||
print "$Text\n";
|
||||
exit;
|
||||
} elsif ( $params->{'cstate'} && $Level eq 'Cmonitor' ) {
|
||||
print "$Text\n";
|
||||
exit;
|
||||
} elsif ( !$params->{'role'} && !$params->{'dstate'} && !$params->{'cstate'}) {
|
||||
print "$Text\n";
|
||||
}
|
||||
}
|
||||
$Color = 'FAINT' if (!$Color);
|
||||
print color "$Color" if ( $is_tty );
|
||||
print "$Text";
|
||||
print color 'reset' if ( $is_tty );
|
||||
}
|
||||
|
||||
|
||||
|
@ -169,37 +136,6 @@ sub convert_link {
|
|||
}
|
||||
|
||||
|
||||
#########################################################################################
|
||||
### StatusCode
|
||||
# Record one status observation for the monitoring output.
#
# Arguments:
#   $Code - status code of this observation. An empty or undefined code is
#           treated as 'UpToDate'. Codes passed by callers in this file:
#             UpToDate   - nothing to report
#             UpDateIng  - replay is running
#             OutDate    - replay stopped
#             InvaliDate - sync not finished
#             SwitchOff  - a todo switch (attach/connect/sync/replay) is off
#             Failed     - system or network problem
#             unknown    - resource is not joined
#   $Text - short description; appended to both status text lists.
#
# Side effects: may overwrite the file-level $StatusCode and $NodeStatusCode,
# and always pushes $Text onto @StatusText and @NodeStatusText.
sub monitoring {
    my ($Code, $Text) = @_;

    $Code = "UpToDate" if ( !defined $Code || $Code eq '' );

    # Any observation that is not 'UpToDate' overrides the recorded codes.
    # The global code used to be guarded by "$StatusCode ne 'UpToDate'";
    # as $StatusCode starts out as 'UpToDate' that guard never fired, so a
    # problem was never propagated to the overall status.
    if ( $Code ne 'UpToDate' ) {
        $StatusCode     = $Code;    # global status of the whole run
        $NodeStatusCode = $Code;    # status of the node currently displayed
    }

    push @StatusText,     $Text;
    push @NodeStatusText, $Text;
}
|
||||
|
||||
|
||||
#########################################################################################
|
||||
### sub display resource-partner
|
||||
sub display_partner {
|
||||
|
@ -216,20 +152,15 @@ sub display_partner {
|
|||
##########################################################################
|
||||
### check status
|
||||
if ( $PStatus eq $PName ) {
|
||||
print_screen "Primary", "$Color_blue";
|
||||
print_screen "Primary [$PRes on $PName]",'', 'Rmonitor';
|
||||
monitoring '', "joined";
|
||||
print_screen "Primary", "$Color_blue bold";
|
||||
} else {
|
||||
if ( $PDevice eq 0 ) {
|
||||
print_screen "not joined","$Color_red";
|
||||
print_screen "not joined\n",'', 'Rmonitor';
|
||||
print_screen " -> Resource is not joined to this node\n", "$Color_red";
|
||||
monitoring "unknown", "not joined";
|
||||
return;
|
||||
} else {
|
||||
print_screen "Secondary", "$Color_blue";
|
||||
print_screen "Secondary [$PRes on $PName]",'', 'Rmonitor';
|
||||
monitoring "", "joined";
|
||||
print_screen "Secondary", "$Color_blue bold";
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -241,48 +172,58 @@ sub display_partner {
|
|||
my $PAlive = time()- $PAlive[9] - $alife_timeout;
|
||||
print_screen ", System", '';
|
||||
if ( $PAlive > 1 ) {
|
||||
print_screen " unknown (last message before $PAlive sec) !!!\n", "$Color_red";
|
||||
monitoring "Failed", "not alive"
|
||||
print_screen " unknown (last message before $PAlive sec) !!!", "$Color_red";
|
||||
} else {
|
||||
print_screen " alive\n", "$Color_green";
|
||||
monitoring "", "alive";
|
||||
print_screen " alive", "$Color_green";
|
||||
}
|
||||
|
||||
|
||||
##########################################################################
|
||||
### check device
|
||||
my $CheckDiskDev = check_link "$mars_dir/$PRes/data-$PName";
|
||||
my $CheckDiskMrs = check_link "$mars_dir/$PRes/data-$PName";
|
||||
print_screen "\tDevice : Disk-Device ";
|
||||
print_screen "$CheckDiskDev", "$Color_blue";
|
||||
print_screen ", used as Mars-Device ";
|
||||
print_screen "$CheckDiskMrs", "$Color_blue";
|
||||
# disk-device
|
||||
my $DiskDev = check_link "$mars_dir/$PRes/data-$PName";
|
||||
print_screen "\n\tDevice : Disk-Device ";
|
||||
print_screen "$DiskDev", "$Color_blue";
|
||||
|
||||
# resize
|
||||
my $ASize = check_link "$mars_dir/$PRes/actsize-$PName";
|
||||
if ( $PSize eq $ASize) {
|
||||
print_screen ", not resized";
|
||||
print_screen ", not enlarged";
|
||||
} else {
|
||||
print_screen "\n\t\t---> HINT: resizing used ($PSize != $ASize)","$Color_red";
|
||||
print_screen ", resize active","$Color_red bold";
|
||||
}
|
||||
|
||||
|
||||
##########################################################################
|
||||
### check mountpint
|
||||
if ( $himself eq $PName ) {
|
||||
my $PUDevice = "/dev/mars/$PDevice";
|
||||
if ( stat( $PUDevice) ) {
|
||||
open my $fh, '<', '/proc/mounts' or die $!;
|
||||
$PUDevice = ( grep { /^$PUDevice / } <$fh> )[0];
|
||||
if ( $PUDevice ) {
|
||||
$PUDevice = ( split / /, $PUDevice )[1];
|
||||
print_screen " and mounted as $PUDevice\n", "$Color_blue";
|
||||
} else {
|
||||
print_screen "\n\t\t---> TODO: enable to mount\n", "$Color_green";
|
||||
}
|
||||
} else {
|
||||
print_screen "\n\t\t---> HINT: unable to mount, Device is Secondary or mars is starting\n", "$Color_blue";
|
||||
}
|
||||
} else {
|
||||
print_screen "\n";
|
||||
# mars-device
|
||||
my $MarsDev = "/dev/mars/$PDevice";
|
||||
my $Temp = "";
|
||||
if ( $PName eq $himself ) { # himself
|
||||
if ( $PName eq $PStatus) { # himself=primary
|
||||
print_screen ", used as Mars-Device ";
|
||||
print_screen "$MarsDev", "$Color_blue";
|
||||
if ( stat( $MarsDev) ) {
|
||||
open my $fh, '<', '/proc/mounts' or die $!;
|
||||
$MarsDev = ( grep { /^$MarsDev / } <$fh> )[0];
|
||||
if ( $MarsDev ) {
|
||||
$MarsDev = ( split / /, $MarsDev )[1];
|
||||
print_screen "\n\t\t---> WORK: mounted as $MarsDev", "$Color_blue";
|
||||
} else {
|
||||
print_screen "\n\t\t---> TODO: enable to mount", "$Color_green";
|
||||
}
|
||||
} else {
|
||||
print_screen "\n\t\t---> HINT: unable to mount, mars is starting or defective", "$Color_red";
|
||||
}
|
||||
} else { # himself secondary
|
||||
if ( stat( $MarsDev) ) {
|
||||
open my $fh, '<', '/proc/mounts' or die $!;
|
||||
$MarsDev = ( grep { /^$MarsDev / } <$fh> )[0];
|
||||
if ( !$MarsDev ) {
|
||||
print_screen "\n\t\t---> HINT: Mars-Device on Secondary available", "$Color_red";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if ( $PSize ne $ASize) {
|
||||
print_screen "\n\t\t---> HINT: resizing used ($PSize != $ASize)","$Color_red";
|
||||
}
|
||||
$$ref_ResPartner++;
|
||||
|
||||
|
@ -292,27 +233,25 @@ sub display_partner {
|
|||
### sync - status
|
||||
my $PSyncsize = check_link "$mars_dir/$PRes/syncstatus-$PName";
|
||||
my $SStatus = sprintf ("%.2f", ($PSyncsize / $PSize * 100));
|
||||
print_screen (sprintf "\tSync : %s bytes (%.3fTB) synced = ", $PSyncsize, ( $PSyncsize/1024/1024/1024/1024));
|
||||
print_screen (sprintf "\n\tSync : %s bytes (%.3fTB) synced = ", $PSyncsize, ( $PSyncsize/1024/1024/1024/1024));
|
||||
|
||||
### sync - speed
|
||||
my $SSpeed = check_link "$mars_dir/$PRes/actual-$PName/sync_rate";
|
||||
$SSpeed = sprintf ("%.2f", $SSpeed / 1024 / 1024);
|
||||
$SSpeed = sprintf ("%.3f", $SSpeed / 1024 / 1024);
|
||||
my $SEndTime = ($PSize - $PSyncsize ) / 1024 / 1024 / 1024;
|
||||
if ( $SSpeed eq "0.00" ) {
|
||||
if ( $SSpeed eq "0.000" ) {
|
||||
$SSpeed = "%";
|
||||
} else {
|
||||
$SEndTime = sprintf ("%.2f", $SEndTime / $SSpeed / 60);
|
||||
$SSpeed = "%, by $SSpeed gb/s (hypothetically ends in $SEndTime min)";
|
||||
$SSpeed = "%, at $SSpeed gb/s (done in $SEndTime min)";
|
||||
}
|
||||
|
||||
### sync - results
|
||||
if ( $SStatus < 100) {
|
||||
print_screen "$SStatus$SSpeed\n";
|
||||
print_screen "\t\t---> WORK: Sync in progress = ($SStatus% < 100.00%)\n", "$Color_red";
|
||||
monitoring "InvaliDate", "not in sync ($SStatus%)";
|
||||
print_screen "\t\t---> WORK: Sync in progress = ($SStatus% < 100.00%)\n", "$Color_blue";
|
||||
} else {
|
||||
print_screen "$SStatus$SSpeed\n", "$Color_green";
|
||||
monitoring "", "synced";
|
||||
}
|
||||
|
||||
|
||||
|
@ -344,7 +283,7 @@ sub display_partner {
|
|||
print_screen ", received with $LogSpeed gb/s" if ( $LogSpeed ne "0.00" );
|
||||
print_screen "\n";
|
||||
if ( $Ljoined eq "0" || $PLogSize eq "0.0001" ) {
|
||||
print_screen "\t\t---> WORK: Logfile wait for starting ...\n", "$Color_red";
|
||||
print_screen "\t\t---> WORK: Logfile wait for starting ...\n", "$Color_blue";
|
||||
}
|
||||
if ( ( $ref_AULogfile ) && !($PLogName eq $ref_AULogfile) ) {
|
||||
print_screen "\t\t---> HINT: Logfile Version not actual = ($PLogName ! $ref_AULogfile)\n", "$Color_red";
|
||||
|
@ -357,7 +296,7 @@ sub display_partner {
|
|||
my $RStatus = sprintf("%.2f", ( $PLogFile[1] / $PLogSize * 100));
|
||||
$RStatus = 0 if ( $Ljoined eq "0" || $PLogSize eq "1" );
|
||||
$RStatus = 99.99 if (( $PLogFile[1] ne $PLogSize ) && ( $RStatus eq "100.00" ));
|
||||
print_screen sprintf ("\tReplayed: %s bytes (%.3fGB) now replayed, Todo %d = ",
|
||||
print_screen sprintf ("\tReplayed: %s bytes (%.3fGB) now replayed, Todo %d, completed ",
|
||||
$PLogFile[1], ( $PLogFile[1]/1024/1024/1024 ), $PLogFile[2]);
|
||||
|
||||
### replay - speed
|
||||
|
@ -366,38 +305,33 @@ sub display_partner {
|
|||
if ( $RSpeed eq "0.00" ) {
|
||||
$RSpeed = "%";
|
||||
} else {
|
||||
$RSpeed = "%, by $RSpeed gb/s";
|
||||
$RSpeed = "%, at $RSpeed gb/s";
|
||||
}
|
||||
|
||||
### replay - results
|
||||
if (( $RStatus < 1 ) && ( $PLogSize != 0.0001 )) {
|
||||
print_screen "$RStatus$RSpeed\n";
|
||||
print_screen "\t\t---> HINT: Replay not started, Logfile inactive = (Size: $PLogSize)\n", "$Color_red";
|
||||
monitoring "OutDate", "replay stopped";
|
||||
|
||||
} elsif (( $RStatus < 100 ) && ( $PLogSize != 0.0001 )) {
|
||||
print_screen "$RStatus$RSpeed\n";
|
||||
print_screen "\t\t---> WORK: Replay in progress = ($RStatus% < 100.00%)\n", "$Color_red";
|
||||
monitoring "UpDateIng", "replay running1";
|
||||
print_screen "\t\t---> WORK: Replay in progress = ($RStatus% < 100.00%)\n", "$Color_blue";
|
||||
|
||||
} elsif ( $PLogFile[2] > 0 ) {
|
||||
$RStatus = sprintf("%.2f", ($PLogFile[1]-$PLogFile[2])/$PLogFile[1] * 100);
|
||||
print_screen "$RStatus$RSpeed\n", "$Color_red";
|
||||
monitoring "UpDateIng", "replay running2";
|
||||
|
||||
} elsif ( $PLogSize = 0.0001 ) {
|
||||
$RStatus = "100.00";
|
||||
print_screen "$RStatus$RSpeed\n", "$Color_green";
|
||||
monitoring '', "replay wait";
|
||||
|
||||
} else {
|
||||
print_screen "$RStatus% $RSpeed\n", "$Color_green";
|
||||
monitoring '', "replaying";
|
||||
}
|
||||
|
||||
### replay - hints
|
||||
if ($PLogFile[2] != 0) {
|
||||
print_screen "\t\t---> HINT: Replay-Todo is actualy $PLogFile[2], ", "$Color_blue";
|
||||
print_screen "\t\t---> WORK: Replay-Todo is actualy $PLogFile[2], ", "$Color_blue";
|
||||
if ( $PLogFile[2] < 0 ) {
|
||||
print_screen "replaying backwards ??? Check this !!!\n", "$Color_red";
|
||||
} elsif ( $PLogFile[2] > 0 ) {
|
||||
|
@ -411,17 +345,31 @@ sub display_partner {
|
|||
##########################################################################
|
||||
### check actual
|
||||
my $ActStatus = check_link "$mars_dir/$PRes/actual-$PName/is-primary";
|
||||
my $ActDevice = check_link "$mars_dir/$PRes/actual-$PName/device-$PDevice";
|
||||
print_screen "\tActual : Status=";
|
||||
if ( $ActStatus eq 1 ) {
|
||||
print_screen "\tActual : Status=Primary, used Device=";
|
||||
convert_link "$mars_dir/$PRes/actual-$PName/device-$PDevice";
|
||||
print_screen "\n";
|
||||
print_screen "Primary", "$Color_green";
|
||||
print_screen ", used Device=";
|
||||
# hack for multiple linkversions
|
||||
if ( $ActDevice eq "off") {
|
||||
if ( convert_link "$mars_dir/$PRes/actual-$PName/device-$PDevice" eq "off" ) {
|
||||
print_screen "on", "$Color_red";
|
||||
} else {
|
||||
print_screen "on", "$Color_green";
|
||||
}
|
||||
} else {
|
||||
print_screen "on", "$Color_green";
|
||||
}
|
||||
} else {
|
||||
print_screen "\tActual : Status=Secondary, Syncstatus=";
|
||||
print_screen "Secondary", "$Color_green";
|
||||
print_screen ", Syncstatus=";
|
||||
convert_link "$mars_dir/$PRes/actual-$PName/copy-syncstatus-$PName";
|
||||
print_screen ", Logfileupdate=";
|
||||
convert_link "$mars_dir/$PRes/actual-$PName/logfile-update";
|
||||
print_screen "\n";
|
||||
}
|
||||
print_screen ", Attached=";
|
||||
convert_link "$mars_dir/$PRes/actual-$PName/is-attached";
|
||||
print_screen "\n";
|
||||
|
||||
##########################################################################
|
||||
### check switches
|
||||
|
@ -429,59 +377,31 @@ sub display_partner {
|
|||
print_screen "\tSwitches: Attach=";
|
||||
if ( readlink "$mars_dir/$PRes/todo-$PName/attach" eq 1 ) { ### Use of uninitialized value in string
|
||||
print_screen "on", "$Color_green";
|
||||
monitoring "", "attached";
|
||||
} else {
|
||||
print_screen "off", "$Color_red";
|
||||
monitoring "SwitchOff", "attach off";
|
||||
}
|
||||
print_screen " [masked:" if ( $ActStatus eq 1 );
|
||||
print_screen " Connect=";
|
||||
if ( readlink "$mars_dir/$PRes/todo-$PName/connect" eq 1 ) { ### Use of uninitialized value in string
|
||||
print_screen "on", "$Color_green";
|
||||
monitoring "", "connected";
|
||||
} else {
|
||||
print_screen "off", "$Color_red";
|
||||
monitoring "SwitchOff", "connect off";
|
||||
}
|
||||
print_screen " Sync=";
|
||||
if ( readlink "$mars_dir/$PRes/todo-$PName/sync" eq 1 ) { ### Use of uninitialized value in string
|
||||
print_screen "on", "$Color_green";
|
||||
monitoring "", "synced";
|
||||
} else {
|
||||
print_screen "off", "$Color_red";
|
||||
monitoring "SwitchOff", "sync off";
|
||||
}
|
||||
print_screen " AllowReplay=" ;
|
||||
if ( readlink "$mars_dir/$PRes/todo-$PName/allow-replay" eq 1 ) { ### Use of uninitialized value in string
|
||||
print_screen "on", "$Color_green";
|
||||
monitoring "", "replayed";
|
||||
} else {
|
||||
print_screen "off", "$Color_red";
|
||||
monitoring "SwitchOff", "replay off";
|
||||
}
|
||||
print_screen "]" if ( $ActStatus eq 1 );
|
||||
print_screen "\n";
|
||||
|
||||
|
||||
##########################################################################
|
||||
### node status
|
||||
my $NodeStatusText = '';
|
||||
foreach (@NodeStatusText) {
|
||||
$NodeStatusText = "$NodeStatusText($_)";
|
||||
}
|
||||
### normal-modus
|
||||
print_screen "\tStatus : $NodeStatusCode = $NodeStatusText\n", '';
|
||||
### monitor-modus
|
||||
print_screen "$NodeStatusCode [$NodeStatusText]", '', 'Dmonitor';
|
||||
if ( $NodeStatusCode ne 'SwitchOff' ) {
|
||||
print_screen "Disconnect [$PRes on $PName]", '', 'Cmonitor';
|
||||
} else {
|
||||
print_screen "Connect [$PRes on $PName]", '', 'Cmonitor';
|
||||
}
|
||||
### reset values
|
||||
$NodeStatusCode = 'UpToDate';
|
||||
@NodeStatusText = ();
|
||||
|
||||
return $PLogName;
|
||||
}
|
||||
|
||||
|
@ -527,7 +447,7 @@ sub check_ressource {
|
|||
|
||||
|
||||
### joined (und nicht monitor)...
|
||||
if (( $ResPartner eq 1 ) && ( !$params->{'monitor'} )) {
|
||||
if ( $ResPartner eq 1 ) {
|
||||
### partners
|
||||
opendir my $server_dh, "$mars_dir/$res" or die "Cannot open $mars_dir/$res: $!";
|
||||
my @servers = grep { /^data/ && readlink "$mars_dir/$res/$_" } readdir $server_dh;
|
||||
|
@ -638,6 +558,7 @@ sub check_logfile {
|
|||
my $VersionErrorChk = 0;
|
||||
my $VersionLastChk = 0;
|
||||
my @VersionFile = <$mars_dir/$LResource/version-$VersionNr*>;
|
||||
my $VersionNode = "";
|
||||
foreach my $VersionFile (@VersionFile) {
|
||||
my @VersionDetail = check_link "$VersionFile";
|
||||
@VersionDetail = split (',|:', "@VersionDetail" );
|
||||
|
@ -656,8 +577,7 @@ sub check_logfile {
|
|||
} else {
|
||||
print_screen "\t$Gls$Gfr$Gls$Gfr$Gfr $Gao$Gab", "$Color_red";
|
||||
}
|
||||
|
||||
print_screen "Primary ";
|
||||
$VersionNode = "Primary \t ";
|
||||
|
||||
} elsif ( $LogHost eq "" ) {
|
||||
# none
|
||||
|
@ -676,9 +596,9 @@ sub check_logfile {
|
|||
} else {
|
||||
print_screen "\t$Gls$Gfr$Gls$Gfr$Gfr $Gls$Gfr$Gau$Gab", "$Color_red";
|
||||
}
|
||||
print_screen "Secondary ";
|
||||
$VersionNode = "Secondary";
|
||||
};
|
||||
print_screen sprintf ("Node: $VersionSource\t\tCheck: $VersionDetail[0]\t\tSize: $VersionDetail[2] bytes (%.3fGB)", $VersionDetail[2] / 1024 / 1024 / 1024 );
|
||||
print_screen sprintf ("$VersionNode $VersionSource \tCheck: $VersionDetail[0] \tSize: $VersionDetail[2] bytes (%.3fGB)", $VersionDetail[2] / 1024 / 1024 / 1024 );
|
||||
|
||||
|
||||
if ( $LogFile ) {
|
||||
|
@ -722,7 +642,7 @@ sub check_logfile {
|
|||
}
|
||||
if ( $LogFile && $DeleteFiles eq $LogFile ) {
|
||||
print_screen "\t$Gls$Gfr$Gkr$Gab", "$Color_red";
|
||||
print_screen "\t\t---> HINT: LogFile ist marked for delete ($DeleteFiles).\n", "$Color_green";
|
||||
print_screen "\t\t---> TODO: LogFile ist marked for delete ($DeleteFiles).\n", "$Color_green";
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -731,22 +651,22 @@ sub check_logfile {
|
|||
### same checks
|
||||
if ( $VersionFileCount ne $LPartner ) {
|
||||
print_screen "\t$Gls$Gfr$Gkr$Gab", "$Color_red";
|
||||
print_screen "TODO: Count of Logfiles different = (Cluster has $LPartner Nodes, but only find $VersionFileCount Node)\n", "$Color_red";
|
||||
print_screen "HINT: Count of Logfiles different = (Cluster has $LPartner Nodes, but only find $VersionFileCount Node)\n", "$Color_red";
|
||||
}
|
||||
|
||||
if ( $VersionErrorSize eq 1 && $VersionErrorChk eq 1) {
|
||||
# print_screen "\t$Gls$Gfr$Gao$Gab\n", "$Color_red";
|
||||
} elsif ( $VersionErrorSize eq 1 ) {
|
||||
print_screen "\t$Gls$Gfr$Gkr$Gab", "$Color_red";
|
||||
print_screen "TODO: Logfiles has not equal size and same Checksums, ups ... \n","$Color_red";
|
||||
print_screen "HINT: Logfiles has not equal size and same Checksums, ups ... \n","$Color_red";
|
||||
} elsif ( $VersionErrorSize ne 1 && $VersionErrorChk eq 1 ) {
|
||||
print_screen "\t$Gls$Gfr$Gkr$Gab", "$Color_red";
|
||||
print_screen "TODO: Logfiles has not equal Checksums and same size, check for SPLIT-BRAIN\n","$Color_red";
|
||||
print_screen "HINT: Logfiles has not equal Checksums and same size, check for SPLIT-BRAIN\n","$Color_red";
|
||||
}
|
||||
|
||||
if ( `ls -l $mars_dir/$LResource/replay-* | grep $VersionNr` ) {
|
||||
print_screen "\t$Gls$Gfr$Gao$Gab", "$Color_red";
|
||||
print_screen "WORK: Version are actual and used. ", "$Color_green";
|
||||
print_screen "WORK: Version are actual and used. ", "$Color_blue";
|
||||
if ( $VersionErrorSize ne 1 && $VersionErrorChk ne 1) {
|
||||
print_screen "Wait for start replay ...\n", "$Color_green";
|
||||
} else {
|
||||
|
@ -754,10 +674,10 @@ sub check_logfile {
|
|||
}
|
||||
} elsif ( !$LogFile ) {
|
||||
print_screen "\t$Gls$Gfr$Gao$Gab", "$Color_red";
|
||||
print_screen "WORK: Version is deleted the next log-rotate ...\n", "$Color_green";
|
||||
print_screen "WORK: Version is deleted the next log-rotate ...\n", "$Color_blue";
|
||||
} elsif ( !`ls -l $mars_dir/$LResource/replay-* | grep $VersionNr` && $LogFile ) {
|
||||
print_screen "\t$Gls$Gfr$Gao$Gab", "$Color_red";
|
||||
print_screen "WORK: Logfiles has all equal Sizes and Checksums, can be deleted ...\n", "$Color_green";
|
||||
print_screen "TODO: Logfiles has all equal Sizes and Checksums, can be deleted ...\n", "$Color_green";
|
||||
} else {
|
||||
print "ups ??";
|
||||
}
|
||||
|
@ -766,37 +686,6 @@ sub check_logfile {
|
|||
}
|
||||
|
||||
|
||||
#########################################################################################
|
||||
### diskfull
|
||||
# Print the cluster disk-space line and flag every node whose
# "$mars_dir/rest-space-<node>" link reports a value below 1.
#
# For each such node a TODO message is printed (different wording for the
# local node and for remote nodes) and monitoring() is called with 'Failed'.
# If no node is flagged, " ok" is printed instead.
# NOTE(review): the link value is presumably the remaining space in bytes
# (it is divided by 1024 and printed as "kb") - confirm against check_link.
sub check_disk_is_full {
    my @diskfull      = glob("$mars_dir/rest-space-*");
    my $diskfull_mars = "";

    print_screen("-> Diskspace on Cluster:", 'bold');

    foreach my $space_link ( @diskfull ) {
        my $diskfull_space = check_link("$space_link");
        next unless ( $diskfull_space < 1 );

        # The node name is whatever follows the "rest-space-" prefix. Use
        # $mars_dir (quoted) rather than a hard-coded "/mars" so the name is
        # still extracted correctly when the mars directory is relocated.
        my $diskfull_system = $space_link;
        $diskfull_system =~ s!^\Q$mars_dir\E/rest-space-!!;

        $diskfull_space = sprintf ("%.2f", $diskfull_space / 1024 );
        if ( $diskfull_system eq $himself ) {
            print_screen("\n\t-> TODO: Local Partition $mars_dir full ($diskfull_space kb Limit) !!! mars is stopping !!!\n\n", "red");
            monitoring('Failed', 'System: Mars-Disk full, MARS stopping');
        } else {
            print_screen("\n\t-> TODO: Remotesystem $diskfull_system have mars-disk full ($diskfull_space kb Limit) !!!\n\n", "red");
            monitoring('Failed', 'System: Remote-Mars-Disk full');
        }

        # Remember that at least one node is full (suppresses the "ok" below).
        $diskfull_mars = "$diskfull_mars,$diskfull_system";
    }

    ### TODO: /0
    if ( !$diskfull_mars ) {
        print_screen(" ok\n", "$Color_green");
    }
}
|
||||
|
||||
#########################################################################################
|
||||
### check debug-files
|
||||
sub check_debugfile {
|
||||
|
@ -868,15 +757,74 @@ sub info_version {
|
|||
|
||||
|
||||
#########################################################################################
|
||||
### avg_limit
|
||||
### diskfull
|
||||
# Print the cluster disk-space field of the system summary line and flag
# every node whose "$mars_dir/rest-space-<node>" link reports a value below 1.
#
# For each such node a HINT message is printed (different wording for the
# local node and for remote nodes). If no node is flagged, " smoothly " is
# printed instead.
# NOTE(review): the link value is presumably the remaining space in bytes
# (it is divided by 1024 and printed as "kb") - confirm against check_link.
sub check_diskfull {
    my @diskfull      = glob("$mars_dir/rest-space-*");
    my $diskfull_mars = "";

    print_screen("-> Cluster Diskspace:", 'bold');

    foreach my $space_link ( @diskfull ) {
        my $diskfull_space = check_link("$space_link");
        next unless ( $diskfull_space < 1 );

        # The node name is whatever follows the "rest-space-" prefix. Use
        # $mars_dir (quoted) rather than a hard-coded "/mars" so the name is
        # still extracted correctly when the mars directory is relocated.
        my $diskfull_system = $space_link;
        $diskfull_system =~ s!^\Q$mars_dir\E/rest-space-!!;

        $diskfull_space = sprintf ("%.2f", $diskfull_space / 1024 );
        if ( $diskfull_system eq $himself ) {
            print_screen("\n\t-> HINT: Local Partition $mars_dir full ($diskfull_space kb Limit) !!! mars is stopping !!!\n->", "$Color_red bold");
        } else {
            print_screen("\n\t-> HINT: Remotesystem $diskfull_system have mars-disk full ($diskfull_space kb Limit) !!!\n->", "$Color_red bold");
        }

        # Remember that at least one node is full (suppresses "smoothly" below).
        $diskfull_mars = "$diskfull_mars,$diskfull_system";
    }

    ### TODO: /0
    if ( !$diskfull_mars ) {
        print_screen(" smoothly ", "$Color_green");
    }
}
|
||||
|
||||
#########################################################################################
|
||||
### emergency
|
||||
sub check_jammed {
|
||||
my $jammed = check_link "$mars_dir/emergency-$himself";
|
||||
print_screen "-> Mars-Transaktion ", 'bold';
|
||||
if (!$jammed) {
|
||||
print_screen "running normaly\n", "$Color_green";
|
||||
print_screen " Transaktions:", 'bold';
|
||||
if ( !$jammed ) {
|
||||
print_screen " smoothly ", "$Color_green";
|
||||
} else {
|
||||
print_screen "and Replication not runnunig !!!\n", "$Color_red";
|
||||
monitoring 'Failed', 'System: Replikation not running';
|
||||
print_screen " and Replication not running !!!\n-> ", "$Color_red";
|
||||
}
|
||||
}
|
||||
|
||||
#########################################################################################
|
||||
### connects
|
||||
# Print the "Connects" field of the system summary line.
#
# Placeholder: the field text is still "TODO". Only its colour (and the
# trailing "!!!\n") depends on the local "$mars_dir/emergency-<node>" link
# as read through check_link.
sub check_connects {
    my $emergency = check_link("$mars_dir/emergency-$himself");

    print_screen(" Connects:", 'bold');

    # A true link value selects the red variant, anything else the green one.
    my ($label, $colour) = $emergency
        ? ( " TODO !!!\n", "$Color_red" )
        : ( " TODO ",      "$Color_green" );

    print_screen($label, $colour);
}
|
||||
|
||||
#########################################################################################
|
||||
### synclimit
|
||||
# Print the "Synclimit" field of the system summary line.
#
# Reads /proc/sys/mars/sync_limit (if it can be opened), strips newlines and
# tabs, and prints " smoothly " when the result is empty, "0" or unreadable;
# otherwise prints the configured limit in red.
sub check_synclimit {
    my $synclimit;

    # Lexical handle and three-argument open instead of the bareword handle
    # with a two-argument open; an unreadable file is still not an error and
    # simply leaves $synclimit undefined.
    if ( open my $limit_fh, '<', '/proc/sys/mars/sync_limit' ) {
        while ( my $line = <$limit_fh> ) {
            $line =~ tr/\n\t//d;
            $synclimit .= $line;
        }
        close $limit_fh;
    }

    print_screen(" Synclimit:", 'bold');
    if ( !$synclimit ) {
        print_screen(" smoothly ", "$Color_green");
    } else {
        print_screen(" set to $synclimit !!!\n", "$Color_red");
    }
}
|
||||
|
||||
|
@ -1048,23 +996,6 @@ while(1) {
|
|||
}
|
||||
|
||||
|
||||
##########################################################################
|
||||
### check and set monitor
|
||||
|
||||
### big monitor
|
||||
if ( $params->{'monitor'} || $params->{'cstate'} || $params->{'dstate'} || $params->{'role'} ) {
|
||||
$params->{'system'} = 1;
|
||||
$params->{'history'} = 1;
|
||||
### TODO: check!
|
||||
$params->{'debug'} = 0;
|
||||
$params->{'monitor'} = 1;
|
||||
}
|
||||
### small-monitor
|
||||
if (( $params->{'cstate'} || $params->{'dstate'} || $params->{'role'} ) && ( !$params->{'resource'})) {
|
||||
display_help "Syntax-Error: Option resource is missing by --cstate / --dstate / --role!";
|
||||
}
|
||||
|
||||
|
||||
##########################################################################
|
||||
### read mars infos
|
||||
info_version;
|
||||
|
@ -1074,17 +1005,18 @@ while(1) {
|
|||
### check system limits
|
||||
if ( $params->{'system'} ) {
|
||||
check_systemstatus;
|
||||
|
||||
}
|
||||
|
||||
|
||||
##########################################################################
|
||||
### check system params
|
||||
check_disk_is_full;
|
||||
check_diskfull;
|
||||
check_jammed;
|
||||
|
||||
check_connects;
|
||||
check_synclimit;
|
||||
|
||||
### check resources
|
||||
print_screen "---> Resources <---\n", "$Color_blue bold";
|
||||
print_screen "\n---> Resources <---\n", "$Color_blue bold";
|
||||
check_ressource;
|
||||
|
||||
|
||||
|
@ -1101,17 +1033,6 @@ while(1) {
|
|||
}
|
||||
|
||||
|
||||
##########################################################################
|
||||
### end, exit for monitor
|
||||
if ( $params->{'monitor'} ) {
|
||||
if (( $StatusCode eq 'InvaliDate' || $StatusCode eq 'Failed' || $StatusCode eq 'OutDate' || $StatusCode eq 'SwitchOff' )) {
|
||||
exit 1;
|
||||
} else {
|
||||
exit 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
##########################################################################
|
||||
### end, next loop
|
||||
print color 'reset';
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH mars-status 8 "December 12, 2012" "" "Mars Admin"
|
||||
.TH mars-status 8 "18.03.2014" "" "Mars Status"
|
||||
|
||||
.SH NAME
|
||||
mars-status \- program to display status information from mars
|
||||
|
@ -12,10 +12,6 @@ mars-status \- program to display status information from mars
|
|||
.br
|
||||
.B "Usage: mars-status [--resource <RESNAME>] [--interval <seconds>] [[--history] [--debug] [--system]]"
|
||||
.br
|
||||
.B "Usage: mars-status [--resource <RESNAME>] --monitor "
|
||||
.br
|
||||
.B "Usage: mars-status --resource <RESNAME> [--role | --cstate | --dstate]"
|
||||
.br
|
||||
.B " --resource : limits the display to the specified resource"
|
||||
.br
|
||||
.B " --interval : refreshes the display every <seconds> seconds"
|
||||
|
@ -24,10 +20,6 @@ mars-status \- program to display status information from mars
|
|||
.br
|
||||
.B " --system : display mars-system information"
|
||||
.br
|
||||
.B " --monitor : indicator to use for monitoring on all state"
|
||||
.br
|
||||
.B " --role|--cstate|--dstate single state on lokal node"
|
||||
.br
|
||||
.B " --debug : additionally display internal error messages and warnings"
|
||||
.br
|
||||
.br
|
||||
|
@ -75,22 +67,6 @@ Shows information about the log files, version numbers and their status.
|
|||
Additionally displays internal error messages and warnings.
|
||||
.TP
|
||||
|
||||
.BR monitor
|
||||
Indicator to use for monitoring.
|
||||
.TP
|
||||
.BR cstate
|
||||
Indicator to use for monitoring.
|
||||
.TP
|
||||
|
||||
.BR dstate
|
||||
Indicator to use for monitoring.
|
||||
.TP
|
||||
|
||||
.BR role
|
||||
Indicator to use for monitoring.
|
||||
.TP
|
||||
|
||||
|
||||
|
||||
.SH BUGS
|
||||
Problems and errors in the program are not known ;)
|
||||
|
|
Binary file not shown.
|
@ -1,9 +1,10 @@
|
|||
#!/bin/bash
|
||||
# v 0.02
|
||||
# modify by jms at Fri Jan 31 14:11:36 CET 2014
|
||||
# v 0.04
|
||||
# modify by jms at Mon Feb 17 10:44:28 CET 2014
|
||||
# zabbix mars
|
||||
|
||||
TEMPFILE="/tmp/zabbix.mars"
|
||||
FINALTEMPFILE="/tmp/zabbix.mars"
|
||||
TEMPFILE="${FINALTEMPFILE}.tmp"
|
||||
echo -n >$TEMPFILE
|
||||
|
||||
|
||||
|
@ -23,6 +24,12 @@ else
|
|||
echo "# todo $RESNAME $HOSTNAME" >>$TEMPFILE
|
||||
ls -ld /mars/resource-$RESNAME/todo-$HOSTNAME/* | sed -e 's!.*/todo-.*/!!g' | awk '{print "status '$RESNAME' " $1 " " $3}' >>$TEMPFILE
|
||||
done
|
||||
|
||||
|
||||
# emergency
|
||||
echo -n "emergency-modus " >>$TEMPFILE
|
||||
ls -l /mars/emergency-$HOSTNAME | awk '{print $11}' >>$TEMPFILE
|
||||
|
||||
fi
|
||||
|
||||
mv $TEMPFILE $FINALTEMPFILE
|
||||
|
||||
|
|
|
@ -1,6 +1,16 @@
|
|||
# mars config
|
||||
# v002
|
||||
# v003 - Wed Feb 19 11:01:19 CET 2014
|
||||
|
||||
# values:
|
||||
# -------
|
||||
# 1 - /proc/sys/block/mars/[filename]
|
||||
|
||||
UserParameter=system.mars[*], ( grep $1 /tmp/zabbix.mars || echo '0 0' ) | awk '{print $$2}'
|
||||
UserParameter=system.marsstatus[*], grep $1 /tmp/zabbix.mars | grep ' $2 ' | awk '{print $$4}' | head -n 1
|
||||
|
||||
# values:
|
||||
# -------
|
||||
# 1 - ressource-name
|
||||
# 2 - statuslink-name
|
||||
|
||||
UserParameter=system.marsstatus[*], ( grep $1 /tmp/zabbix.mars | grep ' $2 ' || echo '0 0 0 0') | awk '{print $$4}' | head -n 1
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<zabbix_export>
|
||||
<version>2.0</version>
|
||||
<date>2014-02-05T12:30:02Z</date>
|
||||
<date>2014-03-18T14:18:06Z</date>
|
||||
<groups>
|
||||
<group>
|
||||
<name>Templates</name>
|
||||
|
@ -9,8 +9,8 @@
|
|||
</groups>
|
||||
<templates>
|
||||
<template>
|
||||
<template>Template Mars Server</template>
|
||||
<name>Template Mars Server</name>
|
||||
<template>Template App Mars Server</template>
|
||||
<name>Template App Mars Server</name>
|
||||
<groups>
|
||||
<group>
|
||||
<name>Templates</name>
|
||||
|
@ -34,7 +34,7 @@
|
|||
<key>system.mars[copy_read_max_fly]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -73,7 +73,7 @@
|
|||
<key>system.mars[copy_write_max_fly]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -112,7 +112,7 @@
|
|||
<key>system.mars[io_flying_count]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -151,7 +151,7 @@
|
|||
<key>system.mars[io_tuning/writeback_rate_kb]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -190,7 +190,7 @@
|
|||
<key>system.mars[io_tuning/writeback_until_percent]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -229,7 +229,7 @@
|
|||
<key>vfs.file.time[/tmp/zabbix.mars,change]</key>
|
||||
<delay>300</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -268,7 +268,7 @@
|
|||
<key>system.mars[logger_mem_used_kb]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -307,7 +307,7 @@
|
|||
<key>system.mars[mapfree_period_sec]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -346,7 +346,7 @@
|
|||
<key>system.modules[mars]</key>
|
||||
<delay>30</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -385,7 +385,7 @@
|
|||
<key>proc.num[mars_light]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -424,7 +424,7 @@
|
|||
<key>system.marsstatus[{$RESNAME},allow-replay]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -463,7 +463,7 @@
|
|||
<key>system.marsstatus[{$RESNAME},attach]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -502,7 +502,46 @@
|
|||
<key>system.marsstatus[{$RESNAME},connect]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
<units/>
|
||||
<delta>0</delta>
|
||||
<snmpv3_securityname/>
|
||||
<snmpv3_securitylevel>0</snmpv3_securitylevel>
|
||||
<snmpv3_authpassphrase/>
|
||||
<snmpv3_privpassphrase/>
|
||||
<formula>1</formula>
|
||||
<delay_flex/>
|
||||
<params/>
|
||||
<ipmi_sensor/>
|
||||
<data_type>0</data_type>
|
||||
<authtype>0</authtype>
|
||||
<username/>
|
||||
<password/>
|
||||
<publickey/>
|
||||
<privatekey/>
|
||||
<port/>
|
||||
<description/>
|
||||
<inventory_link>0</inventory_link>
|
||||
<applications>
|
||||
<application>
|
||||
<name>Mars Server Status</name>
|
||||
</application>
|
||||
</applications>
|
||||
<valuemap/>
|
||||
</item>
|
||||
<item>
|
||||
<name>Mars Server Status - emergency</name>
|
||||
<type>0</type>
|
||||
<snmp_community/>
|
||||
<multiplier>0</multiplier>
|
||||
<snmp_oid/>
|
||||
<key>system.mars[emergency-modus]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -541,7 +580,7 @@
|
|||
<key>system.marsstatus[{$RESNAME},is-attached]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -580,7 +619,7 @@
|
|||
<key>system.marsstatus[{$RESNAME},is-copying]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -619,7 +658,7 @@
|
|||
<key>system.marsstatus[{$RESNAME},is-primary]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -658,7 +697,7 @@
|
|||
<key>system.marsstatus[{$RESNAME},is-replaying]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -697,7 +736,7 @@
|
|||
<key>system.marsstatus[{$RESNAME},is-syncing]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -736,7 +775,7 @@
|
|||
<key>system.marsstatus[{$RESNAME},open-count]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -775,7 +814,7 @@
|
|||
<key>system.marsstatus[{$RESNAME},replay_rate]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -814,7 +853,7 @@
|
|||
<key>system.marsstatus[{$RESNAME},sync]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -853,7 +892,7 @@
|
|||
<key>system.mars[propagate_interval_sec]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -892,7 +931,7 @@
|
|||
<key>system.mars[scan_interval_sec]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -931,7 +970,7 @@
|
|||
<key>system.mars[statusfiles_rollover_sec]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -970,7 +1009,7 @@
|
|||
<key>system.mars[sync_flip_interval_sec]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -1009,7 +1048,7 @@
|
|||
<key>system.mars[traffic_tuning/client_role_traffic_rate_kb]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -1048,7 +1087,7 @@
|
|||
<key>system.mars[traffic_tuning/server_role_traffic_rate_kb]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -1087,7 +1126,7 @@
|
|||
<key>system.mars[traffic_tuning/writeback_rate_kb]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -1126,7 +1165,7 @@
|
|||
<key>system.mars[traffic_tuning/writeback_until_percent]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -1165,7 +1204,7 @@
|
|||
<key>system.mars[traffic_tunning/client_role_traffic_count_ops]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -1204,7 +1243,7 @@
|
|||
<key>system.mars[traffic_tunning/server_role_traffic_count_ops]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -1243,7 +1282,7 @@
|
|||
<key>system.mars[write_throttle_count_ops]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -1282,7 +1321,7 @@
|
|||
<key>system.mars[write_throttle_rate_kb]</key>
|
||||
<delay>60</delay>
|
||||
<history>90</history>
|
||||
<trends>365</trends>
|
||||
<trends>90</trends>
|
||||
<status>0</status>
|
||||
<value_type>3</value_type>
|
||||
<allowed_hosts/>
|
||||
|
@ -1344,7 +1383,7 @@
|
|||
<sort_triggers>0</sort_triggers>
|
||||
<resource>
|
||||
<name>mars count percent</name>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
</resource>
|
||||
</screen_item>
|
||||
<screen_item>
|
||||
|
@ -1364,7 +1403,7 @@
|
|||
<sort_triggers>0</sort_triggers>
|
||||
<resource>
|
||||
<name>mars count sec</name>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
</resource>
|
||||
</screen_item>
|
||||
<screen_item>
|
||||
|
@ -1384,7 +1423,7 @@
|
|||
<sort_triggers>0</sort_triggers>
|
||||
<resource>
|
||||
<name>mars count kb</name>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
</resource>
|
||||
</screen_item>
|
||||
<screen_item>
|
||||
|
@ -1404,7 +1443,7 @@
|
|||
<sort_triggers>0</sort_triggers>
|
||||
<resource>
|
||||
<name>mars count ops</name>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
</resource>
|
||||
</screen_item>
|
||||
<screen_item>
|
||||
|
@ -1424,7 +1463,7 @@
|
|||
<sort_triggers>0</sort_triggers>
|
||||
<resource>
|
||||
<name>mars count io fly</name>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
</resource>
|
||||
</screen_item>
|
||||
<screen_item>
|
||||
|
@ -1444,7 +1483,7 @@
|
|||
<sort_triggers>0</sort_triggers>
|
||||
<resource>
|
||||
<name>mars count copy fly</name>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
</resource>
|
||||
</screen_item>
|
||||
</screen_items>
|
||||
|
@ -1454,7 +1493,7 @@
|
|||
</templates>
|
||||
<triggers>
|
||||
<trigger>
|
||||
<expression>{Template Mars Server:vfs.file.time[/tmp/zabbix.mars,change].fuzzytime(931)}=0</expression>
|
||||
<expression>{Template App Mars Server:vfs.file.time[/tmp/zabbix.mars,change].fuzzytime(931)}=0</expression>
|
||||
<name>Mars check not running on {HOST.NAME}</name>
|
||||
<url/>
|
||||
<status>0</status>
|
||||
|
@ -1464,7 +1503,17 @@
|
|||
<dependencies/>
|
||||
</trigger>
|
||||
<trigger>
|
||||
<expression>{Template Mars Server:system.marsstatus[{$RESNAME},allow-replay].last(0)}=0 & {Template Mars Server:system.modules[mars].last(0)}=1 & {Template Mars Server:proc.num[mars_light].last(0)}=1</expression>
|
||||
<expression>{Template App Mars Server:system.mars[emergency-modus].last(0)}=1</expression>
|
||||
<name>MARS Device on {HOST.NAME} emergency !</name>
|
||||
<url/>
|
||||
<status>0</status>
|
||||
<priority>5</priority>
|
||||
<description/>
|
||||
<type>0</type>
|
||||
<dependencies/>
|
||||
</trigger>
|
||||
<trigger>
|
||||
<expression>{Template App Mars Server:system.marsstatus[{$RESNAME},allow-replay].last(0)}=0 & {Template App Mars Server:system.modules[mars].last(0)}=1 & {Template App Mars Server:proc.num[mars_light].last(0)}=1</expression>
|
||||
<name>MARS Device on {HOST.NAME} not allowed replay</name>
|
||||
<url/>
|
||||
<status>0</status>
|
||||
|
@ -1474,7 +1523,17 @@
|
|||
<dependencies/>
|
||||
</trigger>
|
||||
<trigger>
|
||||
<expression>{Template Mars Server:system.marsstatus[{$RESNAME},attach].last(0)}=0 & {Template Mars Server:system.modules[mars].last(0)}=1 & {Template Mars Server:proc.num[mars_light].last(0)}=1</expression>
|
||||
<expression>{Template App Mars Server:system.marsstatus[{$RESNAME},is-attached].last(0)}=0 & {Template App Mars Server:system.marsstatus[{$RESNAME},attach].last(0)}=1 & {Template App Mars Server:system.modules[mars].last(0)}=1 & {Template App Mars Server:proc.num[mars_light].last(0)}=1</expression>
|
||||
<name>MARS Device on {HOST.NAME} not attached</name>
|
||||
<url/>
|
||||
<status>0</status>
|
||||
<priority>1</priority>
|
||||
<description/>
|
||||
<type>0</type>
|
||||
<dependencies/>
|
||||
</trigger>
|
||||
<trigger>
|
||||
<expression>{Template App Mars Server:system.marsstatus[{$RESNAME},attach].last(#2)}=0 & {Template App Mars Server:system.modules[mars].last(#2)}=1 & {Template App Mars Server:proc.num[mars_light].last(#2)}=1</expression>
|
||||
<name>MARS Device on {HOST.NAME} not attached</name>
|
||||
<url/>
|
||||
<status>0</status>
|
||||
|
@ -1484,7 +1543,17 @@
|
|||
<dependencies/>
|
||||
</trigger>
|
||||
<trigger>
|
||||
<expression>{Template Mars Server:system.marsstatus[{$RESNAME},connect].last(0)}=0 & {Template Mars Server:system.modules[mars].last(0)}=1 & {Template Mars Server:proc.num[mars_light].last(0)}=1</expression>
|
||||
<expression>{Template App Mars Server:system.marsstatus[{$RESNAME},attach].last(#2)}=0</expression>
|
||||
<name>MARS Device on {HOST.NAME} not attached</name>
|
||||
<url/>
|
||||
<status>0</status>
|
||||
<priority>1</priority>
|
||||
<description>{Template App Mars Server:system.marsstatus[{$RESNAME},is-attached].last(0)}=0 & {Template App Mars Server:system.marsstatus[{$RESNAME},attach].last(0)}=1 & {Template App Mars Server:system.modules[mars].last(0)}=1 & {Template App Mars Server:proc.num[mars_light].last(0)}=1</description>
|
||||
<type>0</type>
|
||||
<dependencies/>
|
||||
</trigger>
|
||||
<trigger>
|
||||
<expression>{Template App Mars Server:system.marsstatus[{$RESNAME},connect].last(0)}=0 & {Template App Mars Server:system.modules[mars].last(0)}=1 & {Template App Mars Server:proc.num[mars_light].last(0)}=1</expression>
|
||||
<name>MARS Device on {HOST.NAME} not connected</name>
|
||||
<url/>
|
||||
<status>0</status>
|
||||
|
@ -1494,17 +1563,7 @@
|
|||
<dependencies/>
|
||||
</trigger>
|
||||
<trigger>
|
||||
<expression>{Template Mars Server:system.marsstatus[{$RESNAME},is-attached].last(0)}=0 & {Template Mars Server:system.marsstatus[{$RESNAME},attach].last(0)}=1 & {Template Mars Server:system.modules[mars].last(0)}=1 & {Template Mars Server:proc.num[mars_light].last(0)}=1</expression>
|
||||
<name>MARS Device on {HOST.NAME} not is-attached</name>
|
||||
<url/>
|
||||
<status>0</status>
|
||||
<priority>1</priority>
|
||||
<description/>
|
||||
<type>0</type>
|
||||
<dependencies/>
|
||||
</trigger>
|
||||
<trigger>
|
||||
<expression>{Template Mars Server:system.marsstatus[{$RESNAME},is-replaying].last(0)}=0 & {Template Mars Server:system.marsstatus[{$RESNAME},allow-replay].last(0)}=1 & {Template Mars Server:system.modules[mars].last(0)}=1 & {Template Mars Server:proc.num[mars_light].last(0)}=1</expression>
|
||||
<expression>{Template App Mars Server:system.marsstatus[{$RESNAME},is-replaying].last(0)}=0 & {Template App Mars Server:system.marsstatus[{$RESNAME},allow-replay].last(0)}=1 & {Template App Mars Server:system.modules[mars].last(0)}=1 & {Template App Mars Server:proc.num[mars_light].last(0)}=1</expression>
|
||||
<name>MARS Device on {HOST.NAME} not is-replaying</name>
|
||||
<url/>
|
||||
<status>1</status>
|
||||
|
@ -1514,7 +1573,7 @@
|
|||
<dependencies/>
|
||||
</trigger>
|
||||
<trigger>
|
||||
<expression>{Template Mars Server:system.marsstatus[{$RESNAME},sync].last(0)}=0 & {Template Mars Server:system.modules[mars].last(0)}=1 & {Template Mars Server:proc.num[mars_light].last(0)}=1</expression>
|
||||
<expression>{Template App Mars Server:system.marsstatus[{$RESNAME},sync].last(0)}=0 & {Template App Mars Server:system.modules[mars].last(0)}=1 & {Template App Mars Server:proc.num[mars_light].last(0)}=1</expression>
|
||||
<name>MARS Device on {HOST.NAME} not sync</name>
|
||||
<url/>
|
||||
<status>0</status>
|
||||
|
@ -1524,7 +1583,7 @@
|
|||
<dependencies/>
|
||||
</trigger>
|
||||
<trigger>
|
||||
<expression>{Template Mars Server:system.modules[mars].last(0)}=0 | {Template Mars Server:proc.num[mars_light].last(0)}=0</expression>
|
||||
<expression>{Template App Mars Server:system.modules[mars].last(0)}=0 | {Template App Mars Server:proc.num[mars_light].last(0)}=0</expression>
|
||||
<name>MARS Module on {HOST.NAME} not loaded or running</name>
|
||||
<url/>
|
||||
<status>0</status>
|
||||
|
@ -1561,7 +1620,7 @@
|
|||
<calc_fnc>2</calc_fnc>
|
||||
<type>0</type>
|
||||
<item>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
<key>system.mars[copy_read_max_fly]</key>
|
||||
</item>
|
||||
</graph_item>
|
||||
|
@ -1573,7 +1632,7 @@
|
|||
<calc_fnc>2</calc_fnc>
|
||||
<type>0</type>
|
||||
<item>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
<key>system.mars[copy_write_max_fly]</key>
|
||||
</item>
|
||||
</graph_item>
|
||||
|
@ -1605,7 +1664,7 @@
|
|||
<calc_fnc>2</calc_fnc>
|
||||
<type>0</type>
|
||||
<item>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
<key>system.mars[io_flying_count]</key>
|
||||
</item>
|
||||
</graph_item>
|
||||
|
@ -1637,7 +1696,7 @@
|
|||
<calc_fnc>2</calc_fnc>
|
||||
<type>0</type>
|
||||
<item>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
<key>system.mars[io_tuning/writeback_rate_kb]</key>
|
||||
</item>
|
||||
</graph_item>
|
||||
|
@ -1649,7 +1708,7 @@
|
|||
<calc_fnc>2</calc_fnc>
|
||||
<type>0</type>
|
||||
<item>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
<key>system.mars[logger_mem_used_kb]</key>
|
||||
</item>
|
||||
</graph_item>
|
||||
|
@ -1661,7 +1720,7 @@
|
|||
<calc_fnc>2</calc_fnc>
|
||||
<type>0</type>
|
||||
<item>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
<key>system.mars[traffic_tuning/client_role_traffic_rate_kb]</key>
|
||||
</item>
|
||||
</graph_item>
|
||||
|
@ -1673,7 +1732,7 @@
|
|||
<calc_fnc>2</calc_fnc>
|
||||
<type>0</type>
|
||||
<item>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
<key>system.mars[traffic_tuning/server_role_traffic_rate_kb]</key>
|
||||
</item>
|
||||
</graph_item>
|
||||
|
@ -1685,7 +1744,7 @@
|
|||
<calc_fnc>2</calc_fnc>
|
||||
<type>0</type>
|
||||
<item>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
<key>system.mars[traffic_tuning/writeback_rate_kb]</key>
|
||||
</item>
|
||||
</graph_item>
|
||||
|
@ -1697,7 +1756,7 @@
|
|||
<calc_fnc>2</calc_fnc>
|
||||
<type>0</type>
|
||||
<item>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
<key>system.mars[write_throttle_rate_kb]</key>
|
||||
</item>
|
||||
</graph_item>
|
||||
|
@ -1729,7 +1788,7 @@
|
|||
<calc_fnc>2</calc_fnc>
|
||||
<type>0</type>
|
||||
<item>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
<key>system.mars[traffic_tunning/client_role_traffic_count_ops]</key>
|
||||
</item>
|
||||
</graph_item>
|
||||
|
@ -1741,7 +1800,7 @@
|
|||
<calc_fnc>2</calc_fnc>
|
||||
<type>0</type>
|
||||
<item>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
<key>system.mars[traffic_tunning/server_role_traffic_count_ops]</key>
|
||||
</item>
|
||||
</graph_item>
|
||||
|
@ -1753,7 +1812,7 @@
|
|||
<calc_fnc>2</calc_fnc>
|
||||
<type>0</type>
|
||||
<item>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
<key>system.mars[write_throttle_count_ops]</key>
|
||||
</item>
|
||||
</graph_item>
|
||||
|
@ -1785,7 +1844,7 @@
|
|||
<calc_fnc>2</calc_fnc>
|
||||
<type>0</type>
|
||||
<item>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
<key>system.mars[io_tuning/writeback_until_percent]</key>
|
||||
</item>
|
||||
</graph_item>
|
||||
|
@ -1817,7 +1876,7 @@
|
|||
<calc_fnc>2</calc_fnc>
|
||||
<type>0</type>
|
||||
<item>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
<key>system.mars[mapfree_period_sec]</key>
|
||||
</item>
|
||||
</graph_item>
|
||||
|
@ -1829,7 +1888,7 @@
|
|||
<calc_fnc>2</calc_fnc>
|
||||
<type>0</type>
|
||||
<item>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
<key>system.mars[propagate_interval_sec]</key>
|
||||
</item>
|
||||
</graph_item>
|
||||
|
@ -1841,7 +1900,7 @@
|
|||
<calc_fnc>2</calc_fnc>
|
||||
<type>0</type>
|
||||
<item>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
<key>system.mars[scan_interval_sec]</key>
|
||||
</item>
|
||||
</graph_item>
|
||||
|
@ -1853,7 +1912,7 @@
|
|||
<calc_fnc>2</calc_fnc>
|
||||
<type>0</type>
|
||||
<item>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
<key>system.mars[statusfiles_rollover_sec]</key>
|
||||
</item>
|
||||
</graph_item>
|
||||
|
@ -1865,7 +1924,7 @@
|
|||
<calc_fnc>2</calc_fnc>
|
||||
<type>0</type>
|
||||
<item>
|
||||
<host>Template Mars Server</host>
|
||||
<host>Template App Mars Server</host>
|
||||
<key>system.mars[sync_flip_interval_sec]</key>
|
||||
</item>
|
||||
</graph_item>
|
||||
|
|
Loading…
Reference in New Issue