monitoring: update doc, monitoring and mars-status

This commit is contained in:
Joerg Mann 2014-03-19 12:42:38 +01:00 committed by Thomas Schoebel-Theuer
parent 8429aaf773
commit 3ec20ba8e5
6 changed files with 322 additions and 349 deletions

View File

@ -3,11 +3,6 @@
#
# last update at now ...
### TODO:
### - check em-mode
### - check join/leave cluster/resource
###
use warnings;
use strict;
@ -20,23 +15,19 @@ use File::Basename;
binmode STDOUT, ":utf8";
### defaults
my $version = "0.072q";
my $version = "0.073";
my $alife_timeout = "30"; # sec for remote-nodes timeout
my $is_tty = 0;
my $mars_dir = '/mars';
my $himself = `uname -n` or die "cannot determine my network node name\n";
my $clearscreen = `clear`;
my $StatusCode = 'UpToDate';
my @StatusText = ();
my $NodeStatusCode = 'UpToDate';
my @NodeStatusText = ();
my $MarsTreeVer = 0.1;
chomp $himself;
### ARGV
# Optionen in Hash-Ref parsen
my $params = {};
GetOptions( $params, 'help', 'h', 'version', 'v', 'monitor', 'role', 'cstate', 'dstate', 'resource=s', 'system', 'interval=i', 'history', 'ascii', 'debug' );
GetOptions( $params, 'help', 'h', 'version', 'v', 'resource=s', 'system', 'interval=i', 'history', 'ascii', 'debug' );
#########################################################################################
### terminal settings
@ -66,21 +57,15 @@ sub display_help {
print "$HelpText\n\n" if ($HelpText);
print "Usage: mars-status [--help]\n";
print "Usage: mars-status [--version]\n";
print "Usage: mars-status (without specification of parameters, an abstract of all the informations spent)\n";
print "Usage: mars-status (without specification of parameters, an abstract of all the information sent)\n";
print "Usage: mars-status [--resource <RESNAME>] [--interval <seconds>] | [--history] | [--debug ] | [--system]\n";
print "Usage: mars-status [--resource <RESNAME>] --monitor \n";
print "Usage: mars-status --resource <RESNAME> [--role | --cstate | --dstate]\n";
print " --resource : limits the display to the specified resource\n";
print " --interval : refreshes the display every second xxx\n";
print " --history : shows information about the log files, version numbers and their status\n";
print " --ascii : display history in ascii code letters\n";
print " --system : display mars-system informations\n";
print " --monitor : indicator to use for monitoring on all state (by local node only !)\n";
print " --role|--cstate|--dstate single state on lokal node\n";
print " --debug : additional display debug messages\n\n";
print "Usage small include rotate : mars-status --interval 2\n";
print "Usage monitoring : mars-status --monitor\n";
print "Usage monitoring drbd-linke : mars-status --cstate (or --dstate or --role)\n";
print "Usage small include refresh : mars-status --interval 2\n";
print "Usage full, include debug : mars-status --system --history --debug\n\n";
exit;
}
@ -117,28 +102,10 @@ sub print_screen {
my $Color = shift;
my $Level = shift;
### default
if ( !$params->{'monitor'} && !$Level ){
$Color = 'FAINT' if (!$Color);
print color "$Color" if ( $is_tty );
print "$Text";
print color 'reset' if ( $is_tty );
### monitor
} elsif ( $params->{'monitor'} && $Level ) {
if ( $params->{'role'} && $Level eq 'Rmonitor' ) {
print "$Text\n";
exit;
} elsif ( $params->{'dstate'} && $Level eq 'Dmonitor' ) {
print "$Text\n";
exit;
} elsif ( $params->{'cstate'} && $Level eq 'Cmonitor' ) {
print "$Text\n";
exit;
} elsif ( !$params->{'role'} && !$params->{'dstate'} && !$params->{'cstate'}) {
print "$Text\n";
}
}
$Color = 'FAINT' if (!$Color);
print color "$Color" if ( $is_tty );
print "$Text";
print color 'reset' if ( $is_tty );
}
@ -169,37 +136,6 @@ sub convert_link {
}
#########################################################################################
### StatusCode
sub monitoring {
    # Record the result of one monitoring check.
    #
    # Parameters:
    #   $Code - status code of the check; an empty or undefined value is
    #           treated as 'UpToDate'
    #   $Text - short text describing the check result
    #
    # Both the global ($StatusCode / @StatusText) and the per-node
    # ($NodeStatusCode / @NodeStatusText) records are updated.
    #
    # Status codes:
    #   UpToDate   - eishokey
    #   UpDateIng  - worker
    #   OutDate    - replaying
    #   InvaliDate - syncing
    #   SwitchOff  - SwitchOff
    #   Failed     - system, network, uae.
    #   unknown    - not joined
    my $Code = shift;
    my $Text = shift;

    $Code = "UpToDate" if ( !defined $Code || $Code eq '' );

    # Any code other than 'UpToDate' is stored on both levels.  The former
    # global test compared $StatusCode (not $Code) against 'UpToDate', so the
    # global status could never leave the value it was initialised with.
    if ( $Code ne 'UpToDate' ) {
        $StatusCode     = $Code;    # global
        $NodeStatusCode = $Code;    # local-node
    }

    push @StatusText,     $Text;
    push @NodeStatusText, $Text;
}
#########################################################################################
### sub display resource-partner
sub display_partner {
@ -216,20 +152,15 @@ sub display_partner {
##########################################################################
### check status
if ( $PStatus eq $PName ) {
print_screen "Primary", "$Color_blue";
print_screen "Primary [$PRes on $PName]",'', 'Rmonitor';
monitoring '', "joined";
print_screen "Primary", "$Color_blue bold";
} else {
if ( $PDevice eq 0 ) {
print_screen "not joined","$Color_red";
print_screen "not joined\n",'', 'Rmonitor';
print_screen " -> Resource is not joined to this node\n", "$Color_red";
monitoring "unknown", "not joined";
return;
} else {
print_screen "Secondary", "$Color_blue";
print_screen "Secondary [$PRes on $PName]",'', 'Rmonitor';
monitoring "", "joined";
print_screen "Secondary", "$Color_blue bold";
}
}
@ -241,48 +172,58 @@ sub display_partner {
my $PAlive = time()- $PAlive[9] - $alife_timeout;
print_screen ", System", '';
if ( $PAlive > 1 ) {
print_screen " unknown (last message before $PAlive sec) !!!\n", "$Color_red";
monitoring "Failed", "not alive"
print_screen " unknown (last message before $PAlive sec) !!!", "$Color_red";
} else {
print_screen " alive\n", "$Color_green";
monitoring "", "alive";
print_screen " alive", "$Color_green";
}
##########################################################################
### check device
my $CheckDiskDev = check_link "$mars_dir/$PRes/data-$PName";
my $CheckDiskMrs = check_link "$mars_dir/$PRes/data-$PName";
print_screen "\tDevice : Disk-Device ";
print_screen "$CheckDiskDev", "$Color_blue";
print_screen ", used as Mars-Device ";
print_screen "$CheckDiskMrs", "$Color_blue";
# disk-device
my $DiskDev = check_link "$mars_dir/$PRes/data-$PName";
print_screen "\n\tDevice : Disk-Device ";
print_screen "$DiskDev", "$Color_blue";
# resize
my $ASize = check_link "$mars_dir/$PRes/actsize-$PName";
if ( $PSize eq $ASize) {
print_screen ", not resized";
print_screen ", not enlarged";
} else {
print_screen "\n\t\t---> HINT: resizing used ($PSize != $ASize)","$Color_red";
print_screen ", resize active","$Color_red bold";
}
##########################################################################
### check mountpint
if ( $himself eq $PName ) {
my $PUDevice = "/dev/mars/$PDevice";
if ( stat( $PUDevice) ) {
open my $fh, '<', '/proc/mounts' or die $!;
$PUDevice = ( grep { /^$PUDevice / } <$fh> )[0];
if ( $PUDevice ) {
$PUDevice = ( split / /, $PUDevice )[1];
print_screen " and mounted as $PUDevice\n", "$Color_blue";
} else {
print_screen "\n\t\t---> TODO: enable to mount\n", "$Color_green";
}
} else {
print_screen "\n\t\t---> HINT: unable to mount, Device is Secondary or mars is starting\n", "$Color_blue";
}
} else {
print_screen "\n";
# mars-device
my $MarsDev = "/dev/mars/$PDevice";
my $Temp = "";
if ( $PName eq $himself ) { # himself
if ( $PName eq $PStatus) { # himself=primary
print_screen ", used as Mars-Device ";
print_screen "$MarsDev", "$Color_blue";
if ( stat( $MarsDev) ) {
open my $fh, '<', '/proc/mounts' or die $!;
$MarsDev = ( grep { /^$MarsDev / } <$fh> )[0];
if ( $MarsDev ) {
$MarsDev = ( split / /, $MarsDev )[1];
print_screen "\n\t\t---> WORK: mounted as $MarsDev", "$Color_blue";
} else {
print_screen "\n\t\t---> TODO: enable to mount", "$Color_green";
}
} else {
print_screen "\n\t\t---> HINT: unable to mount, mars is starting or defective", "$Color_red";
}
} else { # himself secondary
if ( stat( $MarsDev) ) {
open my $fh, '<', '/proc/mounts' or die $!;
$MarsDev = ( grep { /^$MarsDev / } <$fh> )[0];
if ( !$MarsDev ) {
print_screen "\n\t\t---> HINT: Mars-Device on Secondary available", "$Color_red";
}
}
}
}
if ( $PSize ne $ASize) {
print_screen "\n\t\t---> HINT: resizing used ($PSize != $ASize)","$Color_red";
}
$$ref_ResPartner++;
@ -292,27 +233,25 @@ sub display_partner {
### sync - status
my $PSyncsize = check_link "$mars_dir/$PRes/syncstatus-$PName";
my $SStatus = sprintf ("%.2f", ($PSyncsize / $PSize * 100));
print_screen (sprintf "\tSync : %s bytes (%.3fTB) synced = ", $PSyncsize, ( $PSyncsize/1024/1024/1024/1024));
print_screen (sprintf "\n\tSync : %s bytes (%.3fTB) synced = ", $PSyncsize, ( $PSyncsize/1024/1024/1024/1024));
### sync - speed
my $SSpeed = check_link "$mars_dir/$PRes/actual-$PName/sync_rate";
$SSpeed = sprintf ("%.2f", $SSpeed / 1024 / 1024);
$SSpeed = sprintf ("%.3f", $SSpeed / 1024 / 1024);
my $SEndTime = ($PSize - $PSyncsize ) / 1024 / 1024 / 1024;
if ( $SSpeed eq "0.00" ) {
if ( $SSpeed eq "0.000" ) {
$SSpeed = "%";
} else {
$SEndTime = sprintf ("%.2f", $SEndTime / $SSpeed / 60);
$SSpeed = "%, by $SSpeed gb/s (hypothetically ends in $SEndTime min)";
$SSpeed = "%, at $SSpeed gb/s (done in $SEndTime min)";
}
### sync - results
if ( $SStatus < 100) {
print_screen "$SStatus$SSpeed\n";
print_screen "\t\t---> WORK: Sync in progress = ($SStatus% < 100.00%)\n", "$Color_red";
monitoring "InvaliDate", "not in sync ($SStatus%)";
print_screen "\t\t---> WORK: Sync in progress = ($SStatus% < 100.00%)\n", "$Color_blue";
} else {
print_screen "$SStatus$SSpeed\n", "$Color_green";
monitoring "", "synced";
}
@ -344,7 +283,7 @@ sub display_partner {
print_screen ", received with $LogSpeed gb/s" if ( $LogSpeed ne "0.00" );
print_screen "\n";
if ( $Ljoined eq "0" || $PLogSize eq "0.0001" ) {
print_screen "\t\t---> WORK: Logfile wait for starting ...\n", "$Color_red";
print_screen "\t\t---> WORK: Logfile wait for starting ...\n", "$Color_blue";
}
if ( ( $ref_AULogfile ) && !($PLogName eq $ref_AULogfile) ) {
print_screen "\t\t---> HINT: Logfile Version not actual = ($PLogName ! $ref_AULogfile)\n", "$Color_red";
@ -357,7 +296,7 @@ sub display_partner {
my $RStatus = sprintf("%.2f", ( $PLogFile[1] / $PLogSize * 100));
$RStatus = 0 if ( $Ljoined eq "0" || $PLogSize eq "1" );
$RStatus = 99.99 if (( $PLogFile[1] ne $PLogSize ) && ( $RStatus eq "100.00" ));
print_screen sprintf ("\tReplayed: %s bytes (%.3fGB) now replayed, Todo %d = ",
print_screen sprintf ("\tReplayed: %s bytes (%.3fGB) now replayed, Todo %d, completed ",
$PLogFile[1], ( $PLogFile[1]/1024/1024/1024 ), $PLogFile[2]);
### replay - speed
@ -366,38 +305,33 @@ sub display_partner {
if ( $RSpeed eq "0.00" ) {
$RSpeed = "%";
} else {
$RSpeed = "%, by $RSpeed gb/s";
$RSpeed = "%, at $RSpeed gb/s";
}
### replay - results
if (( $RStatus < 1 ) && ( $PLogSize != 0.0001 )) {
print_screen "$RStatus$RSpeed\n";
print_screen "\t\t---> HINT: Replay not started, Logfile inactive = (Size: $PLogSize)\n", "$Color_red";
monitoring "OutDate", "replay stopped";
} elsif (( $RStatus < 100 ) && ( $PLogSize != 0.0001 )) {
print_screen "$RStatus$RSpeed\n";
print_screen "\t\t---> WORK: Replay in progress = ($RStatus% < 100.00%)\n", "$Color_red";
monitoring "UpDateIng", "replay running1";
print_screen "\t\t---> WORK: Replay in progress = ($RStatus% < 100.00%)\n", "$Color_blue";
} elsif ( $PLogFile[2] > 0 ) {
$RStatus = sprintf("%.2f", ($PLogFile[1]-$PLogFile[2])/$PLogFile[1] * 100);
print_screen "$RStatus$RSpeed\n", "$Color_red";
monitoring "UpDateIng", "replay running2";
} elsif ( $PLogSize = 0.0001 ) {
$RStatus = "100.00";
print_screen "$RStatus$RSpeed\n", "$Color_green";
monitoring '', "replay wait";
} else {
print_screen "$RStatus% $RSpeed\n", "$Color_green";
monitoring '', "replaying";
}
### replay - hints
if ($PLogFile[2] != 0) {
print_screen "\t\t---> HINT: Replay-Todo is actualy $PLogFile[2], ", "$Color_blue";
print_screen "\t\t---> WORK: Replay-Todo is actualy $PLogFile[2], ", "$Color_blue";
if ( $PLogFile[2] < 0 ) {
print_screen "replaying backwards ??? Check this !!!\n", "$Color_red";
} elsif ( $PLogFile[2] > 0 ) {
@ -411,17 +345,31 @@ sub display_partner {
##########################################################################
### check actual
my $ActStatus = check_link "$mars_dir/$PRes/actual-$PName/is-primary";
my $ActDevice = check_link "$mars_dir/$PRes/actual-$PName/device-$PDevice";
print_screen "\tActual : Status=";
if ( $ActStatus eq 1 ) {
print_screen "\tActual : Status=Primary, used Device=";
convert_link "$mars_dir/$PRes/actual-$PName/device-$PDevice";
print_screen "\n";
print_screen "Primary", "$Color_green";
print_screen ", used Device=";
# hack for multiple linkversions
if ( $ActDevice eq "off") {
if ( convert_link "$mars_dir/$PRes/actual-$PName/device-$PDevice" eq "off" ) {
print_screen "on", "$Color_red";
} else {
print_screen "on", "$Color_green";
}
} else {
print_screen "on", "$Color_green";
}
} else {
print_screen "\tActual : Status=Secondary, Syncstatus=";
print_screen "Secondary", "$Color_green";
print_screen ", Syncstatus=";
convert_link "$mars_dir/$PRes/actual-$PName/copy-syncstatus-$PName";
print_screen ", Logfileupdate=";
convert_link "$mars_dir/$PRes/actual-$PName/logfile-update";
print_screen "\n";
}
print_screen ", Attached=";
convert_link "$mars_dir/$PRes/actual-$PName/is-attached";
print_screen "\n";
##########################################################################
### check switches
@ -429,59 +377,31 @@ sub display_partner {
print_screen "\tSwitches: Attach=";
if ( readlink "$mars_dir/$PRes/todo-$PName/attach" eq 1 ) { ### Use of uninitialized value in string
print_screen "on", "$Color_green";
monitoring "", "attached";
} else {
print_screen "off", "$Color_red";
monitoring "SwitchOff", "attach off";
}
print_screen " [masked:" if ( $ActStatus eq 1 );
print_screen " Connect=";
if ( readlink "$mars_dir/$PRes/todo-$PName/connect" eq 1 ) { ### Use of uninitialized value in string
print_screen "on", "$Color_green";
monitoring "", "connected";
} else {
print_screen "off", "$Color_red";
monitoring "SwitchOff", "connect off";
}
print_screen " Sync=";
if ( readlink "$mars_dir/$PRes/todo-$PName/sync" eq 1 ) { ### Use of uninitialized value in string
print_screen "on", "$Color_green";
monitoring "", "synced";
} else {
print_screen "off", "$Color_red";
monitoring "SwitchOff", "sync off";
}
print_screen " AllowReplay=" ;
if ( readlink "$mars_dir/$PRes/todo-$PName/allow-replay" eq 1 ) { ### Use of uninitialized value in string
print_screen "on", "$Color_green";
monitoring "", "replayed";
} else {
print_screen "off", "$Color_red";
monitoring "SwitchOff", "replay off";
}
print_screen "]" if ( $ActStatus eq 1 );
print_screen "\n";
##########################################################################
### node status
my $NodeStatusText = '';
foreach (@NodeStatusText) {
$NodeStatusText = "$NodeStatusText($_)";
}
### normal-modus
print_screen "\tStatus : $NodeStatusCode = $NodeStatusText\n", '';
### monitor-modus
print_screen "$NodeStatusCode [$NodeStatusText]", '', 'Dmonitor';
if ( $NodeStatusCode ne 'SwitchOff' ) {
print_screen "Disconnect [$PRes on $PName]", '', 'Cmonitor';
} else {
print_screen "Connect [$PRes on $PName]", '', 'Cmonitor';
}
### reset values
$NodeStatusCode = 'UpToDate';
@NodeStatusText = ();
return $PLogName;
}
@ -527,7 +447,7 @@ sub check_ressource {
### joined (und nicht monitor)...
if (( $ResPartner eq 1 ) && ( !$params->{'monitor'} )) {
if ( $ResPartner eq 1 ) {
### partners
opendir my $server_dh, "$mars_dir/$res" or die "Cannot open $mars_dir/$res: $!";
my @servers = grep { /^data/ && readlink "$mars_dir/$res/$_" } readdir $server_dh;
@ -638,6 +558,7 @@ sub check_logfile {
my $VersionErrorChk = 0;
my $VersionLastChk = 0;
my @VersionFile = <$mars_dir/$LResource/version-$VersionNr*>;
my $VersionNode = "";
foreach my $VersionFile (@VersionFile) {
my @VersionDetail = check_link "$VersionFile";
@VersionDetail = split (',|:', "@VersionDetail" );
@ -656,8 +577,7 @@ sub check_logfile {
} else {
print_screen "\t$Gls$Gfr$Gls$Gfr$Gfr $Gao$Gab", "$Color_red";
}
print_screen "Primary ";
$VersionNode = "Primary \t ";
} elsif ( $LogHost eq "" ) {
# none
@ -676,9 +596,9 @@ sub check_logfile {
} else {
print_screen "\t$Gls$Gfr$Gls$Gfr$Gfr $Gls$Gfr$Gau$Gab", "$Color_red";
}
print_screen "Secondary ";
$VersionNode = "Secondary";
};
print_screen sprintf ("Node: $VersionSource\t\tCheck: $VersionDetail[0]\t\tSize: $VersionDetail[2] bytes (%.3fGB)", $VersionDetail[2] / 1024 / 1024 / 1024 );
print_screen sprintf ("$VersionNode $VersionSource \tCheck: $VersionDetail[0] \tSize: $VersionDetail[2] bytes (%.3fGB)", $VersionDetail[2] / 1024 / 1024 / 1024 );
if ( $LogFile ) {
@ -722,7 +642,7 @@ sub check_logfile {
}
if ( $LogFile && $DeleteFiles eq $LogFile ) {
print_screen "\t$Gls$Gfr$Gkr$Gab", "$Color_red";
print_screen "\t\t---> HINT: LogFile ist marked for delete ($DeleteFiles).\n", "$Color_green";
print_screen "\t\t---> TODO: LogFile ist marked for delete ($DeleteFiles).\n", "$Color_green";
}
}
@ -731,22 +651,22 @@ sub check_logfile {
### same checks
if ( $VersionFileCount ne $LPartner ) {
print_screen "\t$Gls$Gfr$Gkr$Gab", "$Color_red";
print_screen "TODO: Count of Logfiles different = (Cluster has $LPartner Nodes, but only find $VersionFileCount Node)\n", "$Color_red";
print_screen "HINT: Count of Logfiles different = (Cluster has $LPartner Nodes, but only find $VersionFileCount Node)\n", "$Color_red";
}
if ( $VersionErrorSize eq 1 && $VersionErrorChk eq 1) {
# print_screen "\t$Gls$Gfr$Gao$Gab\n", "$Color_red";
} elsif ( $VersionErrorSize eq 1 ) {
print_screen "\t$Gls$Gfr$Gkr$Gab", "$Color_red";
print_screen "TODO: Logfiles has not equal size and same Checksums, ups ... \n","$Color_red";
print_screen "HINT: Logfiles has not equal size and same Checksums, ups ... \n","$Color_red";
} elsif ( $VersionErrorSize ne 1 && $VersionErrorChk eq 1 ) {
print_screen "\t$Gls$Gfr$Gkr$Gab", "$Color_red";
print_screen "TODO: Logfiles has not equal Checksums and same size, check for SPLIT-BRAIN\n","$Color_red";
print_screen "HINT: Logfiles has not equal Checksums and same size, check for SPLIT-BRAIN\n","$Color_red";
}
if ( `ls -l $mars_dir/$LResource/replay-* | grep $VersionNr` ) {
print_screen "\t$Gls$Gfr$Gao$Gab", "$Color_red";
print_screen "WORK: Version are actual and used. ", "$Color_green";
print_screen "WORK: Version are actual and used. ", "$Color_blue";
if ( $VersionErrorSize ne 1 && $VersionErrorChk ne 1) {
print_screen "Wait for start replay ...\n", "$Color_green";
} else {
@ -754,10 +674,10 @@ sub check_logfile {
}
} elsif ( !$LogFile ) {
print_screen "\t$Gls$Gfr$Gao$Gab", "$Color_red";
print_screen "WORK: Version is deleted the next log-rotate ...\n", "$Color_green";
print_screen "WORK: Version is deleted the next log-rotate ...\n", "$Color_blue";
} elsif ( !`ls -l $mars_dir/$LResource/replay-* | grep $VersionNr` && $LogFile ) {
print_screen "\t$Gls$Gfr$Gao$Gab", "$Color_red";
print_screen "WORK: Logfiles has all equal Sizes and Checksums, can be deleted ...\n", "$Color_green";
print_screen "TODO: Logfiles has all equal Sizes and Checksums, can be deleted ...\n", "$Color_green";
} else {
print "ups ??";
}
@ -766,37 +686,6 @@ sub check_logfile {
}
#########################################################################################
### diskfull
sub check_disk_is_full {
    # Print the "Diskspace on Cluster" line.
    #
    # Every link matching "$mars_dir/rest-space-<node>" is read with
    # check_link.  A value below 1 is reported as a full mars disk, either
    # for the local node ($himself) or for a remote node, and is also passed
    # to monitoring() as 'Failed'.  When no node is reported, " ok" is
    # printed instead.
    my @space_links   = glob("$mars_dir/rest-space-*");
    my $diskfull_mars = "";

    print_screen( "-> Diskspace on Cluster:", 'bold' );

    # A named loop variable is used so that nothing called inside the loop
    # can change the path we are working on.
    foreach my $space_link (@space_links) {
        my $diskfull_space = check_link($space_link);

        # Strip the directory prefix to get the node name.  The prefix is
        # taken from $mars_dir (the same value the glob above uses) rather
        # than a hard-coded "/mars".
        my $diskfull_system = $space_link;
        $diskfull_system =~ s!^\Q$mars_dir\E/rest-space-!!;

        next if ( $diskfull_space >= 1 );

        $diskfull_space = sprintf( "%.2f", $diskfull_space / 1024 );
        if ( $diskfull_system eq $himself ) {
            print_screen( "\n\t-> TODO: Local Partition $mars_dir full ($diskfull_space kb Limit) !!! mars is stopping !!!\n\n", "red" );
            $diskfull_mars = "$diskfull_mars,$diskfull_system";
            monitoring( 'Failed', 'System: Mars-Disk full, MARS stopping' );
        } else {
            print_screen( "\n\t-> TODO: Remotesystem $diskfull_system have mars-disk full ($diskfull_space kb Limit) !!!\n\n", "red" );
            $diskfull_mars = "$diskfull_mars,$diskfull_system";
            monitoring( 'Failed', 'System: Remote-Mars-Disk full' );
        }
    }

    ### TODO: /0
    if ( !$diskfull_mars ) {
        print_screen( " ok\n", "$Color_green" );
    }
}
#########################################################################################
### check debug-files
sub check_debugfile {
@ -868,15 +757,74 @@ sub info_version {
#########################################################################################
### avg_limit
### diskfull
sub check_diskfull {
    # Print the "Cluster Diskspace" field of the system summary.
    #
    # Every link matching "$mars_dir/rest-space-<node>" is read with
    # check_link.  A value below 1 is reported as a full mars disk, either
    # for the local node ($himself) or for a remote node.  When no node is
    # reported, " smoothly " is printed instead.
    my @space_links   = glob("$mars_dir/rest-space-*");
    my $diskfull_mars = "";

    print_screen( "-> Cluster Diskspace:", 'bold' );

    # A named loop variable is used so that nothing called inside the loop
    # can change the path we are working on.
    foreach my $space_link (@space_links) {
        my $diskfull_space = check_link($space_link);

        # Strip the directory prefix to get the node name.  The prefix is
        # taken from $mars_dir (the same value the glob above uses) rather
        # than a hard-coded "/mars".
        my $diskfull_system = $space_link;
        $diskfull_system =~ s!^\Q$mars_dir\E/rest-space-!!;

        next if ( $diskfull_space >= 1 );

        $diskfull_space = sprintf( "%.2f", $diskfull_space / 1024 );
        if ( $diskfull_system eq $himself ) {
            print_screen( "\n\t-> HINT: Local Partition $mars_dir full ($diskfull_space kb Limit) !!! mars is stopping !!!\n->", "$Color_red bold" );
        } else {
            print_screen( "\n\t-> HINT: Remotesystem $diskfull_system have mars-disk full ($diskfull_space kb Limit) !!!\n->", "$Color_red bold" );
        }
        $diskfull_mars = "$diskfull_mars,$diskfull_system";
    }

    ### TODO: /0
    if ( !$diskfull_mars ) {
        print_screen( " smoothly ", "$Color_green" );
    }
}
#########################################################################################
### emergency
sub check_jammed {
my $jammed = check_link "$mars_dir/emergency-$himself";
print_screen "-> Mars-Transaktion ", 'bold';
if (!$jammed) {
print_screen "running normaly\n", "$Color_green";
print_screen " Transaktions:", 'bold';
if ( !$jammed ) {
print_screen " smoothly ", "$Color_green";
} else {
print_screen "and Replication not runnunig !!!\n", "$Color_red";
monitoring 'Failed', 'System: Replikation not running';
print_screen " and Replication not running !!!\n-> ", "$Color_red";
}
}
#########################################################################################
### connects
sub check_connects {
    # Print the "Connects:" field of the system summary.
    #
    # The check is still a placeholder: both outcomes print "TODO".  Which
    # variant is printed depends on the emergency link of the local node.
    my $emergency = check_link "$mars_dir/emergency-$himself";

    print_screen( " Connects:", 'bold' );

    my ( $message, $colour ) =
        $emergency
        ? ( " TODO !!!\n", "$Color_red" )
        : ( " TODO ",      "$Color_green" );
    print_screen( $message, $colour );
}
#########################################################################################
### synclimit
sub check_synclimit {
    # Print the "Synclimit:" field of the system summary.
    #
    # The value is read from /proc/sys/mars/sync_limit with newlines and
    # tabs removed.  If the file cannot be opened, or the value is empty or
    # 0, " smoothly " is printed; otherwise the configured limit is shown.
    my $synclimit;

    # Lexical handle and three-argument open, as used for /proc/mounts
    # elsewhere in this script, instead of a global bareword handle.
    if ( open my $limit_fh, '<', '/proc/sys/mars/sync_limit' ) {
        while ( my $line = <$limit_fh> ) {
            $synclimit .= $line;
        }
        close $limit_fh;
        $synclimit =~ s/[\n\t]//g if ( defined $synclimit );
    }

    print_screen( " Synclimit:", 'bold' );
    if ( !$synclimit ) {
        print_screen( " smoothly ", "$Color_green" );
    } else {
        print_screen( " set to $synclimit !!!\n", "$Color_red" );
    }
}
@ -1048,23 +996,6 @@ while(1) {
}
##########################################################################
### check and set monitor
### big monitor
if ( $params->{'monitor'} || $params->{'cstate'} || $params->{'dstate'} || $params->{'role'} ) {
$params->{'system'} = 1;
$params->{'history'} = 1;
### TODO: check!
$params->{'debug'} = 0;
$params->{'monitor'} = 1;
}
### small-monitor
if (( $params->{'cstate'} || $params->{'dstate'} || $params->{'role'} ) && ( !$params->{'resource'})) {
display_help "Syntax-Error: Option resource is missing by --cstate / --dstate / --role!";
}
##########################################################################
### read mars infos
info_version;
@ -1074,17 +1005,18 @@ while(1) {
### check system limits
if ( $params->{'system'} ) {
check_systemstatus;
}
##########################################################################
### check system params
check_disk_is_full;
check_diskfull;
check_jammed;
check_connects;
check_synclimit;
### check resources
print_screen "---> Resources <---\n", "$Color_blue bold";
print_screen "\n---> Resources <---\n", "$Color_blue bold";
check_ressource;
@ -1101,17 +1033,6 @@ while(1) {
}
##########################################################################
### end, exit for monitor
if ( $params->{'monitor'} ) {
if (( $StatusCode eq 'InvaliDate' || $StatusCode eq 'Failed' || $StatusCode eq 'OutDate' || $StatusCode eq 'SwitchOff' )) {
exit 1;
} else {
exit 0;
}
}
##########################################################################
### end, next loop
print color 'reset';

View File

@ -1,4 +1,4 @@
.TH mars-status 8 "December 12, 2012" "" "Mars Admin"
.TH mars-status 8 "18.03.2014" "" "Mars Status"
.SH NAME
mars-status \- program to display status information from mars
@ -12,10 +12,6 @@ mars-status \- program to display status information from mars
.br
.B "Usage: mars-status [--resource <RESNAME>] [--interval <seconds>] [[--history] [--debug] [--system]]"
.br
.B "Usage: mars-status [--resource <RESNAME>] --monitor "
.br
.B "Usage: mars-status --resource <RESNAME> [--role | --cstate | --dstate]"
.br
.B " --resource : limits the display to the specified resource"
.br
.B " --interval : refreshes the display every xxx seconds"
@ -24,10 +20,6 @@ mars-status \- program to display status information from mars
.br
.B " --system : display mars-system information"
.br
.B " --monitor : indicator to use for monitoring on all state"
.br
.B " --role|--cstate|--dstate single state on lokal node"
.br
.B " --debug : additionally display internal error messages and warnings"
.br
.br
@ -75,22 +67,6 @@ Shows information about the log files, version numbers and their status.
Additionally displays internal error messages and warnings.
.TP
.BR monitor
Indicator to use for monitoring.
.TP
.BR cstate
Indicator to use for monitoring.
.TP
.BR dstate
Indicator to use for monitoring.
.TP
.BR role
Indicator to use for monitoring.
.TP
.SH BUGS
Problems and errors in the program are not known ;)

Binary file not shown.

View File

@ -1,9 +1,10 @@
#!/bin/bash
# v 0.02
# modify by jms at Fri Jan 31 14:11:36 CET 2014
# v 0.04
# modify by jms at Mon Feb 17 10:44:28 CET 2014
# zabbix mars
TEMPFILE="/tmp/zabbix.mars"
FINALTEMPFILE="/tmp/zabbix.mars"
TEMPFILE="${FINALTEMPFILE}.tmp"
echo -n >$TEMPFILE
@ -23,6 +24,12 @@ else
echo "# todo $RESNAME $HOSTNAME" >>$TEMPFILE
ls -ld /mars/resource-$RESNAME/todo-$HOSTNAME/* | sed -e 's!.*/todo-.*/!!g' | awk '{print "status '$RESNAME' " $1 " " $3}' >>$TEMPFILE
done
# emergency
echo -n "emergency-modus " >>$TEMPFILE
ls -l /mars/emergency-$HOSTNAME | awk '{print $11}' >>$TEMPFILE
fi
mv $TEMPFILE $FINALTEMPFILE

View File

@ -1,6 +1,16 @@
# mars config
# v002
# v003 - Wed Feb 19 11:01:19 CET 2014
# values:
# -------
# 1 - /proc/sys/block/mars/[filename]
UserParameter=system.mars[*], ( grep $1 /tmp/zabbix.mars || echo '0 0' ) | awk '{print $$2}'
UserParameter=system.marsstatus[*], grep $1 /tmp/zabbix.mars | grep ' $2 ' | awk '{print $$4}' | head -n 1
# values:
# -------
# 1 - ressource-name
# 2 - statuslink-name
UserParameter=system.marsstatus[*], ( grep $1 /tmp/zabbix.mars | grep ' $2 ' || echo '0 0 0 0') | awk '{print $$4}' | head -n 1

View File

@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<zabbix_export>
<version>2.0</version>
<date>2014-02-05T12:30:02Z</date>
<date>2014-03-18T14:18:06Z</date>
<groups>
<group>
<name>Templates</name>
@ -9,8 +9,8 @@
</groups>
<templates>
<template>
<template>Template Mars Server</template>
<name>Template Mars Server</name>
<template>Template App Mars Server</template>
<name>Template App Mars Server</name>
<groups>
<group>
<name>Templates</name>
@ -34,7 +34,7 @@
<key>system.mars[copy_read_max_fly]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -73,7 +73,7 @@
<key>system.mars[copy_write_max_fly]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -112,7 +112,7 @@
<key>system.mars[io_flying_count]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -151,7 +151,7 @@
<key>system.mars[io_tuning/writeback_rate_kb]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -190,7 +190,7 @@
<key>system.mars[io_tuning/writeback_until_percent]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -229,7 +229,7 @@
<key>vfs.file.time[/tmp/zabbix.mars,change]</key>
<delay>300</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -268,7 +268,7 @@
<key>system.mars[logger_mem_used_kb]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -307,7 +307,7 @@
<key>system.mars[mapfree_period_sec]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -346,7 +346,7 @@
<key>system.modules[mars]</key>
<delay>30</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -385,7 +385,7 @@
<key>proc.num[mars_light]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -424,7 +424,7 @@
<key>system.marsstatus[{$RESNAME},allow-replay]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -463,7 +463,7 @@
<key>system.marsstatus[{$RESNAME},attach]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -502,7 +502,46 @@
<key>system.marsstatus[{$RESNAME},connect]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authpassphrase/>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>Mars Server Status</name>
</application>
</applications>
<valuemap/>
</item>
<item>
<name>Mars Server Status - emergency</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>system.mars[emergency-modus]</key>
<delay>60</delay>
<history>90</history>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -541,7 +580,7 @@
<key>system.marsstatus[{$RESNAME},is-attached]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -580,7 +619,7 @@
<key>system.marsstatus[{$RESNAME},is-copying]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -619,7 +658,7 @@
<key>system.marsstatus[{$RESNAME},is-primary]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -658,7 +697,7 @@
<key>system.marsstatus[{$RESNAME},is-replaying]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -697,7 +736,7 @@
<key>system.marsstatus[{$RESNAME},is-syncing]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -736,7 +775,7 @@
<key>system.marsstatus[{$RESNAME},open-count]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -775,7 +814,7 @@
<key>system.marsstatus[{$RESNAME},replay_rate]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -814,7 +853,7 @@
<key>system.marsstatus[{$RESNAME},sync]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -853,7 +892,7 @@
<key>system.mars[propagate_interval_sec]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -892,7 +931,7 @@
<key>system.mars[scan_interval_sec]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -931,7 +970,7 @@
<key>system.mars[statusfiles_rollover_sec]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -970,7 +1009,7 @@
<key>system.mars[sync_flip_interval_sec]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -1009,7 +1048,7 @@
<key>system.mars[traffic_tuning/client_role_traffic_rate_kb]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -1048,7 +1087,7 @@
<key>system.mars[traffic_tuning/server_role_traffic_rate_kb]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -1087,7 +1126,7 @@
<key>system.mars[traffic_tuning/writeback_rate_kb]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -1126,7 +1165,7 @@
<key>system.mars[traffic_tuning/writeback_until_percent]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -1165,7 +1204,7 @@
<key>system.mars[traffic_tunning/client_role_traffic_count_ops]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -1204,7 +1243,7 @@
<key>system.mars[traffic_tunning/server_role_traffic_count_ops]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -1243,7 +1282,7 @@
<key>system.mars[write_throttle_count_ops]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -1282,7 +1321,7 @@
<key>system.mars[write_throttle_rate_kb]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<trends>90</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
@ -1344,7 +1383,7 @@
<sort_triggers>0</sort_triggers>
<resource>
<name>mars count percent</name>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
</resource>
</screen_item>
<screen_item>
@ -1364,7 +1403,7 @@
<sort_triggers>0</sort_triggers>
<resource>
<name>mars count sec</name>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
</resource>
</screen_item>
<screen_item>
@ -1384,7 +1423,7 @@
<sort_triggers>0</sort_triggers>
<resource>
<name>mars count kb</name>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
</resource>
</screen_item>
<screen_item>
@ -1404,7 +1443,7 @@
<sort_triggers>0</sort_triggers>
<resource>
<name>mars count ops</name>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
</resource>
</screen_item>
<screen_item>
@ -1424,7 +1463,7 @@
<sort_triggers>0</sort_triggers>
<resource>
<name>mars count io fly</name>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
</resource>
</screen_item>
<screen_item>
@ -1444,7 +1483,7 @@
<sort_triggers>0</sort_triggers>
<resource>
<name>mars count copy fly</name>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
</resource>
</screen_item>
</screen_items>
@ -1454,7 +1493,7 @@
</templates>
<triggers>
<trigger>
<expression>{Template Mars Server:vfs.file.time[/tmp/zabbix.mars,change].fuzzytime(931)}=0</expression>
<expression>{Template App Mars Server:vfs.file.time[/tmp/zabbix.mars,change].fuzzytime(931)}=0</expression>
<name>Mars check not running on {HOST.NAME}</name>
<url/>
<status>0</status>
@ -1464,7 +1503,17 @@
<dependencies/>
</trigger>
<trigger>
<expression>{Template Mars Server:system.marsstatus[{$RESNAME},allow-replay].last(0)}=0 &amp; {Template Mars Server:system.modules[mars].last(0)}=1 &amp; {Template Mars Server:proc.num[mars_light].last(0)}=1</expression>
<expression>{Template App Mars Server:system.mars[emergency-modus].last(0)}=1</expression>
<name>MARS Device on {HOST.NAME} emergency !</name>
<url/>
<status>0</status>
<priority>5</priority>
<description/>
<type>0</type>
<dependencies/>
</trigger>
<trigger>
<expression>{Template App Mars Server:system.marsstatus[{$RESNAME},allow-replay].last(0)}=0 &amp; {Template App Mars Server:system.modules[mars].last(0)}=1 &amp; {Template App Mars Server:proc.num[mars_light].last(0)}=1</expression>
<name>MARS Device on {HOST.NAME} not allowed replay</name>
<url/>
<status>0</status>
@ -1474,7 +1523,17 @@
<dependencies/>
</trigger>
<trigger>
<expression>{Template Mars Server:system.marsstatus[{$RESNAME},attach].last(0)}=0 &amp; {Template Mars Server:system.modules[mars].last(0)}=1 &amp; {Template Mars Server:proc.num[mars_light].last(0)}=1</expression>
<expression>{Template App Mars Server:system.marsstatus[{$RESNAME},is-attached].last(0)}=0 &amp; {Template App Mars Server:system.marsstatus[{$RESNAME},attach].last(0)}=1 &amp; {Template App Mars Server:system.modules[mars].last(0)}=1 &amp; {Template App Mars Server:proc.num[mars_light].last(0)}=1</expression>
<name>MARS Device on {HOST.NAME} not attached</name>
<url/>
<status>0</status>
<priority>1</priority>
<description/>
<type>0</type>
<dependencies/>
</trigger>
<trigger>
<expression>{Template App Mars Server:system.marsstatus[{$RESNAME},attach].last(#2)}=0 &amp; {Template App Mars Server:system.modules[mars].last(#2)}=1 &amp; {Template App Mars Server:proc.num[mars_light].last(#2)}=1</expression>
<name>MARS Device on {HOST.NAME} not attached</name>
<url/>
<status>0</status>
@ -1484,7 +1543,17 @@
<dependencies/>
</trigger>
<trigger>
<expression>{Template Mars Server:system.marsstatus[{$RESNAME},connect].last(0)}=0 &amp; {Template Mars Server:system.modules[mars].last(0)}=1 &amp; {Template Mars Server:proc.num[mars_light].last(0)}=1</expression>
<expression>{Template App Mars Server:system.marsstatus[{$RESNAME},attach].last(#2)}=0</expression>
<name>MARS Device on {HOST.NAME} not attached</name>
<url/>
<status>0</status>
<priority>1</priority>
<description>{Template App Mars Server:system.marsstatus[{$RESNAME},is-attached].last(0)}=0 &amp; {Template App Mars Server:system.marsstatus[{$RESNAME},attach].last(0)}=1 &amp; {Template App Mars Server:system.modules[mars].last(0)}=1 &amp; {Template App Mars Server:proc.num[mars_light].last(0)}=1</description>
<type>0</type>
<dependencies/>
</trigger>
<trigger>
<expression>{Template App Mars Server:system.marsstatus[{$RESNAME},connect].last(0)}=0 &amp; {Template App Mars Server:system.modules[mars].last(0)}=1 &amp; {Template App Mars Server:proc.num[mars_light].last(0)}=1</expression>
<name>MARS Device on {HOST.NAME} not connected</name>
<url/>
<status>0</status>
@ -1494,17 +1563,7 @@
<dependencies/>
</trigger>
<trigger>
<expression>{Template Mars Server:system.marsstatus[{$RESNAME},is-attached].last(0)}=0 &amp; {Template Mars Server:system.marsstatus[{$RESNAME},attach].last(0)}=1 &amp; {Template Mars Server:system.modules[mars].last(0)}=1 &amp; {Template Mars Server:proc.num[mars_light].last(0)}=1</expression>
<name>MARS Device on {HOST.NAME} not is-attached</name>
<url/>
<status>0</status>
<priority>1</priority>
<description/>
<type>0</type>
<dependencies/>
</trigger>
<trigger>
<expression>{Template Mars Server:system.marsstatus[{$RESNAME},is-replaying].last(0)}=0 &amp; {Template Mars Server:system.marsstatus[{$RESNAME},allow-replay].last(0)}=1 &amp; {Template Mars Server:system.modules[mars].last(0)}=1 &amp; {Template Mars Server:proc.num[mars_light].last(0)}=1</expression>
<expression>{Template App Mars Server:system.marsstatus[{$RESNAME},is-replaying].last(0)}=0 &amp; {Template App Mars Server:system.marsstatus[{$RESNAME},allow-replay].last(0)}=1 &amp; {Template App Mars Server:system.modules[mars].last(0)}=1 &amp; {Template App Mars Server:proc.num[mars_light].last(0)}=1</expression>
<name>MARS Device on {HOST.NAME} not is-replaying</name>
<url/>
<status>1</status>
@ -1514,7 +1573,7 @@
<dependencies/>
</trigger>
<trigger>
<expression>{Template Mars Server:system.marsstatus[{$RESNAME},sync].last(0)}=0 &amp; {Template Mars Server:system.modules[mars].last(0)}=1 &amp; {Template Mars Server:proc.num[mars_light].last(0)}=1</expression>
<expression>{Template App Mars Server:system.marsstatus[{$RESNAME},sync].last(0)}=0 &amp; {Template App Mars Server:system.modules[mars].last(0)}=1 &amp; {Template App Mars Server:proc.num[mars_light].last(0)}=1</expression>
<name>MARS Device on {HOST.NAME} not sync</name>
<url/>
<status>0</status>
@ -1524,7 +1583,7 @@
<dependencies/>
</trigger>
<trigger>
<expression>{Template Mars Server:system.modules[mars].last(0)}=0 | {Template Mars Server:proc.num[mars_light].last(0)}=0</expression>
<expression>{Template App Mars Server:system.modules[mars].last(0)}=0 | {Template App Mars Server:proc.num[mars_light].last(0)}=0</expression>
<name>MARS Module on {HOST.NAME} not loaded or running</name>
<url/>
<status>0</status>
@ -1561,7 +1620,7 @@
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
<key>system.mars[copy_read_max_fly]</key>
</item>
</graph_item>
@ -1573,7 +1632,7 @@
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
<key>system.mars[copy_write_max_fly]</key>
</item>
</graph_item>
@ -1605,7 +1664,7 @@
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
<key>system.mars[io_flying_count]</key>
</item>
</graph_item>
@ -1637,7 +1696,7 @@
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
<key>system.mars[io_tuning/writeback_rate_kb]</key>
</item>
</graph_item>
@ -1649,7 +1708,7 @@
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
<key>system.mars[logger_mem_used_kb]</key>
</item>
</graph_item>
@ -1661,7 +1720,7 @@
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
<key>system.mars[traffic_tuning/client_role_traffic_rate_kb]</key>
</item>
</graph_item>
@ -1673,7 +1732,7 @@
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
<key>system.mars[traffic_tuning/server_role_traffic_rate_kb]</key>
</item>
</graph_item>
@ -1685,7 +1744,7 @@
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
<key>system.mars[traffic_tuning/writeback_rate_kb]</key>
</item>
</graph_item>
@ -1697,7 +1756,7 @@
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
<key>system.mars[write_throttle_rate_kb]</key>
</item>
</graph_item>
@ -1729,7 +1788,7 @@
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
<key>system.mars[traffic_tunning/client_role_traffic_count_ops]</key>
</item>
</graph_item>
@ -1741,7 +1800,7 @@
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
<key>system.mars[traffic_tunning/server_role_traffic_count_ops]</key>
</item>
</graph_item>
@ -1753,7 +1812,7 @@
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
<key>system.mars[write_throttle_count_ops]</key>
</item>
</graph_item>
@ -1785,7 +1844,7 @@
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
<key>system.mars[io_tuning/writeback_until_percent]</key>
</item>
</graph_item>
@ -1817,7 +1876,7 @@
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
<key>system.mars[mapfree_period_sec]</key>
</item>
</graph_item>
@ -1829,7 +1888,7 @@
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
<key>system.mars[propagate_interval_sec]</key>
</item>
</graph_item>
@ -1841,7 +1900,7 @@
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
<key>system.mars[scan_interval_sec]</key>
</item>
</graph_item>
@ -1853,7 +1912,7 @@
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
<key>system.mars[statusfiles_rollover_sec]</key>
</item>
</graph_item>
@ -1865,7 +1924,7 @@
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>Template Mars Server</host>
<host>Template App Mars Server</host>
<key>system.mars[sync_flip_interval_sec]</key>
</item>
</graph_item>