mars/userspace/mars-status

983 lines
35 KiB
Perl

#!/usr/bin/perl -w
# (c) 2012 Joerg Mann / 1&1 Internet AG
#
# $Id: a4e4e506b549c83b43a7e94b7f9fc475fe977e37 $
# last update at now ...
# TODO:
# check todo-global delete-logfiles
# check late (subsequent) join (log-v-4 ...), resize, delay, status values
# check monitoring -> redesign status codes
###
use warnings;
use strict;
use English;
use Getopt::Long;
use Term::ANSIColor;
use Date::Language;
use POSIX qw(strftime);
use File::Basename;
### defaults
my $version       = "0.071";
my $alife_timeout = "30";      # seconds without alive-link update before a node counts as not alive
my $is_tty        = 0;         # switched to 1 below when stdout is a terminal
my $mars_dir      = '/mars';
my $himself       = `uname -n` or die "cannot determine my network node name\n";
chomp $himself;
my $clearscreen   = `clear`;   # terminal sequence printed before every refresh

# Status collected by monitoring(): one pair for the whole run and one
# pair that display_partner() resets after every node.
my $StatusCode     = 'UpToDate';
my @StatusText     = ();
my $NodeStatusCode = 'UpToDate';
my @NodeStatusText = ();

# NOTE: the variable is named "blue" but the colour in use is yellow.
my $Color_blue  = 'yellow';
my $Color_green = 'green';

### ARGV
# Parse the options into a hash ref. GetOptions() returns false when it
# met an unknown or malformed option (it has already warned on STDERR);
# show the usage instead of silently running with a wrong option set.
my $params = {};
GetOptions(
    $params,
    'help', 'h', 'version', 'v', 'monitor', 'role', 'cstate', 'dstate',
    'resource=s', 'system', 'interval=i', 'history', 'debug'
) or display_help("Syntax-Error: unknown or malformed option!");
### small help
# Print the usage text and terminate the program.
#
# Optional argument: an error message that is printed in front of the
# usage text. When a message is given the process exits with status 1 so
# that callers (shell scripts, monitoring) can detect the wrong call;
# a plain help request exits with status 0.
sub display_help {
    my $HelpText = shift;
    print "$HelpText\n\n" if ($HelpText);

    # One entry per output line; '' produces an empty line.
    my @usage = (
        'Usage: mars-status [--help]',
        'Usage: mars-status [--version]',
        'Usage: mars-status (without specification of parameters, an abstract of all the informations spent)',
        'Usage: mars-status [--resource <RESNAME>] [--interval <seconds>] | [--history] | [--debug ] | [--system]',
        'Usage: mars-status [--resource <RESNAME>] --monitor ',
        'Usage: mars-status --resource <RESNAME> [--role | --cstate | --dstate]',
        ' --resource : limits the display to the specified resource',
        ' --interval : refreshes the display every second xxx',
        ' --history : shows information about the log files, version numbers and their status',
        ' --system : display mars-system informations',
        ' --monitor : indicator to use for monitoring on all state (by local node only !)',
        ' --role|--cstate|--dstate single state on lokal node',
        ' --debug : additional display debug messages',
        '',
        'Usage small include rotate : mars-status --interval 2',
        'Usage monitoring : mars-status --monitor',
        'Usage monitoring drbd-linke : mars-status --cstate (or --dstate or --role)',
        'Usage full, include debug : mars-status --system --history --debug',
        '',
        # the scheme used to be doubled ("http://http://"), which made the link unusable
        'Advanced information are also available here: http://wiki.intranet.1and1.com/ ->ProjektTEC1603 ->TECITO.1735 -> MARS',
    );
    print "$_\n" for @usage;

    exit( $HelpText ? 1 : 0 );
}
# --help / -h: print the usage and leave
if ( $params->{help} || $params->{h} ) {
    display_help();
}

# Ctrl-C: reset the terminal colour, clear the screen and leave
$SIG{INT} = sub {
    print color 'reset';
    print $clearscreen;
    exit;
};

#########################################################################################
### figure out TTY
# Follow the symlink chain that starts at /dev/stdout to the final
# target. readlink() returns undef when /dev/stdout is not a symlink
# (or cannot be read); in that case nothing is known about the target
# and the output is treated as "not a terminal" instead of feeding
# undef into readlink and the pattern match.
my $tty = readlink '/dev/stdout';
if ( defined $tty ) {
    while ( my $temp = readlink $tty ) {
        $tty = $temp;
    }
}
else {
    $tty = '';
}

# pseudo terminals (/dev/pts/N) and consoles (/dev/ttyN) get colours
if ( $tty =~ m{^/dev/(?:pts/|tty)} ) {
    $is_tty = 1;
}
#########################################################################################
### print color
# Central output routine.
#
#   $Text  - text to print
#   $Color - Term::ANSIColor attribute, defaults to 'FAINT'
#   $Level - monitor tag ('Rmonitor', 'Dmonitor', 'Cmonitor'); empty for
#            normal display output
#
# Without --monitor only untagged text is printed (coloured when stdout
# is a terminal). With --monitor only tagged text is printed; if one of
# --role / --dstate / --cstate is set, the first line carrying the
# matching tag is printed and the program exits.
sub print_screen {
    my ( $Text, $Color, $Level ) = @_;
    my $monitor_mode = $params->{'monitor'};

    if ( not( $monitor_mode or $Level ) ) {
        ### default
        $Color = 'FAINT' if ( !$Color );
        print color "$Color" if ($is_tty);
        print "$Text";
        print color 'reset' if ($is_tty);
    }
    elsif ( $monitor_mode && $Level ) {
        ### monitor
        # tag => command line switch that asks for exactly this tag
        my %switch_for = (
            Rmonitor => 'role',
            Dmonitor => 'dstate',
            Cmonitor => 'cstate',
        );
        my $wanted = $switch_for{$Level};

        if ( defined $wanted && $params->{$wanted} ) {
            print "$Text\n";
            exit;
        }
        elsif ( !grep { $params->{$_} } qw(role dstate cstate) ) {
            print "$Text\n";
        }
    }
}
#########################################################################################
### read link
# Return the target of the symlink $path, or 0 when the link cannot be
# read or its target is a false value.
sub check_link {
    my ($path) = @_;
    return readlink($path) || 0;
}
#########################################################################################
### read links
# Print "on" (green) when the symlink $path has a true target and "off"
# (red) otherwise. Returns what check_link() delivered for the link.
sub convert_link {
    my ($path) = @_;
    my $target = check_link("$path");

    my ( $label, $color ) = $target
        ? ( "on", "$Color_green" )
        : ( "off", 'red' );
    print_screen( $label, $color );

    return $target;
}
#########################################################################################
### StatusCode
# Record one finding for the monitoring result.
#
#   $Code - status code; empty or undefined means 'UpToDate'
#   $Text - short description, appended to both text lists
#
# Codes used by the callers:
#   UpToDate   - everything fine
#   UpDateIng  - worker
#   OutDate    - replaying
#   InvaliDate - syncing
#   SwitchOff  - SwitchOff
#   Failed     - system, network, etc.
#   unknown    - not joined
#
# $StatusCode (whole run) keeps the FIRST code that is not 'UpToDate'.
# The previous test was inverted ("ne" instead of "eq"): the global code
# started as 'UpToDate' and therefore could never change, so --monitor
# always finished with exit status 0.
# $NodeStatusCode (current node) holds the LATEST code that is not
# 'UpToDate'; display_partner() resets it after every node.
sub monitoring {
    my ( $Code, $Text ) = @_;
    $Code = "UpToDate" if ( !defined $Code || $Code eq '' );

    # global
    if ( $StatusCode eq 'UpToDate' ) {
        $StatusCode = $Code;
    }

    # local-node
    if ( $Code ne 'UpToDate' ) {
        $NodeStatusCode = $Code;
    }

    push @StatusText,     $Text;
    push @NodeStatusText, $Text;
    return;
}
#########################################################################################
### sub display resource-partner
# Print the state of one node for one resource and feed every finding
# into monitoring().
#
# Named arguments:
#   ressource      - name of the resource directory below $mars_dir
#   nodename       - node whose state is shown
#   ressource_size - size of the resource (target of the "size" link)
#   res_partner    - reference to a counter; incremented once the node
#                    turned out to be joined
#   res_AULogfile  - logfile name ("log-<version>") used by the local
#                    node; a different name on this node gives a hint.
#                    Empty when the local node itself is displayed.
#
# Returns the logfile name this node replays, or nothing when the node
# is not joined to the resource. In monitor mode print_screen() may end
# the program from inside this sub.
sub display_partner {
    my %p              = @_;
    my $PRes           = $p{ressource};
    my $PName          = $p{nodename};
    my $PSize          = $p{ressource_size};
    my $ref_ResPartner = $p{res_partner};
    my $ref_AULogfile  = $p{res_AULogfile};

    # Stand-in size for a missing or empty logfile; keeps the percentage
    # calculation below away from a division by zero.
    my $EMPTY_LOGSIZE = 0.0001;

    my $PStatus = check_link("$mars_dir/$PRes/primary");
    my $PDevice = check_link("$mars_dir/$PRes/device-$PName");
    my $Ljoined = check_link("$mars_dir/$PRes/device-$himself");

    ##########################################################################
    ### check status
    if ( $PStatus eq $PName ) {
        print_screen( "Primary", "$Color_blue" );
        print_screen( "Primary [$PRes on $PName]", '', 'Rmonitor' );
        monitoring( '', "joined" );
    }
    elsif ( $PDevice eq 0 ) {
        print_screen( "not joined", 'red' );
        print_screen( "not joined\n", '', 'Rmonitor' );
        print_screen( " -> Resource is not joined to this node\n", 'red' );
        monitoring( "unknown", "not joined" );
        return;
    }
    else {
        print_screen( "Secondary", "$Color_blue" );
        print_screen( "Secondary [$PRes on $PName]", '', 'Rmonitor' );
        monitoring( "", "joined" );
    }

    ##########################################################################
    ### check alive
    # age of the alive link (its mtime) beyond the allowed timeout
    my @PAlive     = lstat("$mars_dir/alive-$PName");
    my $last_alive = $PAlive[9] ? $PAlive[9] : 0;
    my $PAlive     = time() - $last_alive - $alife_timeout;
    print_screen( ", System", '' );
    if ( $PAlive > 1 ) {
        print_screen( " unknown (last message before $PAlive sec) !!!\n", 'red' );
        monitoring( "Failed", "not alive" );
    }
    else {
        print_screen( " alive\n", "$Color_green" );
        monitoring( "", "alive" );
    }

    ##########################################################################
    ### check device
    print_screen( "\tDevices : Disk-Device " . check_link("$mars_dir/$PRes/data-$PName") );
    print_screen(", used as Mars-Device /dev/mars/$PDevice");
    my $ASize = check_link("$mars_dir/$PRes/actsize-$PName");
    if ( $PSize eq $ASize ) {
        print_screen(", not resized");
    }
    else {
        print_screen( "\n\t\t---> HINT: resizing used ($PSize != $ASize)", 'red' );
    }

    ##########################################################################
    ### check mountpoint (local node only)
    if ( $himself eq $PName ) {
        my $PUDevice = "/dev/mars/$PDevice";
        if ( stat($PUDevice) ) {
            open my $fh, '<', '/proc/mounts' or die "cannot open /proc/mounts: $!";
            # \Q..\E: the device path is data, not a pattern
            my ($mount_line) = grep { /^\Q$PUDevice\E / } <$fh>;
            close $fh;
            if ($mount_line) {
                my $mount_point = ( split / /, $mount_line )[1];
                print_screen( " and mountet as $mount_point\n", "$Color_blue" );
            }
            else {
                print_screen( "\n\t\t---> TODO: enable to mount\n", "$Color_green" );
            }
        }
        else {
            print_screen( "\n\t\t---> HINT: unable to mount, Device is Secondary or mars is starting\n", "$Color_blue" );
        }
    }
    else {
        print_screen("\n");
    }
    $$ref_ResPartner++;

    ##########################################################################
    ### check sync
    ### sync - status
    my $PSyncsize = check_link("$mars_dir/$PRes/syncstatus-$PName");
    my $SStatus   = sprintf( "%.2f", $PSize ? ( $PSyncsize / $PSize * 100 ) : 0 );
    print_screen( sprintf "\tSync : %s bytes (%.3fTB) synced = ", $PSyncsize, ( $PSyncsize / 1024 / 1024 / 1024 / 1024 ) );
    ### sync - speed
    my $SSpeed = check_link("$mars_dir/$PRes/actual-$PName/sync_rate");
    $SSpeed = sprintf( "%.2f", $SSpeed / 1024 / 1024 );
    if ( $SSpeed eq "0.00" ) {
        $SSpeed = "%";
    }
    else {
        $SSpeed = "%, by $SSpeed mb/s";
    }
    ### sync - results
    if ( $SStatus < 100 ) {
        print_screen("$SStatus$SSpeed\n");
        print_screen( "\t\t---> WORK: Sync in progress = ($SStatus% < 100.00%)\n", 'red' );
        monitoring( "InvaliDate", "not in sync ($SStatus%)" );
    }
    else {
        print_screen( "$SStatus$SSpeed\n", "$Color_green" );
        monitoring( "", "synced" );
    }

    ##########################################################################
    ### TODO: work by resize ...
    ##########################################################################
    ### check logfile
    ### logfile - status
    # replay link: "<logfile name>,<replayed bytes>,<todo bytes>"
    my @PLogFile = split( ',', check_link("$mars_dir/$PRes/replay-$PName") );
    my @PLogLink = split( "-", $PLogFile[0] );
    ### TODO: no logfile present ... hmm.
    $PLogLink[1] = '' if ( !defined $PLogLink[1] );
    my $PLogName = "$PLogLink[0]-$PLogLink[1]";
    my $PLogSize = -s "$mars_dir/$PRes/$PLogFile[0]";
    if ( !$PLogFile[1] ) {
        $PLogFile[1] = 0;
        $PLogFile[2] = 0;
    }
    $PLogFile[2] = 0 if ( !defined $PLogFile[2] );
    $PLogSize = $EMPTY_LOGSIZE if ( !$PLogSize );
    my $LogSpeed = check_link("$mars_dir/$PRes/actual-$PName/file_rate");
    $LogSpeed = sprintf( "%.2f", $LogSpeed / 1024 / 1024 );
    ### TODO:
    ### logfile - delay
    ### logfile - results
    print_screen( sprintf "\tLogfile : %s bytes (%.3fGB) in ", $PLogSize, ( $PLogSize / 1024 / 1024 / 1024 ) );
    print_screen( "$PLogName", "$Color_green" );
    print_screen(" active");
    print_screen(", received with $LogSpeed mb/s") if ( $LogSpeed ne "0.00" );
    print_screen("\n");
    if ( $Ljoined eq "0" || $PLogSize == $EMPTY_LOGSIZE ) {
        print_screen( "\t\t---> WORK: Logfile empty = (Size: $PLogSize)\n", 'red' );
    }
    if ( ($ref_AULogfile) && !( $PLogName eq $ref_AULogfile ) ) {
        print_screen( "\t\t---> HINT: Logfile Version not actual = ($PLogName ! $ref_AULogfile)\n", 'red' );
    }

    ##########################################################################
    ### check replay ...
    ### replay - status
    my $RStatus = sprintf( "%.2f", ( $PLogFile[1] / $PLogSize * 100 ) );
    # nothing to relate the position to (was compared with a stale "1")
    $RStatus = 0 if ( $Ljoined eq "0" || $PLogSize == $EMPTY_LOGSIZE );
    # never show a rounded 100% while bytes are still missing
    $RStatus = 99.99 if ( ( $PLogFile[1] ne $PLogSize ) && ( $RStatus eq "100.00" ) );
    print_screen(
        sprintf(
            "\tReplayed: %s bytes (%.3fGB) now replayed, Todo %d (%.3fGB) = ",
            $PLogFile[1], ( $PLogFile[1] / 1024 / 1024 / 1024 ),
            $PLogFile[2], ( $PLogFile[2] / 1024 / 1024 / 1024 )
        )
    );
    ### replay - speed
    my $RSpeed = check_link("$mars_dir/$PRes/actual-$PName/replay_rate");
    $RSpeed = sprintf( "%.2f", $RSpeed / 1024 / 1024 );
    if ( $RSpeed eq "0.00" ) {
        $RSpeed = "%";
    }
    else {
        $RSpeed = "%, by $RSpeed mb/s";
    }
    ### replay - results
    if ( ( $RStatus < 1 ) && ( $PLogSize != $EMPTY_LOGSIZE ) ) {
        print_screen("$RStatus$RSpeed\n");
        print_screen( "\t\t---> HINT: Replay not started, Logfile inactive = (Size: $PLogSize)\n", 'red' );
        monitoring( "OutDate", "replay stopped" );
    }
    elsif ( ( $RStatus < 100 ) && ( $PLogSize != $EMPTY_LOGSIZE ) ) {
        print_screen("$RStatus$RSpeed\n");
        print_screen( "\t\t---> WORK: Replay in progress = ($RStatus% < 100.00%)\n", 'red' );
        monitoring( "UpDateIng", "replay running1" );
    }
    elsif ( $PLogFile[2] > 0 ) {
        $RStatus = sprintf( "%.2f", ( $PLogFile[1] - $PLogFile[2] ) / $PLogFile[1] * 100 );
        print_screen( "$RStatus$RSpeed\n", 'red' );
        monitoring( "UpDateIng", "replay running2" );
    }
    elsif ( $PLogSize == $EMPTY_LOGSIZE ) {
        # This used to be an assignment ("="), which is always true and
        # made the branch below unreachable.
        $RStatus = "100.00";
        print_screen( "$RStatus$RSpeed\n", "$Color_green" );
        monitoring( '', "replay wait" );
    }
    else {
        # $RSpeed already starts with the percent sign
        print_screen( "$RStatus$RSpeed\n", "$Color_green" );
        monitoring( '', "replaying" );
    }
    ### replay - hints
    if ( $PLogFile[2] != 0 ) {
        print_screen( "\t\t---> HINT: Replay-Todo is actualy $PLogFile[2], ", "$Color_blue" );
        if ( $PLogFile[2] < 0 ) {
            print_screen( "replaying backwards ??? Check this !!!\n", 'red' );
        }
        elsif ( $PLogFile[2] > 0 ) {
            print_screen("mars it's working ...\n");
        }
        else {
            print_screen( "replaying working unknown ... Check this !!!\n", 'red' );
        }
    }

    ##########################################################################
    ### check actual
    my $ActStatus = check_link("$mars_dir/$PRes/actual-$PName/is-primary");
    if ( $ActStatus eq 1 ) {
        print_screen("\tActual : Status=Primary, used Device=");
        convert_link("$mars_dir/$PRes/actual-$PName/device-$PDevice");
        print_screen("\n");
    }
    else {
        print_screen("\tActual : Status=Secondary, Syncstatus=");
        convert_link("$mars_dir/$PRes/actual-$PName/copy-syncstatus-$PName");
        print_screen(", Logfileupdate=");
        convert_link("$mars_dir/$PRes/actual-$PName/logfile-update");
        print_screen("\n");
    }

    ##########################################################################
    ### check switches
    # One todo switch: "on" when the link target is 1, otherwise "off".
    # check_link() is used so that a missing link counts as "off"
    # without comparing an undefined value.
    my $show_switch = sub {
        my ( $switch, $on_text, $off_text ) = @_;
        if ( check_link("$mars_dir/$PRes/todo-$PName/$switch") eq 1 ) {
            print_screen( "on", "$Color_green" );
            monitoring( "", $on_text );
        }
        else {
            print_screen( "off", 'red' );
            monitoring( "SwitchOff", $off_text );
        }
    };
    print_screen("\tSwitches: Attach=");
    $show_switch->( 'attach', "attached", "attach off" );
    print_screen(" [masked:") if ( $ActStatus eq 1 );
    print_screen(" Connect=");
    $show_switch->( 'connect', "connected", "connect off" );
    print_screen(" Sync=");
    $show_switch->( 'sync', "synced", "sync off" );
    print_screen(" AllowReplay=");
    $show_switch->( 'allow-replay', "replayed", "replay off" );
    print_screen("]") if ( $ActStatus eq 1 );
    print_screen("\n");

    ##########################################################################
    ### node status
    my $NodeStatusText = join '', map { "($_)" } @NodeStatusText;
    ### normal mode
    print_screen( "\tStatus : $NodeStatusCode = $NodeStatusText\n", '' );
    ### monitor mode
    print_screen( "$NodeStatusCode [$NodeStatusText]", '', 'Dmonitor' );
    if ( $NodeStatusCode eq 'SwitchOff' ) {
        print_screen( "Disconnect [$PRes on $PName]", '', 'Cmonitor' );
    }
    else {
        print_screen( "Connect [$PRes on $PName]", '', 'Cmonitor' );
    }
    ### reset values for the next node
    $NodeStatusCode = 'UpToDate';
    @NodeStatusText = ();
    return $PLogName;
}
#########################################################################################
### check ressources
# Walk over all resource directories below $mars_dir (optionally limited
# by --resource) and print, per resource: the header line, the local
# node, the remote nodes (not in monitor mode), the operating mode and,
# on request, the logfile history and the debug status files.
#
# Dies when $mars_dir or a resource directory cannot be opened.
sub check_ressource {
    opendir my $dirhandle, $mars_dir or die "Cannot open $mars_dir: $!";
    my @resources = sort grep { /^res/ && -d "$mars_dir/$_" } readdir $dirhandle;
    closedir $dirhandle;

    if ( !@resources ) {
        print_screen( "---> HINT: no resources found\n", 'red' );
        # This was "next": inside a sub that leaves via the caller's
        # loop, so the main loop restarted at once and never reached
        # its exit or sleep.
        return;
    }

    ### read resources
    foreach my $res (@resources) {
        my $ResPartner = 0;
        ( my $res_name = $res ) =~ s/^resource-//;
        if ( $params->{'resource'} && $params->{'resource'} ne $res_name ) {
            next;
        }

        my $res_size = check_link("$mars_dir/$res/size");
        $res_size = 1 if ( $res_size eq 0 );    # avoids a division by zero further down
        my $res_tbsize = $res_size / 1024 / 1024 / 1024 / 1024;
        my $res_master = check_link("$mars_dir/$res/primary");
        $res_master = "unknown" if ( $res_master eq 0 );
        print_screen( sprintf( "-> check resource %s, with %.3fTB, Primary Node is %s\n", $res_name, $res_tbsize, $res_master ), 'bold' );

        ### him self
        print_screen( " -> local Node ($himself) as ", 'bold' );
        my $ActualUsedLogfile = display_partner(
            ressource      => $res,
            nodename       => $himself,
            ressource_size => $res_size,
            res_partner    => \$ResPartner,
            res_AULogfile  => "",
        );

        ### joined (and not monitor) ...
        if ( ( $ResPartner eq 1 ) && ( !$params->{'monitor'} ) ) {
            ### partners: every data-<node> link with a target
            opendir my $server_dh, "$mars_dir/$res" or die "Cannot open $mars_dir/$res: $!";
            my @servers = sort grep { /^data/ && readlink "$mars_dir/$res/$_" } readdir $server_dh;
            closedir $server_dh;

            foreach my $server (@servers) {
                ( my $partner = $server ) =~ s/^data-//;
                next if ( $partner eq $himself );
                print_screen( " -> remote Node ($partner) as ", 'bold' );
                display_partner(
                    ressource      => $res,
                    nodename       => $partner,
                    ressource_size => $res_size,
                    res_partner    => \$ResPartner,
                    res_AULogfile  => $ActualUsedLogfile,
                );
            }
        }

        ### modus
        if ( $ResPartner eq 0 ) {
            print_screen( " -> modus for $res_name is remote ($ResPartner nodes)\n", 'bold' );
        }
        elsif ( $ResPartner eq 1 ) {
            print_screen( " -> modus for $res_name is standalone ($ResPartner node)\n", 'bold' );
        }
        else {
            print_screen( " -> modus for $res_name is clustered ($ResPartner nodes)\n ", 'bold' );
        }

        ### resources history
        if ( $params->{'history'} ) {
            check_logfile( $res, $ResPartner );
        }

        ### check resources debug
        if ( $params->{'debug'} ) {
            print_screen( " -> $res-Debug:\n", "$Color_blue" );
            ### TODO: small hack, read 3 files ...
            foreach my $debug_level ( "2.warn", "3.error", "4.fatal" ) {
                my $debug_res = check_debugfile( "$res", $debug_level );
                print_screen("$debug_res") if ($debug_res);
            }
        }
    }    # end foreach
    return;
}
#########################################################################################
###
# True when one of the replay-* links in $res_dir points to the logfile
# with version $version. Replaces the former "ls -l ... | grep" shell
# pipeline, which also matched the version inside dates, sizes or host
# names of the listing.
sub _replay_uses_version {
    my ( $res_dir, $version ) = @_;
    foreach my $replay_link ( glob("$res_dir/replay-*") ) {
        my $target = readlink $replay_link;
        next if ( !defined $target );
        return 1 if ( $target =~ m{(?:^|/)log-\Q$version\E-} );
    }
    return 0;
}

# Print the history of all logfiles of one resource (--history).
#
#   $LResource - name of the resource directory below $mars_dir
#   $LPartner  - number of joined nodes; that many version links are
#                expected per logfile
#
# For every real logfile (directory entries log* that are not symlinks)
# the version links version-<nr>* are listed and compared (checksum =
# field 0, replay position = field 2), followed by one verdict line and
# a hint for every delete request that names the logfile.
sub check_logfile {
    my ( $LResource, $LPartner ) = @_;
    my $res_dir     = "$mars_dir/$LResource";
    my $oldEqual    = 0;
    my $LogFailed   = 0;    # deliberately kept across logfiles
    my $LogCount    = 0;
    my $LogCountNow = 1;
    my @logfile     = glob("$res_dir/log*");
    my $LogCountSum = scalar @logfile;

    # The delete requests do not depend on the logfile: resolve once.
    my @delete_targets;
    foreach my $delete_link ( glob("$mars_dir/todo-global/delete-*") ) {
        my $target = readlink $delete_link;
        push @delete_targets, ( $target ? basename($target) : "n/a" );
    }

    print_screen( " -> History Replay/Status\n", "$Color_blue" );

    ### search all logfiles
    foreach my $logfile (@logfile) {
        my $LVersion = $logfile;
        $LVersion =~ s/^.*log-([0-9]+)-.*$/$1/;

        # symlinks are no logfiles
        next if ( check_link("$logfile") ne 0 );

        ### found logfile
        my $LogSize = -s "$logfile";
        $LogSize = 0 if ( !$LogSize );

        ### logfile stat-values (0 when the file vanished meanwhile)
        my ( $atime, $mtime, $ctime ) = map { defined $_ ? $_ : 0 } ( stat($logfile) )[ 8, 9, 10 ];
        my $created = gmtime($ctime);
        ### quickfix: distance between last access and last change
        my $span = abs( $mtime - $atime );
        print_screen(
            sprintf "\tLogfile Version: $LVersion Size: $LogSize bytes (%.3fGB) from %s, include hypothetically %s sec\n",
            ( $LogSize / 1024 / 1024 / 1024 ), $created, $span
        );

        ### search all version links of this logfile
        my $allEqual = 1;    ### version links agree
        my $OldCheck;        ### checksum from first version link
        my $OldSize;         ### position from first version link
        foreach my $version_link ( glob("$res_dir/version-$LVersion*") ) {
            my @LogDetail = split( ',', check_link("$version_link") );
            $LogDetail[0] = '' if ( !defined $LogDetail[0] );
            $LogDetail[2] = 0  if ( !defined $LogDetail[2] );
            ### name of source
            ( my $LogServer = $version_link ) =~ s/.*[0-9]-//;
            ### rest of logfile
            my $ActLogSize = $LogSize - $LogDetail[2];
            $LogCount++;
            ### output
            print_screen( sprintf "\t\tSource: $LogServer \tCheck: $LogDetail[0] \tTodo: %.3fGB \tReplayPosition: $LogDetail[2]\n", $ActLogSize / 1024 / 1024 / 1024 );

            if ( !defined $OldCheck ) {
                ### first version link
                $OldCheck  = $LogDetail[0];
                $OldSize   = $LogDetail[2];
                $LogFailed = 0;
                $allEqual  = 1;
            }
            elsif ( ( $LogDetail[0] ne $OldCheck ) and ( $LogDetail[2] eq $OldSize ) and ( $LogDetail[2] ne 0 ) ) {
                ### checksum different
                print_screen( "\t\t---> TODO: Logfiles has not equal Checksums and same size, check for SPLIT-BRAIN\n", 'red' );
                $LogFailed = 1;
                $allEqual  = 0;
            }
            elsif ( ( $LogDetail[0] ne $OldCheck ) or ( $LogDetail[2] ne $OldSize ) ) {
                ### value different
                $LogFailed = 1;
                $allEqual  = 0;
            }
            else {
                ### all fine
                $LogFailed = 0;
                $allEqual  = 1;
            }
        }
        $oldEqual = $allEqual;

        ### verdict
        if ( !( $LogCount eq $LPartner ) ) {
            print_screen( "\t\t---> TODO: Count of Logfiles different = (have:$LPartner found:$LogCount)\n", 'red' );
            $LogFailed = 1;
            $oldEqual  = 0;
        }
        elsif ( _replay_uses_version( $res_dir, $LVersion ) ) {
            print_screen( "\t\t---> WORK: Logfiles are actual and used, Replay in progess...\n", 'red' );
        }
        elsif ( $LogFailed eq 1 ) {
            print_screen( "\t\t---> WORK: Logfiles has not equal Checksums and different size, Reception in progress ...\n", 'red' );
        }
        elsif ( $LogCountSum eq $LogCountNow ) {
            print_screen( "\t\t---> WORK: Logfiles are actual and unused(1).\n", "$Color_green" );
        }
        elsif ( ( $oldEqual eq 1 ) && defined $OldSize && ( $OldSize eq 0 ) ) {
            print_screen( "\t\t---> WORK: Logfiles are actual and unused(2).\n", "$Color_green" );
        }
        elsif ( $oldEqual eq 1 ) {
            print_screen( "\t\t---> WORK: Logfiles has all equal Sizes and Checksums, can be deleted ...\n", "$Color_green" );
        }
        else {
            print_screen( "\t\t---> TODO: Logfiles has same other errors - Please check History of Logfiles\n", 'red' );
        }

        ### check delete infos
        my $DelLogfile = basename($logfile);
        foreach my $delete_target (@delete_targets) {
            if ( $delete_target eq $DelLogfile ) {
                print_screen( "\t\t---> HINT: LogFile ist marked for delete ($delete_target).\n", "$Color_green" );
            }
        }
        $LogCount = 0;
    }
    continue {
        # position of the next directory entry, symlinks included
        $LogCountNow++;
    }
    return;
}
#########################################################################################
### diskfull
# Evaluate the rest-space-<node> links below $mars_dir. Every node whose
# value is below 1 is reported as "disk full" and recorded through
# monitoring() as 'Failed'; if no node is affected, " ok" is printed.
sub check_disk_is_full {
    my @rest_links    = glob("$mars_dir/rest-space-*");
    my $diskfull_mars = "";
    print_screen( "-> Diskspace on Cluster:", 'bold' );

    foreach my $rest_link (@rest_links) {
        my $diskfull_space = check_link("$rest_link");
        next unless ( $diskfull_space < 1 );

        # node name = link name without its prefix; the prefix follows
        # $mars_dir instead of a hard-coded "/mars"
        ( my $diskfull_system = $rest_link ) =~ s!^\Q$mars_dir\E/rest-space-!!;
        $diskfull_space = sprintf( "%.2f", $diskfull_space / 1024 );
        $diskfull_mars  = "$diskfull_mars,$diskfull_system";

        if ( $diskfull_system eq $himself ) {
            print_screen( "\n\t-> TODO: Local Partition $mars_dir full ($diskfull_space kb Limit) !!! mars is stopping !!!\n\n", "red" );
            monitoring( 'Failed', 'System: Mars-Disk full, MARS stopping' );
        }
        else {
            print_screen( "\n\t-> TODO: Remotesystem $diskfull_system have mars-disk full ($diskfull_space kb Limit) !!!\n\n", "red" );
            monitoring( 'Failed', 'System: Remote-Mars-Disk full' );
        }
    }

    ### TODO: /0
    if ( !$diskfull_mars ) {
        print_screen( " ok\n", "$Color_green" );
    }
    return;
}
#########################################################################################
### check debug-files
# Read the status file "$mars_dir/$debug_dir/$debug_level.status" and
# return its content prepared for display: a leading "<seconds>.<frac>"
# stamp is replaced by a readable local time, lines without a stamp get
# the time of epoch 0 in front, the first "MARS_DEBUG " marker is
# removed and every line is indented by one tab.
#
#   $debug_dir   - directory below $mars_dir ("" for $mars_dir itself)
#   $debug_level - file name without the ".status" suffix, e.g. "2.warn"
#
# Returns nothing when the file cannot be opened or is empty.
sub check_debugfile {
    ### TODO: Fix Level / displaying
    my ( $debug_dir, $debug_level ) = @_;
    my $debug_file = "$mars_dir/$debug_dir/$debug_level.status";

    # three-argument open with a lexical handle: the file name is never
    # interpreted as an open mode and the handle is not a global
    open my $debug_fh, '<', $debug_file or return;

    my $mars_debug = "";
    while ( my $line = <$debug_fh> ) {
        if ( $line =~ m/^(\d+\.\d+)/ ) {
            $line =~ s/^(\d+\.\d+)/strftime("%a %b %e %H:%M:%S %Y:", localtime($1))/e;
        }
        else {
            $line = localtime(0) . ': ' . $line;
        }
        $line =~ s/MARS_DEBUG\s+//;
        $mars_debug .= "\t$line";
    }
    close $debug_fh;

    return if ( $mars_debug eq "" );
    return $mars_debug;
}
#########################################################################################
### info version
# Print the header block: node name and script version, marsadm version,
# version of the mars kernel module and the running kernel.
#
# Returns 1 after the header has been printed. When the mars module is
# not loaded (or modinfo reports no author) a message is printed, the
# problem is recorded through monitoring() as 'Failed' and 0 is
# returned.
#
# The former code left the sub with "next", i.e. through the caller's
# loop: the main loop restarted immediately, so --version and --monitor
# never terminated on a node without the module. Pacing and retry are
# now left to the caller (--interval).
sub info_version {
    my %mars_info;

    ### module loaded? (no shell: lsmod is run directly, filtered here)
    open( my $lsmod_handle, '-|', 'lsmod' ) || die "cannot run lsmod: $!";
    my $module_loaded = grep { /mars/ } <$lsmod_handle>;
    close $lsmod_handle;
    if ( !$module_loaded ) {
        print_screen( "Module Mars not running\n", 'red' );
        monitoring( 'Failed', 'System: Module Mars not running' );
        return 0;
    }

    ### module details
    open( my $modinfo_handle, '-|', 'modinfo', 'mars' ) || die "cannot run modinfo mars: $!";
    while ( my $line = <$modinfo_handle> ) {
        chomp $line;
        # limit 2: keep values that contain ": " themselves
        my ( $key, $value ) = split /: +/, $line, 2;
        if ($value) {
            $mars_info{$key} = $value;
        }
    }
    close $modinfo_handle;
    if ( !defined $mars_info{author} || $mars_info{author} eq "" ) {
        print_screen( "Module Mars not running\n", 'red' );
        monitoring( 'Failed', 'System: Module Mars not running' );
        return 0;
    }

    ### status
    print_screen( "MARS Status - $himself, $version", "$Color_blue" );
    if ( $params->{'resource'} ) {
        print_screen( ", Ressource: $params->{'resource'}", "$Color_blue" );
    }
    print_screen("\n");

    ### marsadm (its output is expected to end with a newline)
    my $MAVersion = qx"marsadm version";
    $MAVersion = "unknown\n" if ( !defined $MAVersion || $MAVersion eq "" );
    print_screen( "MARS Admin - $MAVersion", "$Color_blue" );

    ### module
    my $module_version = defined $mars_info{version} ? $mars_info{version} : "unknown";
    print_screen( "MARS Module - $module_version\n", "$Color_blue" );

    ### kernel: third word of the "Linux version ..." line
    open my $Kfh, '<', '/proc/version' or die "cannot open /proc/version: $!";
    my ($kernel_line) = grep { /^Linux/ } <$Kfh>;
    close $Kfh;
    my $KVersion = defined $kernel_line ? ( split / /, $kernel_line )[2] : undef;
    $KVersion = "unknown" if ( !defined $KVersion );
    print_screen( "MARS Kernel - $KVersion\n", "$Color_blue" );
    print_screen("-------------------------------------------------------------------------------\n");
    return 1;
}
#########################################################################################
### avg_limit
# Report whether the local node is jammed, based on the link
# "$mars_dir/jammed-<local node>".
#
# check_link() gives 0 for a missing link as well as for the target "0";
# both count as "running normally". Any other target is reported and
# recorded through monitoring() as 'Failed'.
# NOTE(review): presumably the kernel module writes "1" into this link
# while it is jammed - confirm against the MARS documentation.
#
# The former condition "!$jammed || $jammed ne 0" was true for every
# possible value, so the failure branch could never be reached.
sub check_jammed {
    my $jammed = check_link("$mars_dir/jammed-$himself");
    print_screen( "-> Mars-Transaktion ", 'bold' );
    if ( !$jammed ) {
        print_screen( "running normaly\n", "$Color_green" );
    }
    else {
        print_screen( "and Replication not runnunig !!!\n", 'red' );
        monitoring( 'Failed', 'System: Replikation not running' );
    }
    return;
}
#########################################################################################
### limit's auslesen ...
# Read "/proc/sys/mars/$name" and return its content without newlines
# and tabs. Returns '' when no name is given or the file cannot be
# opened.
sub _read_mars_limit {
    my ($name) = @_;
    return '' if ( !defined $name || $name eq '' );
    open my $limit_fh, '<', "/proc/sys/mars/$name" or return '';
    my $value = join '', <$limit_fh>;
    close $limit_fh;
    $value =~ s/[\n\t]//g;
    return $value;
}

# Print one line about a tunable below /proc/sys/mars.
#
#   $LimitText   - label of the line
#   $LimitSolVar - file with the configured value ("" = none)
#   $LimitSolEin - unit printed behind the configured value
#   $LimitIstVar - file with the current value ("" = none)
#   $LimitIstEin - unit printed behind the current value
#
# Depending on which files are named, the configured value, the current
# value or both are shown. A value below 1 (also: unreadable file) is
# reported as unused/null. "lamport_clock" is special: the difference
# between its CURRENT_TIME and lamport_now entries is printed.
sub check_limit {
    my ( $LimitText, $LimitSolVar, $LimitSolEin, $LimitIstVar, $LimitIstEin ) = @_;
    ### missing arguments count as empty
    $LimitSolVar = "" if ( !$LimitSolVar );
    $LimitIstVar = "" if ( !$LimitIstVar );
    $LimitSolEin = "" if ( !$LimitSolEin );
    $LimitIstEin = "" if ( !$LimitIstEin );

    ### configured / current value as text
    my $mars_limit_sol = _read_mars_limit($LimitSolVar);
    my $mars_limit_ist = _read_mars_limit($LimitIstVar);
    ### ... and for the numeric comparisons ('' counts as 0)
    my $sol_number = $mars_limit_sol || 0;
    my $ist_number = $mars_limit_ist || 0;

    ### present results
    print_screen( "-> $LimitText: ", 'bold' );
    if ( $LimitSolVar && !$LimitIstVar ) {
        if ( $LimitSolVar eq "lamport_clock" ) {
            ### only configured value & lamport_clock
            my ($C_Time) = $mars_limit_sol =~ /CURRENT_TIME=([0-9.]+)/;
            my ($L_Time) = $mars_limit_sol =~ /lamport_now=([0-9.]+)/;
            $C_Time = 0 if ( !defined $C_Time );
            $L_Time = 0 if ( !defined $L_Time );
            $mars_limit_sol = sprintf( "%.2f", $C_Time - $L_Time );
            print_screen("$mars_limit_sol $LimitSolEin\n");
        }
        elsif ( $sol_number < 1 ) {
            ### only configured value
            print_screen("is now unsed\n");
        }
        else {
            print_screen("is set to ");
            print_screen( "$mars_limit_sol $LimitSolEin\n", 'red' );
        }
    }
    elsif ( !$LimitSolVar && $LimitIstVar ) {
        ### only current value
        if ( $ist_number < 1 ) {
            print_screen("is actualy null\n");
        }
        else {
            print_screen("is actualy ");
            print_screen( "$mars_limit_ist $LimitIstEin\n", 'red' );
        }
    }
    elsif ( $LimitSolVar && $LimitIstVar && ( $sol_number < 1 ) ) {
        ### both named, nothing configured
        print_screen("is actualy unused\n");
    }
    else {
        ### both values / rest ...
        print_screen("is set to ");
        print_screen( "$mars_limit_sol $LimitSolEin", 'red' );
        print_screen(", actualy used ");
        print_screen( "$mars_limit_ist $LimitIstEin\n", 'red' );
    }
    return;
}
##############################################################################
### main loop ...
# Main loop: one pass prints the complete report. Without --interval the
# program ends after the first pass, with --monitor it ends with the
# exit status derived from $StatusCode; otherwise it sleeps for
# --interval seconds and starts over.
while (1) {
    my $dateFormat = Date::Language->new('English');

    ### version only
    if ( $params->{version} || $params->{v} ) {
        info_version();
        exit 0;
    }

    ##########################################################################
    ### main run
    print $clearscreen;
    print "\nNOTE !!!\n********\nThe author does not guarantee this development-test-alpha-pre-beta-version, it is untested and certainly not fully functional. Use at your own risk ;)\n\n";

    ##########################################################################
    ### check and set monitor
    my $single_state = $params->{'cstate'} || $params->{'dstate'} || $params->{'role'};

    ### big monitor: every monitor variant implies the full set of checks
    if ( $params->{'monitor'} || $single_state ) {
        $params->{'system'}  = 1;
        $params->{'history'} = 1;
        ### TODO: check!
        $params->{'debug'}   = 0;
        $params->{'monitor'} = 1;
    }

    ### small monitor: a single state needs a resource
    if ( $single_state && !$params->{'resource'} ) {
        display_help("Syntax-Error: Option resource is missing by --cstate / --dstate / --role!");
    }

    ##########################################################################
    ### read mars infos
    info_version();

    ##########################################################################
    ### check system limits
    if ( $params->{'system'} ) {
        my $mars_disk_space = `df '$mars_dir' | grep '$mars_dir'| awk '{print \$2}'`;
        $mars_disk_space = sprintf( "%01.2f", $mars_disk_space / 1024 );

        ### text / configured file / its unit / current file / its unit
        my @limits = (
            [ "AVG-Limit",                "loadavg_limit",             "loadavg" ],
            [ "Memory-Limit",             "mem_limit_percent",         "%",    "mem_used_raw_kb",          "kb" ],
            [ "Network-IO-Timeout",       "network_io_timeout",        "sec" ],
            [ "Traffic Limit",            "tuning/traffic_limit_kb",   "kb/s", "tuning/traffic_rate_kb",   "kb/s" ],
            [ "Server-IO Limit",          "tuning/server_io_limit_kb", "kb/s", "tuning/server_io_rate_kb", "kb/s" ],
            [ "Delay say Overflow",       "",                          "",     "delay_say_on_overflow",    "(on/off)" ],
            [ "Statusfile Rollover",      "",                          "",     "statusfiles_rollover_sec", "sec" ],
            [ "Flying IO Count",          "",                          "",     "io_flying_count" ],
            [ "LoggerMemory",             "",                          "",     "logger_mem_used_kb",       "kb" ],
            [ "FreeSpaceLimit on /mars",  "free_space_mb",             "mb",   "",                         "$mars_disk_space" ],
            [ "FreeSpaceLimit LogDelete", "logdel_auto_gb",            "gb" ],
            [ "FreeSpaceLimit LogRotate", "logrot_auto_gb",            "gb" ],
            [ "LamportClockDifferenz",    "lamport_clock",             "sec" ],
        );
        foreach my $limit (@limits) {
            check_limit( @{$limit} );
        }

        ### check system params
        check_jammed();
        check_disk_is_full();
    }

    ##########################################################################
    ### check resources
    check_ressource();

    ##########################################################################
    ### check global debug
    if ( $params->{'debug'} ) {
        print_screen( "-> Main-Debug:\n", 'red' );
        foreach my $debug_level ( "2.warn", "3.error", "4.fatal" ) {
            my $debug_res = check_debugfile( "", $debug_level );
            print_screen("$debug_res") if ($debug_res);
        }
    }

    ##########################################################################
    ### end, exit for monitor
    if ( $params->{'monitor'} ) {
        my %is_failure = map { $_ => 1 } ( 'InvaliDate', 'Failed', 'OutDate', 'SwitchOff' );
        exit( $is_failure{$StatusCode} ? 1 : 0 );
    }

    ##########################################################################
    ### end, next loop
    print color 'reset';
    exit unless ( $params->{'interval'} );
    sleep( $params->{'interval'} );
}
exit;