#!/usr/bin/perl -w
# (c) 2012 Joerg Mann / 1&1 Internet AG
#
# $Id: 219590ee4375f5c41f13f0b277146c9bf0ff94bf $
# last update at now ...
# TODO:
# check todo-global delete-logfiles
# check late join (log-v-4 ...), resize, delay, statusvalues
# check monitoring -> redesign statuscodes
# 20121201 - redesign debug-files and -messages
# 20121204 - add display todo counter
# 20121205 - add/update system entrys
# 20121206 - upgrade LogDelay/LogSpeed
# 20121210 - optimize same code
# 20121217 - resign versionlink, remove delay
# 20121219 - small fixes and layout
###
use warnings;
use strict;
use English;
use Getopt::Long;
use Term::ANSIColor;
use Date::Language;
use POSIX qw(strftime);
use File::Basename;

### defaults
my $version        = "0.070-27";
my $alife_timeout  = "30";          # sec for remote-nodes timeout
my $is_tty         = 0;
my $mars_dir       = '/mars';
my $himself        = `uname -n` or die "cannot determine my network node name\n";
my $clearscreen    = `clear`;
my $StatusCode     = 'UpToDate';    # overall status, written by monitoring()
my @StatusText     = ();
my $NodeStatusCode = 'UpToDate';    # status of the node currently displayed
my @NodeStatusText = ();
chomp $himself;
# backticks yield undef when the command cannot be started
$clearscreen = '' unless defined $clearscreen;
my $Color_blue  = 'yellow';
my $Color_green = 'green';

### ARGV
# parse the options into a hash ref
my $params = {};
GetOptions( $params,
    'help', 'h', 'version', 'v', 'monitor', 'role', 'cstate', 'dstate',
    'resource=s', 'system', 'interval=i', 'history', 'debug' );

### small help
# Print the usage text, optionally preceded by an error message, and exit.
#   $HelpText - optional error message; when given the script exits with
#               status 1 so that callers (monitoring) can detect the usage
#               error, otherwise it exits with status 0.
sub display_help {
    my $HelpText = shift;
    print "$HelpText\n\n" if ($HelpText);
    print "Usage: mars-status [--help]\n";
    print "Usage: mars-status [--version]\n";
    print "Usage: mars-status (without specification of parameters, an abstract of all the informations spent)\n";
    print "Usage: mars-status [--resource ] [--interval ] | [--history] | [--debug ] | [--system]\n";
    print "Usage: mars-status [--resource ] --monitor \n";
    print "Usage: mars-status --resource [--role | --cstate | --dstate]\n";
    print " --resource : limits the display to the specified resource\n";
    print " --interval : refreshes the display every second xxx\n";
    print " --history : shows information about the log files, version numbers and their status\n";
    print " --system : display mars-system informations\n";
    print " --monitor : indicator to use for monitoring on all state (by local node only !)\n";
    print " --role|--cstate|--dstate single state on lokal node\n";
    print " --debug : additional display debug messages\n\n";
    print "Usage small include rotate : mars-status --interval 2\n";
    print "Usage monitoring : mars-status --monitor\n";
    print "Usage monitoring drbd-linke : mars-status --cstate (or --dstate or --role)\n";
    print "Usage full, include debug : mars-status --system --history --debug\n\n";
    print "Advanced information are also available here: http://wiki.intranet.1and1.com/ ->ProjektTEC1603 ->TECITO.1735 -> MARS\n";
    # a help request is a success, a syntax error is not
    exit( $HelpText ? 1 : 0 );
}

if ( $params->{help} || $params->{h} ) {
    display_help();
}

#########################################################################################
### figure out TTY
# -t asks the kernel directly whether STDOUT is a terminal; this also works
# when /dev/stdout is missing or is not a symlink chain.
$is_tty = ( -t STDOUT ) ? 1 : 0;

# reset colour (and screen) on Ctrl-C; escape sequences only go to a terminal
$SIG{INT} = sub {
    if ($is_tty) {
        print color('reset');
        print $clearscreen;
    }
    exit;
};

#########################################################################################
### print color
# Central output routine.
#   $Text  - text to print
#   $Color - Term::ANSIColor attribute; empty/undef selects 'FAINT'
#   $Level - empty for normal screen output, or one of 'Rmonitor', 'Dmonitor',
#            'Cmonitor' for lines that belong to the monitor output
# Normal mode prints only lines without a level (coloured when on a TTY).
# Monitor mode prints only lines with a level; when --role, --dstate or
# --cstate is set, the first line of the matching level is printed and the
# script exits immediately.
sub print_screen {
    my $Text  = shift;
    my $Color = shift;
    my $Level = shift;

    ### default
    if ( !$params->{'monitor'} && !$Level ) {
        $Color = 'FAINT' if ( !$Color );
        print color($Color) if ($is_tty);
        print "$Text";
        print color('reset') if ($is_tty);
    }
    ### monitor
    elsif ( $params->{'monitor'} && $Level ) {
        if ( $params->{'role'} && $Level eq 'Rmonitor' ) {
            print "$Text\n";
            exit;
        }
        elsif ( $params->{'dstate'} && $Level eq 'Dmonitor' ) {
            print "$Text\n";
            exit;
        }
        elsif ( $params->{'cstate'} && $Level eq 'Cmonitor' ) {
            print "$Text\n";
            exit;
        }
        elsif ( !$params->{'role'} && !$params->{'dstate'} && !$params->{'cstate'} ) {
            print "$Text\n";
        }
    }
}
#########################################################################################
### read link
# Return the target of the symlink $dir, or 0 when the link is missing,
# unreadable, or its target is empty or "0".
sub check_link {
    my $dir    = shift;
    my $result = readlink $dir;
    return $result ? $result : 0;
}

#########################################################################################
### read links
# Print "on" (green) when the symlink has a true target, "off" (red)
# otherwise, and return the target (0 when off).
sub convert_link {
    my $link = check_link( shift );
    if ($link) {
        print_screen( "on", "$Color_green" );
    }
    else {
        print_screen( "off", 'red' );
    }
    return $link;
}

#########################################################################################
### StatusCode
# Record one status observation.
#   $Code - '' (treated as 'UpToDate') or one of the codes below
#   $Text - short description, appended to the global and per-node text lists
# Codes:
#   UpToDate   - everything ok
#   UpDateIng  - worker
#   OutDate    - replaying
#   InvaliDate - syncing
#   SwitchOff  - SwitchOff
#   Failed     - system, network, etc.
#   unknown    - not joined
# Every code other than 'UpToDate' overwrites both the global and the
# per-node status code (the last one reported wins).
# NOTE(review): a later, less severe code can replace an earlier 'Failed';
# a severity ranking may be desirable - confirm with the monitoring design.
sub monitoring {
    my $Code = shift;
    my $Text = shift;
    $Code = "UpToDate" if ( !defined $Code || $Code eq '' );
    if ( $Code ne 'UpToDate' ) {
        # the global code used to be guarded by the wrong variable and
        # therefore never left 'UpToDate'
        $StatusCode     = $Code;    # global
        $NodeStatusCode = $Code;    # local-node
    }
    push @StatusText,     $Text;
    push @NodeStatusText, $Text;
    return;
}

# Print the on/off state of one todo switch link and record it.
#   $SwitchLink - path of the switch symlink (target "1" means on)
#   $OnText     - monitoring text when the switch is on
#   $OffText    - monitoring text when the switch is off
sub _display_switch {
    my ( $SwitchLink, $OnText, $OffText ) = @_;
    if ( check_link($SwitchLink) eq 1 ) {
        print_screen( "on", "$Color_green" );
        monitoring( "", $OnText );
    }
    else {
        print_screen( "off", 'red' );
        monitoring( "SwitchOff", $OffText );
    }
    return;
}

#########################################################################################
### sub display resource-partner
# Display role, liveness, device, sync, logfile, replay and switch state of
# one node for one resource, feeding monitoring() along the way.
# Named arguments:
#   ressource      - resource directory name below $mars_dir
#   nodename       - node to display
#   ressource_size - resource size in bytes as read by the caller
#   res_partner    - scalar ref, incremented for every joined node
#   res_AULogfile  - logfile name used by the local node ('' for the local call)
# Returns the name of the logfile this node is replaying; returns nothing
# when the resource is not joined on the node.
sub display_partner {
    my %p              = @_;
    my $PRes           = $p{ressource};
    my $PName          = $p{nodename};
    my $PSize          = $p{ressource_size};
    my $ref_ResPartner = $p{res_partner};
    my $ref_AULogfile  = $p{res_AULogfile};
    my $PStatus        = check_link("$mars_dir/$PRes/primary");
    my $PDevice        = check_link("$mars_dir/$PRes/device-$PName");
    my $Ljoined        = check_link("$mars_dir/$PRes/device-$himself");

    ##########################################################################
    ### check status
    if ( $PStatus eq $PName ) {
        print_screen( "Primary", "$Color_blue" );
        print_screen( "Primary [$PRes on $PName]", '', 'Rmonitor' );
        monitoring( '', "joined" );
    }
    elsif ( $PDevice eq 0 ) {
        print_screen( "not joined", 'red' );
        print_screen( "not joined\n", '', 'Rmonitor' );
        print_screen( " -> Resource is not joined to this node\n", 'red' );
        monitoring( "unknown", "not joined" );
        # reset the per-node state here as well, otherwise 'unknown' leaks
        # into the status line of the next node
        $NodeStatusCode = 'UpToDate';
        @NodeStatusText = ();
        return;
    }
    else {
        print_screen( "Secondary", "$Color_blue" );
        print_screen( "Secondary [$PRes on $PName]", '', 'Rmonitor' );
        monitoring( "", "joined" );
    }

    ##########################################################################
    ### check alive
    my @PAlive = lstat("$mars_dir/alive-$PName");
    my $PAliveMtime = $PAlive[9] ? $PAlive[9] : 0;
    # seconds the alive link is older than the allowed timeout
    my $PAliveAge = time() - $PAliveMtime - $alife_timeout;
    print_screen( ", System", '' );
    if ( $PAliveAge > 1 ) {
        print_screen( " unknown (last message before $PAliveAge sec) !!!\n", 'red' );
        monitoring( "Failed", "not alive" );
    }
    else {
        print_screen( " alive\n", "$Color_green" );
        monitoring( "", "alive" );
    }

    ##########################################################################
    ### check device
    print_screen( "\tDevices : Disk-Device " . check_link("$mars_dir/$PRes/data-$PName") );
    print_screen(", used as Mars-Device /dev/mars/$PDevice");
    my $ASize = check_link("$mars_dir/$PRes/actsize-$PName");
    if ( $PSize eq $ASize ) {
        print_screen(", not resized");
    }
    else {
        print_screen( "\n\t\t---> HINT: resizing used ($PSize != $ASize)", 'red' );
    }

    ##########################################################################
    ### check mountpoint
    if ( $himself eq $PName ) {
        my $PUDevice = "/dev/mars/$PDevice";
        if ( stat($PUDevice) ) {
            open my $fh, '<', '/proc/mounts' or die $!;
            # \Q..\E: the device path is data, not a pattern
            my $MountLine = ( grep { /^\Q$PUDevice\E / } <$fh> )[0];
            close $fh;
            if ($MountLine) {
                my $MountPoint = ( split / /, $MountLine )[1];
                print_screen( " and mountet as $MountPoint\n", "$Color_blue" );
            }
            else {
                print_screen( "\n\t\t---> TODO: enable to mount\n", "$Color_green" );
            }
        }
        else {
            print_screen( "\n\t\t---> HINT: unable to mount, Device is Secondary or mars is starting\n", "$Color_blue" );
        }
    }
    else {
        print_screen("\n");
    }
    $$ref_ResPartner++;

    ##########################################################################
    ### check sync
    ### sync - status
    my $PSyncsize = check_link("$mars_dir/$PRes/syncstatus-$PName");
    # guard against a zero size handed in by the caller
    my $SStatus = sprintf( "%.2f", $PSize ? ( $PSyncsize / $PSize * 100 ) : 0 );
    print_screen( sprintf "\tSync : %s bytes (%.3fTB) synced = ",
        $PSyncsize, ( $PSyncsize / 1024 / 1024 / 1024 / 1024 ) );
    ### sync - speed
    my $SSpeed = check_link("$mars_dir/$PRes/actual-$PName/sync_rate");
    $SSpeed = sprintf( "%.2f", $SSpeed / 1024 / 1024 );
    if ( $SSpeed eq "0.00" ) {
        $SSpeed = "%";
    }
    else {
        $SSpeed = "%, by $SSpeed mb/s";
    }
    ### sync - results
    if ( $SStatus < 100 ) {
        print_screen("$SStatus$SSpeed\n");
        print_screen( "\t\t---> WORK: Sync in progress = ($SStatus% < 100.00%)\n", 'red' );
        monitoring( "InvaliDate", "not in sync ($SStatus%)" );
    }
    else {
        print_screen( "$SStatus$SSpeed\n", "$Color_green" );
        monitoring( "", "synced" );
    }

    ##########################################################################
    ### TODO: work by resize ...

    ##########################################################################
    ### check logfile
    ### logfile - status
    # replay link target: "<logfile>,<replay position>,<todo>"
    my @PLogFile = split( ',', check_link("$mars_dir/$PRes/replay-$PName") );
    my @PLogLink = split( "-", $PLogFile[0] );
    ### TODO: no logfile present ... hmm.
    my $PLogName = sprintf "%s-%s", $PLogLink[0], ( defined $PLogLink[1] ? $PLogLink[1] : '' );
    my $PLogSize = -s "$mars_dir/$PRes/$PLogFile[0]";
    if ( !$PLogFile[1] ) {
        $PLogFile[1] = 0;
        $PLogFile[2] = 0;
    }
    $PLogFile[2] = 0 if ( !defined $PLogFile[2] );
    # 0.0001 is the sentinel for "no/empty logfile"; it keeps the divisions
    # below away from zero
    $PLogSize = 0.0001 if ( ( !$PLogSize ) || ( $PLogSize eq 0 ) );
    my $LogEmpty = ( $PLogSize == 0.0001 );
    my $LogSpeed = check_link("$mars_dir/$PRes/actual-$PName/file_rate");
    $LogSpeed = sprintf( "%.2f", $LogSpeed / 1024 / 1024 );

    ### logfile - delaytime
    # The log-delay calculation and its monitoring were disabled by the author
    # (changelog 20121217 "remove delay"); nothing is computed here.

    ### logfile - results
    print_screen( sprintf "\tLogfile : %s bytes (%.3fGB) in ",
        $PLogSize, ( $PLogSize / 1024 / 1024 / 1024 ) );
    print_screen( "$PLogName", "$Color_green" );
    print_screen(" active");
    print_screen(", received with $LogSpeed mb/s") if ( $LogSpeed ne "0.00" );
    print_screen("\n");
    if ( $Ljoined eq "0" || $PLogSize eq "0.0001" ) {
        print_screen( "\t\t---> WORK: Logfile empty = (Size: $PLogSize)\n", 'red' );
    }
    if ( ($ref_AULogfile) && !( $PLogName eq $ref_AULogfile ) ) {
        print_screen( "\t\t---> HINT: Logfile Version not actual = ($PLogName ! $ref_AULogfile)\n", 'red' );
    }

    ##########################################################################
    ### check replay ...
    ### replay - status
    my $RStatus = sprintf( "%.2f", ( $PLogFile[1] / $PLogSize * 100 ) );
    $RStatus = 0 if ( $Ljoined eq "0" || $PLogSize eq "1" );
    # never show a rounded 100% while bytes are still outstanding
    $RStatus = 99.99 if ( ( $PLogFile[1] ne $PLogSize ) && ( $RStatus eq "100.00" ) );
    print_screen( sprintf( "\tReplayed: %s bytes (%.3fGB) now replayed, Todo %d (%.3fGB) = ",
        $PLogFile[1], ( $PLogFile[1] / 1024 / 1024 / 1024 ),
        $PLogFile[2], ( $PLogFile[2] / 1024 / 1024 / 1024 ) ) );
    ### replay - speed
    my $RSpeed = check_link("$mars_dir/$PRes/actual-$PName/replay_rate");
    $RSpeed = sprintf( "%.2f", $RSpeed / 1024 / 1024 );
    if ( $RSpeed eq "0.00" ) {
        $RSpeed = "%";
    }
    else {
        $RSpeed = "%, by $RSpeed mb/s";
    }
    ### replay - results
    if ( ( $RStatus < 1 ) && !$LogEmpty ) {
        print_screen("$RStatus$RSpeed\n");
        print_screen( "\t\t---> HINT: Replay not started, Logfile inactive = (Size: $PLogSize)\n", 'red' );
        monitoring( "OutDate", "replay stopped" );
    }
    elsif ( ( $RStatus < 100 ) && !$LogEmpty ) {
        print_screen("$RStatus$RSpeed\n");
        print_screen( "\t\t---> WORK: Replay in progress = ($RStatus% < 100.00%)\n", 'red' );
        monitoring( "UpDateIng", "replay running1" );
    }
    elsif ( $PLogFile[2] > 0 ) {
        # todo > 0 implies a non-zero replay position (see zeroing above)
        $RStatus = sprintf( "%.2f", ( $PLogFile[1] - $PLogFile[2] ) / $PLogFile[1] * 100 );
        print_screen( "$RStatus$RSpeed\n", 'red' );
        monitoring( "UpDateIng", "replay running2" );
    }
    elsif ($LogEmpty) {
        # was an assignment ("=") that made the branch below unreachable
        $RStatus = "100.00";
        print_screen( "$RStatus$RSpeed\n", "$Color_green" );
        monitoring( '', "replay wait" );
    }
    else {
        # $RSpeed already starts with the percent sign
        print_screen( "$RStatus$RSpeed\n", "$Color_green" );
        monitoring( '', "replaying" );
    }
    ### replay - hints
    if ( $PLogFile[2] != 0 ) {
        print_screen( "\t\t---> HINT: Replay-Todo is actualy $PLogFile[2], ", "$Color_blue" );
        if ( $PLogFile[2] < 0 ) {
            print_screen( "replaying backwards ??? Check this !!!\n", 'red' );
        }
        elsif ( $PLogFile[2] > 0 ) {
            print_screen("mars it's working ...\n");
        }
        else {
            print_screen( "replaying working unknown ... Check this !!!\n", 'red' );
        }
    }

    ##########################################################################
    ### check actual
    my $ActStatus = check_link("$mars_dir/$PRes/actual-$PName/is-primary");
    if ( $ActStatus eq 1 ) {
        print_screen("\tActual : Status=Primary, used Device=");
        convert_link("$mars_dir/$PRes/actual-$PName/device-$PDevice");
        print_screen("\n");
    }
    else {
        print_screen("\tActual : Status=Secondary, Syncstatus=");
        convert_link("$mars_dir/$PRes/actual-$PName/copy-syncstatus-$PName");
        print_screen(", Logfileupdate=");
        convert_link("$mars_dir/$PRes/actual-$PName/logfile-update");
        print_screen("\n");
    }

    ##########################################################################
    ### check switches
    my $TodoDir = "$mars_dir/$PRes/todo-$PName";
    print_screen("\tSwitches: Attach=");
    _display_switch( "$TodoDir/attach", "attached", "attach off" );
    print_screen(" [masked:") if ( $ActStatus eq 1 );
    print_screen(" Connect=");
    _display_switch( "$TodoDir/connect", "connected", "connect off" );
    print_screen(" Sync=");
    _display_switch( "$TodoDir/sync", "synced", "sync off" );
    print_screen(" AllowReplay=");
    _display_switch( "$TodoDir/allow-replay", "replayed", "replay off" );
    print_screen("]") if ( $ActStatus eq 1 );
    print_screen("\n");

    ##########################################################################
    ### node status
    my $NodeStatusText = join '', map { "($_)" } @NodeStatusText;
    ### monitor-modus
    print_screen( "$NodeStatusCode [$NodeStatusText]", '', 'Dmonitor' );
    if ( $NodeStatusCode eq 'SwitchOff' ) {
        print_screen( "Disconnect [$PRes on $PName]", '', 'Cmonitor' );
    }
    else {
        print_screen( "Connect [$PRes on $PName]", '', 'Cmonitor' );
    }
    ### reset values
    $NodeStatusCode = 'UpToDate';
    @NodeStatusText = ();
    return $PLogName;
}

#########################################################################################
### check ressources
# Walk all resource directories below $mars_dir (optionally limited by
# --resource), display the local node and - outside monitor mode - all remote
# nodes, then the cluster mode, the logfile history (--history) and the
# per-resource debug files (--debug).
sub check_ressource {
    opendir my $dirhandle, $mars_dir or die "Cannot open $mars_dir: $!";
    my @resources = sort grep { /^res/ && -d "$mars_dir/$_" } readdir $dirhandle;
    closedir $dirhandle;
    if ( !@resources ) {
        print_screen( "---> HINT: no resources found\n", 'red' );
        # "next" here used to jump out of the sub into the caller's loop
        return;
    }

    ### read resources
    foreach my $res (@resources) {
        my $ResPartner = 0;
        my $res_name   = $res;
        $res_name =~ s/^resource-//;
        if ( $params->{'resource'} && $params->{'resource'} ne $res_name ) {
            next;
        }
        my $res_size = check_link("$mars_dir/$res/size");
        if ( $res_size eq 0 ) { $res_size = 1 }    # avoid division by zero later
        my $res_tbsize = $res_size / 1024 / 1024 / 1024 / 1024;
        my $res_master = check_link("$mars_dir/$res/primary");
        if ( $res_master eq 0 ) { $res_master = "unknown" }
        print_screen( sprintf( "-> check resource %s, with %.3fTB, Primary Node is %s\n",
            $res_name, $res_tbsize, $res_master ), 'bold' );

        ### him self
        print_screen( " -> local Node ($himself) as ", 'bold' );
        my $ActualUsedLogfile = display_partner(
            ressource      => $res,
            nodename       => $himself,
            ressource_size => $res_size,
            res_partner    => \$ResPartner,
            res_AULogfile  => "",
        );
        # end him self

        ### joined (and not monitor) ...
        if ( ( $ResPartner eq 1 ) && ( !$params->{'monitor'} ) ) {
            ### partners
            opendir my $server_dh, "$mars_dir/$res" or die "Cannot open $mars_dir/$res: $!";
            my @servers = sort grep { /^data/ && readlink "$mars_dir/$res/$_" } readdir $server_dh;
            closedir $server_dh;
            foreach my $partner (@servers) {
                $partner =~ s/^data-//;
                next if ( $partner eq $himself );
                print_screen( " -> remote Node ($partner) as ", 'bold' );
                display_partner(
                    ressource      => $res,
                    nodename       => $partner,
                    ressource_size => $res_size,
                    res_partner    => \$ResPartner,
                    res_AULogfile  => $ActualUsedLogfile,
                );
            }
        }    # end joined

        ### modus
        if ( $ResPartner eq 0 ) {
            print_screen( " -> modus for $res_name is remote ($ResPartner nodes)\n", 'bold' );
        }
        elsif ( $ResPartner eq 1 ) {
            print_screen( " -> modus for $res_name is standalone ($ResPartner node)\n", 'bold' );
        }
        else {
            print_screen( " -> modus for $res_name is clustered ($ResPartner nodes)\n ", 'bold' );
        }

        ### resources history
        if ( $params->{'history'} ) {
            check_logfile( $res, $ResPartner );
        }

        ### check resources debug
        if ( $params->{'debug'} ) {
            print_screen( " -> $res-Debug:\n", "$Color_blue" );
            ### TODO: small hack, read 3 files ...
            foreach my $debug_level ( "2.warn", "3.error", "4.fatal" ) {
                my $debug_res = check_debugfile( "$res", $debug_level );
                print_screen("$debug_res") if ($debug_res);
            }
        }
    }    # end foreach
    return;
}

#########################################################################################
### logfile history
# Show every logfile of a resource together with its version links and a
# verdict on whether the copies on all nodes agree.
#   $LResource - resource directory name below $mars_dir
#   $LPartner  - number of joined nodes; one version link per node is expected
sub check_logfile {
    my $LResource   = shift;
    my $LPartner    = shift;
    my $oldEqual    = 0;
    my $LogFailed   = 0;
    my $LogCount    = 0;
    my $LogCountNow = 1;
    my @logfile     = glob("$mars_dir/$LResource/log*");
    my $LogCountSum = scalar @logfile;
    print_screen( " -> History Replay/Status\n", "$Color_blue" );

    ### search all logfiles
    foreach my $logfile (@logfile) {
        my $LVersion = $logfile;
        $LVersion =~ s/^.*log-([0-9]+)-.*$/$1/;
        my $LogStatus = check_link("$logfile");
        my $allEqual  = 1;    ### all version links agree
        my $OldCheck;         ### checksum from versionfile
        my $OldSize;          ### size from versionfile

        if ( $LogStatus eq 0 ) {
            ### found logfile (readlink yields nothing for it)
            my $LogSize = -s "$logfile";
            if ( !$LogSize ) { $LogSize = 0; }
            ### logfile stat-values
            my @LogStat   = stat($logfile);
            my $LogCTime  = gmtime( $LogStat[10] );
            ### quickfix ... distance between mtime and atime
            my $LogSeconds = abs( $LogStat[9] - $LogStat[8] );
            print_screen( sprintf "\tLogfile Version: %s Size: %s bytes (%.3fGB) from %s, include hypothetically %s sec\n",
                $LVersion, $LogSize, ( $LogSize / 1024 / 1024 / 1024 ), $LogCTime, $LogSeconds );

            ### search all logfile version
            foreach my $VersionLink ( glob("$mars_dir/$LResource/version-$LVersion*") ) {
                ### version link target: "<checksum>,<host>,<position>,..."
                my @LogDetail = split( ',', check_link("$VersionLink") );
                $LogDetail[2] = 0 if ( !defined $LogDetail[2] );
                ### search size of logfile
                my $ActLogSize = $LogSize - $LogDetail[2];
                ### search name of source; fall back to the old pattern when
                ### the link name has an unexpected shape
                my $LogServer = $VersionLink;
                $LogServer =~ s{^.*/version-[0-9]+-}{} or $LogServer =~ s/.*[0-9]-//;
                $LogCount++;
                ### output
                print_screen( sprintf "\t\tSource: %s \tCheck: %s \tTodo: %.3fGB \tReplayPosition: %s\n",
                    $LogServer, $LogDetail[0], $ActLogSize / 1024 / 1024 / 1024, $LogDetail[2] );

                ### new versionfile
                if ( !defined $OldCheck ) {
                    $OldCheck  = $LogDetail[0];
                    $OldSize   = $LogDetail[2];
                    $LogFailed = 0;
                    $allEqual  = 1;
                }
                ### checksum different
                elsif ( ( $LogDetail[0] ne $OldCheck ) and ( $LogDetail[2] eq $OldSize ) and ( $LogDetail[2] ne 0 ) ) {
                    print_screen( "\t\t---> TODO: Logfiles has not equal Checksums and same size, check for SPLIT-BRAIN\n", 'red' );
                    $LogFailed = 1;
                    $allEqual  = 0;
                }
                ### value different
                elsif ( ( $LogDetail[0] ne $OldCheck ) or ( $LogDetail[2] ne $OldSize ) ) {
                    $LogFailed = 1;
                    $allEqual  = 0;
                }
                ### everything ok
                else {
                    $LogFailed = 0;
                    $allEqual  = 1;
                }
            }    # end foreach
            $oldEqual = ( $allEqual eq 1 ) ? 1 : 0;

            ### is any replay link (name or target) referring to this version?
            ### (replaces a shell "ls -l ... | grep" with interpolated paths)
            my $ReplayActive = grep {
                my $target = readlink $_;
                my $entry = defined $target ? "$_ -> $target" : $_;
                $entry =~ /\Q$LVersion\E/;
            } glob("$mars_dir/$LResource/replay-*");

            ### check Count Logfiles
            if ( !( $LogCount eq $LPartner ) ) {
                print_screen( "\t\t---> TODO: Count of Logfiles different = (have:$LPartner found:$LogCount)\n", 'red' );
                $LogFailed = 1;
                $oldEqual  = 0;
            }
            elsif ($ReplayActive) {
                print_screen( "\t\t---> WORK: Logfiles are actual and used, Replay in progess...\n", 'red' );
            }
            elsif ( $LogFailed eq 1 ) {
                print_screen( "\t\t---> WORK: Logfiles has not equal Checksums and different size, Reception in progress ...\n", 'red' );
            }
            elsif ( $LogCountSum eq $LogCountNow ) {
                print_screen( "\t\t---> WORK: Logfiles are actual and unused(1).\n", "$Color_green" );
            }
            elsif ( ( $oldEqual eq 1 ) && defined $OldSize && ( $OldSize eq 0 ) ) {
                print_screen( "\t\t---> WORK: Logfiles are actual and unused(2).\n", "$Color_green" );
            }
            elsif ( $oldEqual eq 1 ) {
                print_screen( "\t\t---> WORK: Logfiles has all equal Sizes and Checksums, can be deleted ...\n", "$Color_green" );
            }
            else {
                print_screen( "\t\t---> TODO: Logfiles has same other errors - Please check History of Logfiles\n", 'red' );
            }

            ### check delete infos (only when the name really has a log- part)
            if ( $logfile =~ m|/log-(.*)$| ) {
                my $DelLogfile = "log-$1";
                foreach my $DeleteLink ( glob("$mars_dir/todo-global/delete-*") ) {
                    my $DeleteTarget = readlink $DeleteLink;
                    my $DeleteName = $DeleteTarget ? basename($DeleteTarget) : "n/a";
                    if ( $DeleteName eq $DelLogfile ) {
                        print_screen( "\t\t---> HINT: LogFile ist marked for delete ($DeleteName).\n", "$Color_green" );
                    }
                }
            }
            $LogCount = 0;
        }    # end logstatus
        $LogCountNow++;
    }    # end foreach
    return;
}

#########################################################################################
### diskfull
# Inspect every $mars_dir/rest-space-<node> link and report nodes whose link
# value is below 1; prints " ok" when no such node was found.
sub check_disk_is_full {
    my @diskfull      = glob("$mars_dir/rest-space-*");
    my $diskfull_mars = "";
    print_screen( "-> Diskspace on Cluster:", 'bold' );
    foreach my $diskfull_link (@diskfull) {
        my $diskfull_space  = check_link("$diskfull_link");
        my $diskfull_system = $diskfull_link;
        # strip the directory actually globbed, not a hard-coded /mars
        $diskfull_system =~ s!^\Q$mars_dir\E/rest-space-!!;
        next if ( $diskfull_space >= 1 );
        $diskfull_space = sprintf( "%.2f", $diskfull_space / 1024 );
        if ( $diskfull_system eq $himself ) {
            print_screen( "\n\t-> TODO: Local Partition $mars_dir full ($diskfull_space kb Limit) !!! mars is stopping !!!\n\n", "red" );
            $diskfull_mars = "$diskfull_mars,$diskfull_system";
            monitoring( 'Failed', 'System: Mars-Disk full, MARS stopping' );
        }
        else {
            print_screen( "\n\t-> TODO: Remotesystem $diskfull_system have mars-disk full ($diskfull_space kb Limit) !!!\n\n", "red" );
            $diskfull_mars = "$diskfull_mars,$diskfull_system";
            monitoring( 'Failed', 'System: Remote-Mars-Disk full' );
        }
    }
    ### TODO: /0
    if ( !$diskfull_mars ) {
        print_screen( " ok\n", "$Color_green" );
    }
    return;
}

#########################################################################################
### check debug-files
# Read $mars_dir/$debug_dir/$debug_level.status and return its lines, each
# prefixed with a tab and with a leading epoch timestamp converted to local
# time. Returns nothing when the file cannot be opened or is empty.
### TODO: Fix Level
### 0.debug,1.info,2.warn,3.error,4.fatal,5.total
sub check_debugfile {
    my $debug_dir   = shift;
    my $debug_level = shift;
    my $debug_file  = "$mars_dir/$debug_dir/$debug_level.status";
    open( my $debug_fh, '<', $debug_file ) or return;
    my $mars_debug = "";
    # the line read had been lost, leaving an endless "while ( )"
    while ( my $line = <$debug_fh> ) {
        if ( $line =~ m/^(\d+\.\d+)/ ) {
            $line =~ s/^(\d+\.\d+)/strftime("%a %b %e %H:%M:%S %Y:", localtime($1))/e;
        }
        else {
            $line = localtime(0) . ': ' . $line;
        }
        $line =~ s/MARS_DEBUG\s+//;
        $mars_debug = "$mars_debug\t$line";
    }
    close $debug_fh;
    return "$mars_debug" if ( $mars_debug ne "" );
    return;
}

#########################################################################################
### info version
# Print the status header (script, marsadm, module and kernel version).
# Blocks until the mars kernel module shows up in lsmod and modinfo reports
# an author: prints "Module Mars not running" and retries every 10 seconds.
# NOTE(review): in --monitor mode this waits silently forever, as before;
# consider reporting 'Failed' instead - confirm the intended behaviour.
sub info_version {
    ### module
    my %mars_info;
    while (1) {
        %mars_info = ();
        open( my $lsmod_handle, '-|', 'lsmod | grep mars' ) || die "blub ... $!";
        my $module_line = <$lsmod_handle>;
        close $lsmod_handle;
        if ($module_line) {
            open( my $modinfo_handle, '-|', 'modinfo mars' ) || die "cannot run modinfo mars: $!";
            while ( my $line = <$modinfo_handle> ) {
                chomp $line;
                my ( $key, $value ) = split /: +/, $line;
                if ($value) { $mars_info{$key} = $value; }
            }
            close $modinfo_handle;
            last if ( defined $mars_info{author} && $mars_info{author} ne "" );
        }
        # retry in place; "next" inside the sub used to restart the caller's
        # loop (and spun without any delay in the modinfo case)
        print_screen( "Module Mars not running\n", 'red' );
        sleep(10);
    }

    ### status
    print_screen( "MARS Status - $himself, $version", "$Color_blue" );
    if ( $params->{'resource'} ) {
        print_screen( ", Ressource: $params->{'resource'}", "$Color_blue" );
    }
    print_screen("\n");

    ### marsadm
    my $MAVersion = qx"marsadm version";
    $MAVersion = "unknown\n" if ( !defined $MAVersion || $MAVersion eq "" );
    print_screen( "MARS Admin - $MAVersion", "$Color_blue" );

    ### module
    my $ModuleVersion = defined $mars_info{version} ? $mars_info{version} : "unknown";
    print_screen( "MARS Module - $ModuleVersion\n", "$Color_blue" );

    ### kernel
    open my $Kfh, '<', '/proc/version' or die $!;
    my $KVersion = ( grep { /^Linux/ } <$Kfh> )[0];
    close $Kfh;
    $KVersion = defined $KVersion ? ( split / /, $KVersion )[2] : undef;
    $KVersion = "unknown" if ( !defined $KVersion );
    print_screen( "MARS Kernel - $KVersion\n", "$Color_blue" );
    print_screen("-------------------------------------------------------------------------------\n");
    return;
}

#########################################################################################
### avg_limit
# Report whether the local node is jammed, based on $mars_dir/jammed-<node>.
# NOTE(review): a true link value is taken to mean "jammed"; the former
# condition could never reach the failure branch - confirm the link semantics
# against the MARS documentation.
sub check_jammed {
    my $jammed = check_link("$mars_dir/jammed-$himself");
    print_screen( "-> Mars-Transaktion ", 'bold' );
    if ( !$jammed ) {
        print_screen( "running normaly\n", "$Color_green" );
    }
    else {
        print_screen( "and Replication not runnunig !!!\n", 'red' );
        monitoring( 'Failed', 'System: Replikation not running' );
    }
    return;
}

# Read /proc/sys/mars/$name and return its content without newlines and tabs;
# returns '' when no name is given or the file cannot be read.
sub _read_mars_limit {
    my $name = shift;
    return '' if ( !defined $name || $name eq "" );
    open( my $limit_fh, '<', "/proc/sys/mars/$name" ) or return '';
    my $content = join '', <$limit_fh>;
    close $limit_fh;
    $content =~ s/[\n\t]//g;
    return $content;
}

# Numeric value of a limit string; empty or non-numeric input counts as 0.
sub _limit_number {
    my $raw = shift;
    return 0 if ( !defined $raw || $raw eq '' );
    no warnings 'numeric';
    return $raw + 0;
}

#########################################################################################
### read the limits ...
# Print one line for a configured limit and/or its current value.
#   $LimitText   - label
#   $LimitSolVar - file below /proc/sys/mars holding the configured value ('' = none)
#   $LimitSolEin - unit of the configured value
#   $LimitIstVar - file below /proc/sys/mars holding the current value ('' = none)
#   $LimitIstEin - unit of the current value
# "lamport_clock" as configured file is special: the difference between its
# CURRENT_TIME and lamport_now fields is printed.
sub check_limit {
    my $LimitText   = shift;    # label
    my $LimitSolVar = shift;    # configured: filename
    my $LimitSolEin = shift;    # configured: unit
    my $LimitIstVar = shift;    # current: filename
    my $LimitIstEin = shift;    # current: unit
    ### for better ...
    $LimitSolVar = "" if ( !$LimitSolVar );
    $LimitIstVar = "" if ( !$LimitIstVar );
    $LimitSolEin = "" if ( !$LimitSolEin );
    $LimitIstEin = "" if ( !$LimitIstEin );

    ### configured / current (the line reads had been lost: "while ()")
    my $mars_limit_sol = _read_mars_limit($LimitSolVar);
    my $mars_limit_ist = _read_mars_limit($LimitIstVar);

    ### presently results
    print_screen( "-> $LimitText: ", 'bold' );
    if ( ($LimitSolVar) && !($LimitIstVar) ) {
        ### only configured & lamport_clock
        if ( $LimitSolVar eq "lamport_clock" ) {
            my $C_Time = $mars_limit_sol;
            $C_Time =~ s/CURRENT_TIME=//;
            $C_Time =~ s/lamport_now=.*//;
            my $L_Time = $mars_limit_sol;
            $L_Time =~ s/.*lamport_now=//;
            $mars_limit_sol = sprintf( "%.2f", _limit_number($C_Time) - _limit_number($L_Time) );
            print_screen("$mars_limit_sol $LimitSolEin\n");
        }
        ### only configured
        elsif ( _limit_number($mars_limit_sol) < 1 ) {
            print_screen("is now unsed\n");
        }
        else {
            print_screen("is set to ");
            print_screen( "$mars_limit_sol $LimitSolEin\n", 'red' );
        }
    }
    elsif ( !($LimitSolVar) && ($LimitIstVar) ) {
        ### only current
        if ( _limit_number($mars_limit_ist) < 1 ) {
            print_screen("is actualy null\n");
        }
        else {
            print_screen("is actualy ");
            print_screen( "$mars_limit_ist $LimitIstEin\n", 'red' );
        }
    }
    elsif ( ($LimitSolVar) && ($LimitIstVar) && ( _limit_number($mars_limit_sol) < 1 ) ) {
        ### configured & current = 0
        print_screen("is actualy unused\n");
    }
    else {
        ### configured & current / rest ...
        print_screen("is set to ");
        print_screen( "$mars_limit_sol $LimitSolEin", 'red' );
        print_screen(", actualy used ");
        print_screen( "$mars_limit_ist $LimitIstEin\n", 'red' );
    }
    return;
}

##############################################################################
### main loop ...
# One pass prints the complete status; with --interval the pass is repeated
# after the given number of seconds, otherwise the script exits after the
# first pass. In monitor mode the exit status reflects the collected status.
while (1) {
    ### version only
    if ( $params->{version} || $params->{v} ) {
        info_version();
        exit 0;
    }

    ##########################################################################
    ### check and set monitor
    ### big monitor
    if ( $params->{'monitor'} || $params->{'cstate'} || $params->{'dstate'} || $params->{'role'} ) {
        $params->{'system'}  = 1;
        $params->{'history'} = 1;    ### TODO: check!
        $params->{'debug'}   = 0;
        $params->{'monitor'} = 1;
    }
    ### small-monitor
    if ( ( $params->{'cstate'} || $params->{'dstate'} || $params->{'role'} ) && ( !$params->{'resource'} ) ) {
        display_help("Syntax-Error: Option resource is missing by --cstate / --dstate / --role!");
    }

    ##########################################################################
    ### main run
    # terminal control sequences belong on an interactive terminal only,
    # never into monitor output or a pipe
    print $clearscreen if ( $is_tty && !$params->{'monitor'} );

    ##########################################################################
    ### read mars infos
    info_version();

    ##########################################################################
    ### check system limits
    if ( $params->{'system'} ) {
        # size of the mars partition from df (1K blocks), shown in MB
        my $mars_disk_space = `df '$mars_dir' | grep '$mars_dir'| awk '{print \$2}'`;
        my $mars_disk_kb = 0;
        if ( defined $mars_disk_space && $mars_disk_space =~ /(\d+(?:\.\d+)?)/ ) {
            $mars_disk_kb = $1;
        }
        $mars_disk_space = sprintf( "%01.2f", $mars_disk_kb / 1024 );

        ### text / configured file / configured unit / current file / current unit
        my @limits = (
            [ "AVG-Limit",                 "loadavg_limit",             "loadavg" ],
            [ "Memory-Limit",              "mem_limit_percent",         "%",    "mem_used_raw_kb",          "kb" ],
            [ "Network-IO-Timeout",        "network_io_timeout",        "sec" ],
            [ "Traffic Limit",             "tuning/traffic_limit_kb",   "kb/s", "tuning/traffic_rate_kb",   "kb/s" ],
            [ "Server-IO Limit",           "tuning/server_io_limit_kb", "kb/s", "tuning/server_io_rate_kb", "kb/s" ],
            [ "Delay say Overflow",        "",                          "",     "delay_say_on_overflow",    "(on/off)" ],
            [ "Statusfile Rollover",       "",                          "",     "statusfiles_rollover_sec", "sec" ],
            [ "Flying IO Count",           "",                          "",     "io_flying_count" ],
            [ "LoggerMemory",              "",                          "",     "logger_mem_used_kb",       "kb" ],
            [ "FreeSpaceLimit on /mars",   "free_space_mb",             "mb",   "",                         "$mars_disk_space" ],
            [ "FreeSpaceLimit LogDelete",  "logdel_auto_gb",            "gb" ],
            [ "FreeSpaceLimit LogRotate",  "logrot_auto_gb",            "gb" ],
            [ "LamportClockDifferenz",     "lamport_clock",             "sec" ],
        );
        foreach my $limit (@limits) {
            check_limit( @{$limit} );
        }

        ### check system params
        check_jammed();
        check_disk_is_full();
    }

    ##########################################################################
    ### check resources
    check_ressource();

    ##########################################################################
    ### check global debug
    if ( $params->{'debug'} ) {
        print_screen( "-> Main-Debug:\n", 'red' );
        foreach my $debug_level ( "2.warn", "3.error", "4.fatal" ) {
            my $debug_res = check_debugfile( "", $debug_level );
            print_screen("$debug_res") if ($debug_res);
        }
    }

    ##########################################################################
    ### end, exit for monitor
    if ( $params->{'monitor'} ) {
        if (   $StatusCode eq 'InvaliDate'
            || $StatusCode eq 'Failed'
            || $StatusCode eq 'OutDate'
            || $StatusCode eq 'SwitchOff' )
        {
            exit 1;
        }
        else {
            exit 0;
        }
    }

    ##########################################################################
    ### end, next loop
    print color('reset') if ($is_tty);
    exit if ( not $params->{'interval'} );
    sleep( $params->{'interval'} );
}
exit;