some more fixes

This commit is contained in:
jmann 2012-06-05 16:34:37 +02:00 committed by Thomas Schoebel-Theuer
parent eef8878205
commit b0cdd13ce5
2 changed files with 532 additions and 523 deletions

View File

@ -1,7 +1,7 @@
#!/usr/bin/perl -w
# (c) 2012 Joerg Mann / 1&1 Internet AG
#
# $Id: 7925d898464c836d5dbf76d2b6d3fed2413f6034 $
# $Id: 89bfc1b46c3f5907b173cb59379663c9b9f06049 $
# last update at Tue Apr 10 17:20:32 CEST 2012 by joerg.mann@1und1.de
# TODO:
@ -20,7 +20,7 @@ use Date::Language;
use POSIX qw(strftime);
### defaults
my $version = "0.067u";
my $version = "0.067v";
my $alife_timeout = "99"; # sec
my $is_tty = 0;
my $mars_dir = '/mars';
@ -32,7 +32,7 @@ chomp $himself;
### ARGV
# Optionen in Hash-Ref parsen
my $params = {};
GetOptions( $params, 'help', 'h', 'version', 'v', 'resource=s', 'interval=i', 'long', 'history', 'debug' );
GetOptions( $params, 'help', 'h', 'version', 'v', 'monitor', 'm', 'resource=s', 'interval=i', 'long', 'history', 'debug' );
if($params->{help} || $params->{h}) {
print "Usage: mars-status [--help]\n";
@ -81,7 +81,7 @@ sub check_link {
}
### print color
sub print_warn {
sub print_screen {
my $text = shift;
my $color = shift;
print color "$color" if ( $is_tty );
@ -94,9 +94,9 @@ sub convert_link {
my $link = shift;
$link = check_link "$link";
if ( ( !$link ) || ( $link eq 0 ) ) {
print_warn "off", 'red';
print_screen "off", 'red';
} else {
print_warn "on", 'green';
print_screen "on", 'green';
}
return $link;
}
@ -119,31 +119,29 @@ sub display_partner {
### status
if ( $PStatus eq $PName ) {
print_warn "as Primary, ",'blue';
print_screen "as Primary, ",'blue';
} else {
if ( $PDevice eq 0 ) {
print_warn "not joined, ",'red';
print_screen "not joined, ",'red';
} else {
print_warn "as Secondary, ",'blue';
print_screen "as Secondary, ",'blue';
}
}
### alive
my @PAlive = lstat("$mars_dir/alive-$PName");
if ( !$PAlive[9] ) { $PAlive[9]=0 };
my $PAlive = time()- $PAlive[9] - $alife_timeout;
if ( $PAlive > 1 ) {
print_warn "Status: unknown (last message before $PAlive sec) !!!\n", 'red';
print_screen "Status: unknown (last message before $PAlive sec) !!!\n", 'red';
} else {
print_warn "Status: connected\n",'blue';
print_screen "Status: connected\n",'blue';
}
### device
# joined ?
if ( $PDevice eq 0 ) {
if ( $params->{'long'} ) { print_warn " -> Resource is not joined to this node\n", 'red'; }
if ( $params->{'long'} ) { print_screen " -> Resource is not joined to this node\n", 'red'; }
return;
}
if ( $params->{'long'} ) {
@ -159,12 +157,12 @@ sub display_partner {
$PUDevice = ( grep { /^$PUDevice / } <$fh> )[0];
if ( $PUDevice ) {
$PUDevice = ( split / /, $PUDevice )[1];
print_warn " and mountet as $PUDevice\n",'blue';
print_screen " and mountet as $PUDevice\n",'blue';
} else {
print_warn "\n\t\t---> TODO: enable to mount\n",'green';
print_screen "\n\t\t---> TODO: enable to mount\n",'green';
}
} else {
print_warn "\n\t\t---> HINT: unable to mount, Device is Secondary or mars is starting ...\n",'blue';
print_screen "\n\t\t---> HINT: unable to mount, Device is Secondary or mars is starting ...\n",'blue';
}
} else {
print "\n";
@ -184,51 +182,48 @@ sub display_partner {
if ( $params->{'long'} ) {
printf "\tLogfile : %s with %s bytes (%.3fGB) received\n", $PLogName, $PLogSize, ( $PLogSize/1024/1024/1024 );
if ( $Ljoined eq "0" || $PLogSize eq "0.0001" ) {
print_warn "\t\t---> TODO: Logfile inactive or empty (Size: $PLogSize)\n", 'red';
print_screen "\t\t---> TODO: Logfile inactive or empty (Size: $PLogSize)\n", 'red';
}
if ( ( $ref_AULogfile ) && !($PLogName eq $ref_AULogfile) ) {
print_warn "\t\t---> TODO: Logfile Version not actual = ($PLogName ! $ref_AULogfile)\n", 'red';
print_screen "\t\t---> TODO: Logfile Version not actual = ($PLogName ! $ref_AULogfile)\n", 'red';
}
}
### replay
my $RStatus = ( $PLogFile[1] / $PLogSize ) * 100;
my $RStatus = sprintf("%.2f", ( $PLogFile[1] / $PLogSize * 100));
if ( $Ljoined eq "0" || $PLogSize eq "1" ) { $RStatus = 0; }
$$ref_ResInReplay = $RStatus;
if ( $params->{'long'} ) {
printf "\tReplayed: %s bytes (%.3fGB) replayed, Todo %d (%.3fGB) = ",
$PLogFile[1], ( $PLogFile[1]/1024/1024/1024 ),
$PLogFile[2], ( $PLogFile[2]/1024/1024/1024 );
$RStatus = sprintf("%.2f", $RStatus);
if (( $RStatus < 1 ) && ( $PLogSize != 0.0001 )) {
print_warn "$RStatus%\n\t\t---> TODO: Replay not started, Logfile inactive or empty (Size: $PLogSize)\n", 'red';
print_screen "$RStatus%\n\t\t---> TODO: Replay not started, Logfile inactive or empty (Size: $PLogSize)\n", 'red';
} elsif (( $RStatus < 100 ) && ( $PLogSize != 0.0001 )) {
print_warn "$RStatus%\n\t\t---> WORK: Replay in progress = ($RStatus% < 100.00%)\n", 'red';
print_screen "$RStatus%\n\t\t---> WORK: Replay in progress = ($RStatus% < 100.00%)\n", 'red';
} elsif ( $PLogFile[2] > 0 ) {
$RStatus = sprintf("%.2f", ($PLogFile[1]-$PLogFile[2])/$PLogFile[1] * 100);
print_warn "$RStatus%\n", 'red';
print_screen "$RStatus%\n", 'red';
} elsif ( $PLogSize = 0.0001 ) {
$RStatus = "100.00";
print_warn "$RStatus%\n", 'green';
print_screen "$RStatus%\n", 'green';
} else {
print_warn "$RStatus%\n", 'green';
print_screen "$RStatus%\n", 'green';
}
}
$$ref_ResInReplay = $RStatus;
### sync
my $PSyncsize = check_link "$mars_dir/$PRes/syncstatus-$PName";
my $SStatus = ( $PSyncsize / $PSize * 100);
my $SStatus = sprintf("%.2f", ($PSyncsize / $PSize * 100));
$$ref_ResInSync = $SStatus;
if ( $params->{'long'} ) {
printf "\tSync : %s bytes (%.3fTB) synced = ", $PSyncsize, ( $PSyncsize/1024/1024/1024/1024);
$SStatus = sprintf("%.2f", $SStatus);
if ( $SStatus < 100) {
print_warn "$SStatus%\n\t\t---> WORK: Sync in progress = ($SStatus% < 100.00%)\n", 'red';
print_screen "$SStatus%\n\t\t---> WORK: Sync in progress = ($SStatus% < 100.00%)\n", 'red';
} else {
print_warn "$SStatus%\n", 'green';
print_screen "$SStatus%\n", 'green';
}
}
@ -264,7 +259,7 @@ sub check_logfile {
my $oldEqual = 0;
my $LogCount = 0;
my $LogFailed = 0;
print_warn " -> History Replay/Status\n",'blue';
print_screen " -> History Replay/Status\n",'blue';
my @logfile = <$mars_dir/$LResource/log*>;
foreach my $logfile (@logfile) {
@ -275,13 +270,12 @@ sub check_logfile {
if ( $LogStatus eq 0 ) {
# # info to old logfiles (old loop) ...
# if (( $oldEqual eq 1 ) && ( $LogFailed eq 0 )) {
# print_warn "\t\t---> TODO: logfiles has all equal Sizes and Checksums, can be deleted?\n",'green';
# print_screen "\t\t---> TODO: logfiles has all equal Sizes and Checksums, can be deleted?\n",'green';
# # TODO delete Links
# } elsif (( $oldEqual eq 1 ) && ( $LogFailed ne 0 )) {
# print_warn "\t\t---> TODO: logfiles has same other errors - Please check History of Logfiles\n",'blue';
# print_screen "\t\t---> TODO: logfiles has same other errors - Please check History of Logfiles\n",'blue';
# }
# found logfile
my $OldCheck;
my $OldSize;
@ -306,10 +300,10 @@ sub check_logfile {
# not same
$allEqual = 0;
if ( !($LogDetail[0] eq $OldCheck) && ($LogDetail[2] eq $OldSize) ) {
print_warn "\t\t---> TODO: check logfiles has not equal Checksums and same size !!!\n",'red';
print_screen "\t\t---> TODO: check logfiles has not equal Checksums and same size !!!\n",'red';
$LogFailed = 1;
} elsif ( $LogFailed eq 0 ) {
print_warn "\t\t---> TODO: check logfiles has not equal Checksums and different size ???\n",'red';
print_screen "\t\t---> TODO: check logfiles has not equal Checksums and different size ???\n",'red';
$LogFailed = 1;
}
} else {
@ -319,10 +313,11 @@ sub check_logfile {
# check bad values
if ( $LogDetail[3] < 0 ) {
print_warn "\t\t---> TODO: Found bad values ($LogDetail[3])it's ok ???\n", 'red';
print_screen "\t\t---> TODO: Found bad values ($LogDetail[3])it's ok ???\n", 'red';
$LogFailed = 1;
}
}
} # end foreach
if ( $allEqual eq 1 ) {
$oldEqual = 1;
} else {
@ -330,23 +325,22 @@ sub check_logfile {
}
# check Count Logfiles
if ( !($LogCount eq $LPartner) ) {
print_warn "\t\t---> TODO: Count of Logfiles different (have:$LPartner found:$LogCount)\n", 'red';
print_screen "\t\t---> TODO: Count of Logfiles different (have:$LPartner found:$LogCount)\n", 'red';
$LogFailed = 1;
$oldEqual = 0;
}
$LogCount=0;
###
if (( $oldEqual eq 1 ) && ( $LogFailed eq 0 )) {
print_warn "\t\t*---> TODO: logfiles has all equal Sizes and Checksums, can be deleted?\n",'green';
print_screen "\t\t*---> TODO: logfiles has all equal Sizes and Checksums, can be deleted?\n",'green';
# TODO check aktuell logfile
# TODO delete links !
} elsif (( $oldEqual eq 1 ) && ( $LogFailed ne 0 )) {
print_warn "\t\t*---> TODO: logfiles has same other errors - Please check History of Logfiles\n",'red';
print_screen "\t\t*---> TODO: logfiles has same other errors - Please check History of Logfiles\n",'red';
}
###
}
}
} # end logstatus
} # end foreach
}
#########################################################################################
@ -358,11 +352,11 @@ sub check_avg_limit {
$mars_avg_limit = $_;
}
close MARS_LOADAVG;
print_warn "-> Node AVG-Speed-Limit is ", 'bold';
print_screen "-> Node AVG-Speed-Limit is ", 'bold';
if (( !$mars_avg_limit ) || ( $mars_avg_limit < "1" )) {
print_warn "unset, used full speed\n", 'green';
print_screen "unset, used full speed\n", 'green';
} else {
print_warn "is $mars_avg_limit", 'red';
print_screen "is $mars_avg_limit", 'red';
}
}
}
@ -373,7 +367,7 @@ sub check_avg_limit {
sub check_disk_is_full {
my @diskfull = glob("$mars_dir/rest-space-*");
my $diskfull_mars = "";
print_warn "-> Diskspace on Cluster:", 'bold';
print_screen "-> Diskspace on Cluster:", 'bold';
if ( @diskfull ) {
foreach ( @diskfull) {
my $diskfull_space = check_link "$_";
@ -382,10 +376,10 @@ sub check_disk_is_full {
if ( $diskfull_space < 1 ) {
$diskfull_space = sprintf ("%.2f", $diskfull_space / 1024 );
if ( $diskfull_system eq $himself ) {
print_warn "\n\t-> ERROR ! Local Partition $mars_dir full ($diskfull_space kb Limit) !!! mars is stopping !!!\n\n", "red";
print_screen "\n\t-> ERROR ! Local Partition $mars_dir full ($diskfull_space kb Limit) !!! mars is stopping !!!\n\n", "red";
$diskfull_mars = "$diskfull_mars,$diskfull_system";
} else {
print_warn "\n\t-> WARNING ! Remotesystem $diskfull_system have mars-disk full ($diskfull_space kb Limit) !!!\n\n", "red";
print_screen "\n\t-> WARNING ! Remotesystem $diskfull_system have mars-disk full ($diskfull_space kb Limit) !!!\n\n", "red";
$diskfull_mars = "$diskfull_mars,$diskfull_system";
}
}
@ -393,7 +387,7 @@ sub check_disk_is_full {
}
# TODO /0
if ( !$diskfull_mars ) {
print_warn " ok\n", 'green';
print_screen " ok\n", 'green';
}
}
@ -412,7 +406,7 @@ sub check_mars_warn {
$mars_warn = "$mars_warn\t$_";
}
close MARS_WARN;
if ( $mars_warn ne "" ) { print_warn "-> MARS WARNINGS:\n", 'red'; print "$mars_warn" }
if ( $mars_warn ne "" ) { print_screen "-> MARS WARNINGS:\n", 'red'; print "$mars_warn" }
}
}
@ -431,26 +425,21 @@ sub check_mars_error {
$mars_error = "$mars_error\t$_";
}
close MARS_ERROR;
if ( $mars_error ne "" ) { print_warn "-> MARS ERRORS:\n", 'red'; print "$mars_error" }
if ( $mars_error ne "" ) { print_screen "-> MARS ERRORS:\n", 'red'; print "$mars_error" }
}
}
#########################################################################################
### main loop ...
while(1) {
print $clearscreen;
my $dateFormat = Date::Language->new('English');
#########################################################################################
### read mars infos
### info version
sub info_version {
### module
my %mars_info;
open ( my $lsmod_handle,'-|','lsmod | grep mars' ) || die "blub ... $!";
if (!<$lsmod_handle>) {
print_warn "Module Mars not running\n",'red';
print_screen "Module Mars not running\n",'red';
sleep(10);
next;
#exit 1;
}
open ( my $modinfo_handle, '-|', 'modinfo mars' ) || die "cannot run modinfo mars: $!";
while ( my $line = <$modinfo_handle> ) {
chomp $line;
@ -459,55 +448,43 @@ while(1) {
$mars_info{$key} = $value;
}
}
if ( $mars_info{author} eq "") {
print_warn "Module Mars not running\n",'red';
exit 1;
print_screen "Module Mars not running\n",'red';
next;
}
# status
print_warn "MARS Status - $himself, $version",'blue';
# TODO: if ( $$params->{'???'} ) { print_warn ", Listmodus $???",'blue'; }
if ( $params->{'resource'} ) { print_warn ", Ressource: $params->{'resource'}",'blue'; }
### status
print_screen "MARS Status - $himself, $version",'blue';
if ( $params->{'resource'} ) { print_screen ", Ressource: $params->{'resource'}",'blue'; }
print "\n";
# marsadm
### marsadm
my $MAVersion = qx"marsadm version";
print_warn "MARS Admin - $MAVersion",'blue';
print_screen "MARS Admin - $MAVersion",'blue';
### module
print_screen "MARS Module - $mars_info{version}\n",'blue';
# module
print_warn "MARS Module - $mars_info{version}\n",'blue';
# kernel
### kernel
my $KVersion = '/proc/version';
open my $Kfh, '<', "$KVersion" or die $!;
$KVersion = ( grep { /^Linux/ } <$Kfh> )[0];
$KVersion = ( split / /, $KVersion )[2];
print_warn "MARS Kernel - $KVersion\n",'blue';
print_screen "MARS Kernel - $KVersion\n",'blue';
print "-------------------------------------------------------------------------------\n";
if($params->{version} || $params->{v}) {
exit 0;
}
#########################################################################################
### check load-limit
check_avg_limit;
#########################################################################################
### check resources
### check ressources
sub check_ressource {
opendir my $dirhandle, $mars_dir or die "Cannot open $mars_dir: $!";
my @resources = grep { /^res/ && -d "$mars_dir/$_" } readdir $dirhandle;
if ( !@resources ) {
print_warn "---> no resources found\n", 'red';
print_screen "---> no resources found\n", 'red';
exit;
}
foreach my $res (@resources) {
my $ResPartner = 0;
my $ResInReplay = 0;
@ -532,9 +509,8 @@ while(1) {
printf "-> check resource %s, with %d bytes (%.3fTB), Primary Node is %s\n", $res_name, $res_size, $res_tbsize, $res_master;
print color 'reset' if ( $is_tty );
### hin self
print_warn " -> local node ($himself) ",'bold';
### him self
print_screen " -> local node ($himself) ",'bold';
my $ActualUsedLogfile = display_partner(
ressource => $res,
nodename => $himself,
@ -544,12 +520,16 @@ while(1) {
res_insync => \$ResInSync,
res_AULogfile => "",
);
$ResInReplaySum = $ResInReplay;
$ResInReplayPar = 1;
$ResInSyncSum = $ResInSync;
$ResInSyncPar = 1;
if ($ResInReplay ne "100.00") {
$ResInReplaySum = $ResInReplaySum + $ResInReplay;
$ResInReplayPar = $ResInReplayPar + 1;
}
if ($ResInSync ne "100.00") {
$ResInSyncSum = $ResInSyncSum + $ResInSync;
$ResInSyncPar = $ResInSyncPar + 1;
} # end him self
# not joined ...
### joined ...
if ( $ResPartner eq 1) {
### partners
opendir my $server_dh, "$mars_dir/$res" or die "Cannot open $mars_dir/$res: $!";
@ -558,7 +538,7 @@ while(1) {
foreach my $partner (@servers) {
$partner =~ s/^data-//;
if ( $partner eq $himself ) { next; }
print_warn " -> remote node ($partner) ", 'bold';
print_screen " -> remote node ($partner) ", 'bold';
display_partner(
ressource => $res,
nodename => $partner,
@ -569,41 +549,40 @@ while(1) {
res_AULogfile => $ActualUsedLogfile,
);
}
if ($ResInReplay != 100) {
if ($ResInReplay ne "100.00") {
$ResInReplaySum = $ResInReplaySum + $ResInReplay;
$ResInReplayPar = $ResInReplayPar + 1;
}
if ($ResInSync != 100) {
if ($ResInSync ne "100.00") {
$ResInSyncSum = $ResInSyncSum + $ResInSync;
$ResInSyncPar = $ResInSyncPar + 1;
}
}
} # end joined
### modus
if ( $ResPartner eq 0) {
if ( $params->{'long'} ) { print_warn " -> modus for $res_name is remote ($ResPartner nodes)\n",'bold'; }
if ( $params->{'long'} ) { print_screen " -> modus for $res_name is remote ($ResPartner nodes)\n",'bold'; }
} elsif ( $ResPartner eq 1 ) {
if ( $params->{'long'} ) { print_warn " -> modus for $res_name is standalone ($ResPartner node)\n",'bold'; }
if ( $params->{'long'} ) { print_screen " -> modus for $res_name is standalone ($ResPartner node)\n",'bold'; }
} else {
print_warn " -> modus for $res_name is cluster ($ResPartner nodes), ",'bold';
print_screen " -> modus for $res_name is cluster ($ResPartner nodes), ",'bold';
$ResInReplaySum = sprintf("%.2f", $ResInReplaySum / $ResInReplayPar );
$ResInSyncSum = sprintf("%.2f", $ResInSyncSum / $ResInSyncPar );
print_warn "ClusterSummary: ", 'black';
print_screen "ClusterSummary: ", 'black';
if ( $ResInReplaySum eq "100.00" ) {
print_warn "in replay ($ResInReplaySum%),", 'green';
print_screen "in replay ($ResInReplaySum%),", 'green';
} elsif ( $ResInReplaySum eq "0.00" ) {
print_warn "inaktiv ($ResInReplaySum%),", 'red';
print_screen "inaktiv ($ResInReplaySum%),", 'red';
} else {
print_warn "not in replay ($ResInReplaySum%),", 'red';
print_screen "not in replay ($ResInReplaySum%),", 'red';
}
if ( $ResInSyncSum eq "100.00" ) {
print_warn " in sync ($ResInSyncSum%)\n", 'green';
print_screen " in sync ($ResInSyncSum%)\n", 'green';
} else {
print_warn " not in sync ($ResInSyncSum%)\n", "red";
print_screen " not in sync ($ResInSyncSum%)\n", "red";
}
}
} # end modus
### debug output
if ( $params->{'long'} ) {
@ -611,15 +590,45 @@ while(1) {
if ( $params->{'history'} ) {
check_logfile( $res, $ResPartner );
}
#
#
#
} # end debug
} # end foreach
}
#########################################################################################
### main loop ...
while(1) {
my $dateFormat = Date::Language->new('English');
### version only
if ( $params->{version} || $params->{v}) {
info_version;
exit 0;
}
### monitor only
if ( $params->{monitor} || $params->{m}) {
print "Funktion nicht vorhanden\n";
exit 0;
}
#########################################################################################
### main run
print $clearscreen;
### read mars infos
if ( $params->{'long'} ) {
# read mars info
info_version;
# check load-limit
check_avg_limit;
}
### check resources
check_ressource;
### debug output
if ( $params->{'long'} ) {
### mars-warn/error
@ -630,7 +639,7 @@ while(1) {
}
}
### end
print color 'reset';
exit if (not $params->{'interval'});
sleep($params->{'interval'});

View File

@ -738,7 +738,7 @@ Advanced information are also available here: http://http://wiki.intranet.1and1.
sub version {
print "$0 $Id\n";
print "my IP is $ip\n";
#print "my IP is $ip\n";
exit 0;
}