#!/usr/bin/perl -w
#
###########################################################################
## By Da Beave (beave@vistech.net)
##
## This is the _6th_ time I've written this routine.  Most of the time it
## was a shell script.  Perl is even better for this task (no forking!).
## So, we shall call this:
##
## Monitor version 6.0! [Nov 19th, 2004] - Da re-re-re-re-re-re-created
##
## This is a simple monitor utility.  Its function is to report (via
## e-mail) when servers lose connectivity.  It's actually a little more
## complicated than that.  When a server is seen as "down" for the first
## time, it is checked X more times.  Maybe it's a temporary glitch?  If
## the server is still down after X tries, it is added to a "downhost"
## list and an e-mail is shot off to the appropriate people.  The monitor
## then continues checking the rest of the servers.  After a normal run,
## it goes back and re-checks the "downhost" list.  Why?  To see if any
## "down" servers came back up!
##
## What does all this mean?  It notifies you _one_ time when the server
## goes down, and notifies you _one_ time when it is back online.
###########################################################################

use strict;      # Always..
use Net::Ping;   # This prevents us from using the -T flag! Insecure dep.

# Take care of a few security related aspects.
$|++;                                                # Autoflush STDOUT.
delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV TERM)};  # Don't trust ENV.

# Primary configuration file.  One monitored server per line, '#' starts a
# comment, blank lines are ignored.  Layout of each line:
#
#   1:2:3:4:5:6
#
#   1 = Hostname or IP address
#   2 = Short description (e.g. "Vistech Communications, Inc")
#   3 = Where to e-mail events
#   4 = How long to wait (seconds) between retry attempts
#   5 = How many times to retry before flagging the host as "down"
#   6 = Value handed to Net::Ping->ping() as its second argument.  The
#       original notes call this "number of packets", but Net::Ping treats
#       that argument as a timeout in seconds.
#
# Example:
#1.1.1.1:Testing of Monitor:beave@vistech.net:1:3:5
my $config = "/etc/monitor.conf";

my $totalsleep  = 30;                     # Seconds to wait after a full run.
my $address     = 'monitor@vistech.net';  # Who our e-mail appears to be from.
my $addressdesc = "(Vistech Monitor)";    # Ditto.
my $sendmail    = "/usr/sbin/sendmail";   # Absolute path: PATH is wiped above.

my @downhosts;   # Hosts already reported as down (notified exactly once).

# Package globals filled in by findhost() with the most recent match.
our ($host, $desc, $email, $delay, $numping, $numtimes);

my $p = Net::Ping->new();

# Turn one raw configuration line into its six fields (file order).
# Returns the empty list for blank / comment-only lines.  Missing trailing
# fields come back as undef; validation is left to the caller.
sub parse_config_line {
    my ($line) = @_;

    chomp $line;
    $line =~ s/#.*//;
    $line =~ s/^\s+//;
    $line =~ s/\s+$//;
    return unless length $line;

    my @fields = split /:/, $line;
    return @fields[0 .. 5];
}

# Read and validate the whole configuration file.
# Returns a list of array refs, one per host, fields in file order.
# Dies if the file cannot be opened or if any entry has an empty/zero field
# (same strictness as the original inline check).
sub load_config {
    open(my $cfg, '<', $config) or die "Cannot open $config [$!]\n";

    my @entries;
    while (my $line = <$cfg>) {
        my @fields = parse_config_line($line) or next;

        # Check to make sure the config file has the correct fields.
        if (grep { !$_ } @fields) {
            die "Your configuration file appears to be incorrect ("
              . $config . " line " . $. . ")\n";
        }
        push @entries, \@fields;
    }
    close($cfg);

    return @entries;
}

# Look up host $s in the configuration file.
# On a match, updates the package globals ($host, $desc, $email, $delay,
# $numtimes, $numping) and returns the six fields in file order.
# Returns the empty list when the host is no longer listed, so callers
# never act on stale values left over from another line.
sub findhost {
    my ($s) = @_;

    open(my $cfg, '<', $config) or die "Cannot open $config [$!]\n";
    while (my $line = <$cfg>) {
        my @fields = parse_config_line($line) or next;
        next unless defined $fields[0] && $fields[0] eq $s;

        close($cfg);
        # Same field order as the main loop: 5 = retries, 6 = ping argument.
        ($host, $desc, $email, $delay, $numtimes, $numping) = @fields;
        return @fields;
    }
    close($cfg);

    return;
}

# Pipe one message to sendmail.  Headers are taken from the message itself
# (-t); -oi stops a lone "." from ending the message early.
# Dies if sendmail cannot be started (as the original did); a non-zero
# exit from sendmail only produces a warning so the monitor keeps running.
sub send_mail {
    my ($to, $subject, $body) = @_;

    # A dying sendmail must not take the monitor down with SIGPIPE.
    local $SIG{PIPE} = 'IGNORE';

    # List-form pipe open: no shell is involved.
    open(my $mail, '|-', $sendmail, '-oi', '-t')
        or die "Error - Can't send mail [$!]";

    print {$mail} "From: $address $addressdesc\n",
                  "To: $to\n",
                  "Subject: $subject\n",
                  "\n",
                  $body;

    close($mail)
        or warn "sendmail did not exit cleanly [status $?]\n";

    return;
}

while (1)       # Set infinite loop
{
    # Pretty header.
    my $currenttime = scalar localtime;
    print "---[ $currenttime ]-----------------------------------------\n";

    # The configuration is re-read at the start of every full run.  This
    # way you can add new servers to the monitor list and never have to
    # kill the monitor process!
    my %is_down = map { $_ => 1 } @downhosts;

    for my $entry (load_config()) {
        my ($h, $d, $e, $wait, $tries, $pingarg) = @$entry;

        # Hosts already reported as down are skipped here, which prevents
        # looped e-mails/notifications.  They are re-checked further below.
        next if $is_down{$h};

        print "Checking: $h [$d][Delay:$wait] Status: ";
        if ($p->ping($h, $pingarg)) {
            print "Okay\n";
            next;
        }

        print "Down\n";
        print "$h down at ", scalar localtime, "\n";
        print "Trying $tries more times (Sleeping for $wait seconds)\n";

        # Assume the worst until one retry succeeds.  We stop at the first
        # successful ping: the host is only flagged when every retry fails.
        my $flag = 1;
        for (my $i = 1; $i < $tries + 1; $i++) {
            sleep $wait;
            if ($p->ping($h, $pingarg)) {
                print "$h is back up!\n";
                $flag = 0;      # System came back up while we were waiting.
                last;
            }
            print "$h is still down (Attempt # $i)\n";
        }
        next unless $flag;

        print "$h is down. Reporting to $e at ", scalar localtime, "\n";
        print "Added $h to down'ed internal list\n";
        push @downhosts, $h;
        $is_down{$h} = 1;

        $currenttime = scalar localtime;    # WHAT TIME IS IT?!?!

        # NOTE(review): the original message text was lost from the damaged
        # source; the wording below is a reconstruction -- confirm.
        send_mail(
            $e,
            "[Monitor] $h ($d) is DOWN",
            "$h ($d) stopped responding.\n"
              . "\n"
              . "Host        : $h\n"
              . "Description : $d\n"
              . "Down since  : $currenttime\n"
              . "\n"
              . "You will be notified once when the host is reachable again.\n"
        );
    }

    # Now check whether any systems in the downhost list are back up.  If
    # they are, e-mail people and remove them from the list.
    if (!@downhosts) {
        print "- All systems are online -\n";
    }
    else {
        print "* Checking Unreachable Hosts *\n";
    }

    my @downhoststmp;   # Hosts that are still down; rebuilt on every run.

    # If @downhosts has no data, the loop body is simply skipped.
    for my $check (@downhosts) {
        if (!$p->ping($check, 5)) {     # 5 is a safe number
            # NOTE(review): reconstructed -- the original handling of a
            # still-unreachable host is not visible in the source.
            print "$check is still unreachable\n";
            push @downhoststmp, $check;
            next;
        }

        # System is back UP!  Pull its details from the configuration.
        my @info = findhost($check);
        if (!@info || !$info[2]) {
            # Entry was removed (or broken) while the host was down: there
            # is nobody to notify, so just stop tracking it.
            print "$check is back up, but is no longer in $config\n";
            next;
        }
        my (undef, $d, $e) = @info;

        $currenttime = scalar localtime;    # Grab current time.
        print "$check is back up. Reporting to $e at $currenttime\n";

        # NOTE(review): reconstructed message text -- confirm wording.
        send_mail(
            $e,
            "[Monitor] $check ($d) is back UP",
            "$check ($d) is responding again.\n"
              . "\n"
              . "Host        : $check\n"
              . "Description : $d\n"
              . "Back up at  : $currenttime\n"
        );
    }

    @downhosts = @downhoststmp;

    sleep $totalsleep;  # Wait before starting the next full run.
}