#!/usr/bin/perl -w
# ------------------------------------------------------------------------------
# File Name:    check_raid.pl
# Author:       Thomas Nilsen - Norway
# Date:         14/06/2003
# Version:      0.1
# Description:  This script will check to see if any software raid
#               devices are down.
# Email:        thomas.nilsen@doc-s.co.uk
# WWW:          www.doc-s.co.uk
# ------------------------------------------------------------------------------
# Copyright 2003 (c) Thomas Nilsen
# Credits go to Ethan Galstad for coding Nagios
# License GPL
# ------------------------------------------------------------------------------
# Date          Author            Reason
# ----          ------            ------
# 05/10/2004    PETER Palfrader   Make it work without that 'use util (vars)'
# 14/06/2003    TN                Initial Release
#                                 - Format of mdstat assumed to be "2 line" per
#                                   device with [??] on the second line.
# ------------------------------------------------------------------------------

use strict;
use warnings;
use Getopt::Long;
use vars qw($opt_V $opt_h $opt_t $opt_F $PROGNAME);
use lib '/usr/local/nagios/libexec/';

# Default number of seconds before the plugin gives up.
my $TIMEOUT = 15;

# Plugin exit codes. UNKNOWN is 3 by Nagios plugin convention; a Perl
# "exit -1" would reach the caller as 255, which is outside the defined range.
my %ERRORS = (
    OK       => 0,
    WARNING  => 1,
    CRITICAL => 2,
    UNKNOWN  => 3,
);

$PROGNAME = "check_raid";

sub print_help ();
sub print_usage ();

# Scrub the environment; the plugin never needs to spawn anything.
$ENV{'PATH'}     = '';
$ENV{'BASH_ENV'} = '';
$ENV{'ENV'}      = '';

my $stat = "/proc/mdstat";

# Option checking.
# -t and -F take a value, so they must be declared with "=i" / "=s";
# declaring them as plain flags only ever stores 1 in the variable.
Getopt::Long::Configure('bundling');
my $status = GetOptions(
    "V|version"    => \$opt_V,
    "h|help"       => \$opt_h,
    "F|filename=s" => \$opt_F,
    "t|timeout=i"  => \$opt_t,
);

# Reject unknown or malformed options instead of silently ignoring them.
if ( !$status ) {
    print_usage();
    exit $ERRORS{'UNKNOWN'};
}

# Version
if ($opt_V) {
    print $PROGNAME, ': $Revision: 0.1 $', "\n";
    exit $ERRORS{'OK'};
}

# Help
if ($opt_h) {
    print_help();
    exit $ERRORS{'OK'};
}

# Filename supplied
if ( defined $opt_F ) {
    # Copy through a capture so the value is untainted if run under -T.
    $stat = $1 if ( $opt_F =~ /^(.*)$/ );
    if ( !-r $stat ) {
        print "Invalid mdstat file: $opt_F\n";
        exit $ERRORS{'UNKNOWN'};
    }
}

# Only a positive integer overrides the default timeout (alarm(0) would
# disable the watchdog entirely).
my $timeout = $TIMEOUT;
if ( defined $opt_t && $opt_t =~ /^([0-9]+)$/ && $1 > 0 ) {
    $timeout = $1;
}

# Just in case of problems, let's not hang Nagios
$SIG{'ALRM'} = sub {
    print("ERROR: No response (alarm)\n");
    exit $ERRORS{'UNKNOWN'};
};
alarm($timeout);

# Start checking the file...
my $fh;
if ( !open( $fh, '<', $stat ) ) {
    print "UNKNOWN - Cannot open mdstat file $stat: $!\n";
    exit $ERRORS{'UNKNOWN'};
}

my $state    = $ERRORS{'OK'};
my $msg      = "";
my $prevline = "";

# Now check the mdstat file..
# The member status (e.g. "[UU]" or "[U_]") is on the line following the
# "mdX : ..." line, so the previous line is kept to recover the device name.
while ( my $line = <$fh> ) {
    # An underscore inside the bracketed status marks a missing/failed member.
    if ( $line =~ /\[[U_]*_[U_]*\]/ ) {
        $state = $ERRORS{'CRITICAL'};
        my ($device) = split( / /, $prevline );
        $device = 'unknown' unless defined $device;
        $msg = $msg . $device . ": - ";
    }
    $prevline = $line;
}
close($fh);
alarm(0);

if ( $state == $ERRORS{'CRITICAL'} ) {
    print "CRITICAL - Device(s) $msg have failed\n";
}
elsif ( $state == $ERRORS{'OK'} ) {
    print "OK - All devices are online\n";
}
exit $state;

# Print a one-line usage summary.
sub print_usage () {
    print "Usage: $PROGNAME -t <timeout> -F <filename>\n";
}

# Print the revision, copyright, usage summary and option descriptions.
sub print_help () {
    print $PROGNAME, ': $Revision: 0.1 $', "\n";
    print "Copyright (c) 2003 Thomas Nilsen/Karl DeBisschop\n";
    print "\n";
    print_usage();
    print "Checks the mdstat file for errors on any configured software raid.\n"
        . " -t ( --timeout=INTEGER)\n"
        . "    Seconds before script times out (default: $TIMEOUT)\n"
        . " -F ( --filename=FILE)\n"
        . "    Full path and name to mdstat file (usually '/proc/mdstat')\n\n";
}