summaryrefslogtreecommitdiff
path: root/nagios-check-raid.pl
blob: 94b99ed1fa3a7bbb81f7173127b369a2d33c8382 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/perl -w
# ------------------------------------------------------------------------------
# File Name:            check_raid.pl
# Author:               Thomas Nilsen - Norway
# Date:                 14/06/2003
# Version:              0.1
# Description:          This script will check to see if any software raid
#                       devices are down.
# Email:                thomas.nilsen@doc-s.co.uk
# WWW:                  www.doc-s.co.uk
# ------------------------------------------------------------------------------
# Copyright 2003 (c) Thomas Nilsen
# Credits go to Ethan Galstad for coding Nagios
# License GPL
# ------------------------------------------------------------------------------
# Date          Author          Reason
# ----          ------          ------
# 05/10/2004    PETER Palfrader Make it work without that 'use util (vars)'
# 14/06/2003    TN              Initial Release
#                               - Format of mdstat assumed to be "2 line" per
#                                 device with [??] on the second line.
# ------------------------------------------------------------------------------

use strict;
use warnings;
use Getopt::Long;;
use vars qw($opt_V $opt_h $opt_t $opt_F $PROGNAME);
use lib '/usr/local/nagios/libexec/';
# Default timeout in seconds, used when -t/--timeout is absent or is not a
# positive integer.
my $TIMEOUT=15;
# Exit statuses reported to Nagios.  UNKNOWN has to be 3: the former value of
# -1 left the process with exit status 255, which is outside the range the
# plugin guidelines define.
my %ERRORS = ( OK => 0, WARNING => 1, CRITICAL => 2, UNKNOWN => 3 );


$PROGNAME="check_raid";

# Forward declarations; both subs are defined at the end of the file.
sub print_help ();
sub print_usage ();

# Blank out PATH, BASH_ENV and ENV before doing anything else.
$ENV{'PATH'}='';
$ENV{'BASH_ENV'}='';
$ENV{'ENV'}='';

# mdstat file to inspect; -F/--filename replaces it.
my $stat = "/proc/mdstat";

#Option checking
# -F and -t carry a value, so they are declared with "=s" / "=i".  As plain
# flags they were simply set to 1: "-t 30" armed a one second alarm, and the
# file name had to be taken from @ARGV by position, which broke as soon as
# -t was given before -F.
Getopt::Long::Configure('bundling');
my $status = GetOptions(
        "V|version"    => \$opt_V,
        "h|help"       => \$opt_h,
        "F|filename=s" => \$opt_F,
        "t|timeout=i"  => \$opt_t);
# GetOptions has already complained on STDERR; show the usage line and stop
# rather than running the check with a half-parsed command line.
if ( !$status ) {
        print_usage();
        exit $ERRORS{'UNKNOWN'};
}
# Version
if ($opt_V) {
        print($PROGNAME, ': $Revision: 0.1 $', "\n");
        exit $ERRORS{'OK'};
}
# Help
if ($opt_h) {
        print_help();
        exit $ERRORS{'OK'};
}
# Filename supplied
if ( defined $opt_F ) {
        # Take the path from a capture so the value is untainted should the
        # script ever be run under -T.
        $stat = $1 if ( $opt_F =~ /^(.*)$/ );

        if ( ! -r $stat ) {
                print "Invalid mdstat file: $opt_F\n";
                exit $ERRORS{'UNKNOWN'};
        }
}

# Only a positive integer overrides the default; alarm(0) would switch the
# watchdog off instead of arming it.
my $timeout = $TIMEOUT;
if ( defined $opt_t && $opt_t =~ /^([0-9]+)$/ && $1 > 0 ) {
        $timeout = $1;
}

# Just in case of problems, let's not hang Nagios
$SIG{'ALRM'} = sub {
        print ("ERROR: No response (alarm)\n");
        exit $ERRORS{'UNKNOWN'};
};
alarm($timeout);

# Start checking the file...
# Three-argument open with a checked result: a file that cannot be opened
# must be reported as UNKNOWN, not fall through to "OK" with nothing read.
my $fh;
if ( !open( $fh, '<', $stat ) ) {
        print "UNKNOWN - Cannot open $stat: $!\n";
        exit $ERRORS{'UNKNOWN'};
}
my $state = $ERRORS{'OK'};
my $msg = "";
my $prevline = "";

# Now check the mdstat file..
# mdstat is assumed to use two lines per array: the first starts with the
# device name, the second carries the member map such as [UU] or [U_].
# A "_" anywhere inside that bracketed map marks a missing member.
while ( my $line = <$fh> ) {
        if ( $line =~ /\[[U_]*_[U_]*\]/ ) {
                $state = $ERRORS{'CRITICAL'};
                # The device name is the first word of the preceding line.
                my $device = ( $prevline =~ /^(\S+)/ ) ? $1 : "unknown";
                $msg = $msg . $device . ": - ";
        }
        $prevline = $line;
}
close($fh);

if ( $state == $ERRORS{'CRITICAL'} ) {
        print "CRITICAL - Device(s) $msg have failed\n";
} elsif ( $state == $ERRORS{'OK'} ) {
        print "OK - All devices are online\n";
}
exit $state;


# Write the one-line usage summary for this plugin to STDOUT.
# Takes no arguments; the program name comes from the global $PROGNAME.
sub print_usage () {
        my $usage_line = "Usage: $PROGNAME -t <timeout> -F <filename>\n";
        print $usage_line;
}

# Write the full help text to STDOUT: revision line, copyright notice, the
# usage summary from print_usage() and a description of each option.
# Takes no arguments; the program name comes from the global $PROGNAME.
sub print_help () {
        # print_revision() is not defined in this file and no loaded module
        # provides it, so calling it aborted -h with "Undefined subroutine".
        # Print the same revision line that -V shows instead.
        print $PROGNAME, ': $Revision: 0.1 $', "\n";
        print "Copyright (c) 2003 Thomas Nilsen/Karl DeBisschop\n";
        print "\n";
        print_usage();
        # The default quoted below must stay in step with $TIMEOUT at the top
        # of the script (15 seconds).
        print "Checks the mdstat file for errors on any configured software raid.\n
-t ( --timeout=INTEGER)
	Seconds before script times out (default: 15)\n
-F ( --filename=FILE)
	Full path and name to mdstat file (usually '/proc/mdstat') \n\n";
}