1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
|
#!/usr/bin/perl -w
# ------------------------------------------------------------------------------
# File Name: check_raid.pl
# Author: Thomas Nilsen - Norway
# Date: 14/06/2003
# Version: 0.1
# Description: This script will check to see if any software raid
# devices are down.
# Email: thomas.nilsen@doc-s.co.uk
# WWW: www.doc-s.co.uk
# ------------------------------------------------------------------------------
# Copyright 2003 (c) Thomas Nilsen
# Credits go to Ethan Galstad for coding Nagios
# License GPL
# ------------------------------------------------------------------------------
# Date Author Reason
# ---- ------ ------
# 05/10/2004 PETER Palfrader Make it work without that 'use util (vars)'
# 14/06/2003 TN Initial Release
# - Format of mdstat assumed to be "2 line" per
# device with [??] on the second line.
# ------------------------------------------------------------------------------
use strict;
use warnings;
use Getopt::Long;;
use vars qw($opt_V $opt_h $opt_t $opt_F $PROGNAME);
use lib '/usr/local/nagios/libexec/';
# Default number of seconds to wait before giving up; -t/--timeout overrides it.
my $TIMEOUT=15;
# Nagios plugin exit codes. UNKNOWN must be 3: exiting with -1 is reported
# by the operating system as status 255, which is outside the documented
# plugin return-code range.
my %ERRORS = ( OK => 0, WARNING => 1, CRITICAL => 2, UNKNOWN => 3 );
$PROGNAME="check_raid";
# Forward declarations so the prototyped subs can be called before their
# definitions further down the file.
sub print_help ();
sub print_usage ();
# Blank the shell-related environment entries before doing any work
# (presumably so the plugin is safe under taint mode / setuid -- confirm).
$ENV{'PATH'}='';
$ENV{'BASH_ENV'}='';
$ENV{'ENV'}='';
my ( $line, $prevline, $stat, $state, @device, $msg, $status, $timeout );
$stat="/proc/mdstat";

# Option checking.
# -F and -t carry a value, so they are declared with "=s" / "=i". Declared as
# plain flags they only ever held 1, which turned "-t 20" into a one second
# timeout and made the filename depend on whatever word was left in @ARGV.
Getopt::Long::Configure('bundling');
$status = GetOptions(
    "V|version"    => \$opt_V,
    "h|help"       => \$opt_h,
    "F|filename=s" => \$opt_F,
    "t|timeout=i"  => \$opt_t,
);

# GetOptions has already complained on STDERR; show the synopsis and tell
# Nagios the check could not be performed.
if ( !$status ) {
    print_usage();
    exit $ERRORS{'UNKNOWN'};
}

# Version
if ($opt_V) {
    # Single quotes keep '$Revision' from being interpolated as a variable.
    print( $PROGNAME, ': $Revision: 0.1 $', "\n" );
    exit $ERRORS{'OK'};
}

# Help
if ($opt_h) {
    print_help();
    exit $ERRORS{'OK'};
}

# Filename supplied
if ( defined $opt_F ) {
    # Catch-all capture, presumably there to untaint the user-supplied path.
    $stat = $1 if ( $opt_F =~ /^(.*)$/ );
    if ( ! -r $stat ) {
        print "Invalid mdstat file: $opt_F\n";
        exit $ERRORS{'UNKNOWN'};
    }
}

# Only a positive, all-digit value replaces the default; alarm(0) would
# switch the watchdog off altogether.
$timeout = $TIMEOUT;
if ( $opt_t && $opt_t =~ /^([0-9]+)$/ ) {
    $timeout = $1;
}

# Just in case of problems, let's not hang Nagios
$SIG{'ALRM'} = sub {
    print ("ERROR: No response (alarm)\n");
    exit $ERRORS{'UNKNOWN'};
};
alarm($timeout);

# Start checking the file...
# Three-argument open on a lexical handle, with the result checked: an
# unreadable or missing mdstat file used to fall through and report
# "OK - All devices are online".
my $mdstat_fh;
if ( !open( $mdstat_fh, '<', $stat ) ) {
    print "UNKNOWN - Cannot open $stat: $!\n";
    exit $ERRORS{'UNKNOWN'};
}

$state    = $ERRORS{'OK'};
$msg      = "";
$prevline = "";    # defined even when the very first line is a status line

# Now check the mdstat file..
# A degraded array shows an underscore inside its bracketed member-status
# field, e.g. [U_] or [_UU]. The device name is the first word of the line
# just before it (the "two lines per device" layout described in the header).
while ( defined( $line = <$mdstat_fh> ) ) {
    if ( $line =~ /\[[U_]*_[U_]*\]/ ) {
        $state  = $ERRORS{'CRITICAL'};
        @device = split( / /, $prevline );
        my $name = defined $device[0] ? $device[0] : 'unknown';
        $msg = $msg . $name . ": - ";
    }
    $prevline = $line;
}
close($mdstat_fh);
alarm(0);    # file has been read; the watchdog is no longer needed

if ( $state == $ERRORS{'CRITICAL'} ) {
    print "CRITICAL - Device(s) $msg have failed\n";
}
elsif ( $state == $ERRORS{'OK'} ) {
    print "OK - All devices are online\n";
}
exit $state;
# Write the one-line command synopsis for this plugin to standard output.
# Takes no arguments; the program name comes from the global $PROGNAME.
sub print_usage () {
    my $synopsis = sprintf "Usage: %s -t <timeout> -F <filename>\n", $PROGNAME;
    print $synopsis;
}
# Print the full help screen to standard output: revision banner, copyright
# line, the usage synopsis (via print_usage) and a description of each option.
# Takes no arguments.
sub print_help () {
    # The banner is printed directly: print_revision() is not defined or
    # imported anywhere in this file, so calling it aborted -h/--help at
    # run time. Single quotes keep '$Revision' from being interpolated.
    print $PROGNAME, ': $Revision: 0.1 $', "\n";
    print "Copyright (c) 2003 Thomas Nilsen/Karl DeBisschop\n";
    print "\n";
    print_usage();
    # The default shown here must match $TIMEOUT at the top of the script
    # (15 seconds); the text previously claimed 10.
    print "Checks the mdstat file for errors on any configured software raid.\n
-t ( --timeout=INTEGER)
Seconds before script times out (default: 15)\n
-F ( --filename=FILE)
Full path and name to mdstat file (usually '/proc/mdstat') \n\n";
}
|