#!/usr/bin/perl -w

# check _physical_ disk status of disks on HP smart array controllers
# requires hpacucli
#
# does _not_ check raid status.  use arrayprobe for that.

# Copyright (c) 2008 Peter Palfrader
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
use strict;
use warnings;

# Nagios plugin exit codes, keyed by state name.
my %CODE = (
    'OK'       => 0,
    'WARNING'  => 1,
    'CRITICAL' => 2,
    'UNKNOWN'  => 3,
);

# Worst state recorded so far.  Only ever raised, and only through record().
my $EXITCODE = 'OK';

# Any die() prints its message on STDERR and ends the plugin with the
# UNKNOWN exit code instead of perl's default die status.
$SIG{'__DIE__'} = sub {
    print STDERR @_;
    exit $CODE{'UNKNOWN'};
};

# runcmd($cmd)
#
# Runs "sudo hpacucli $cmd" and returns a reference to the array of its
# output lines (trailing newlines still attached).
#
# $cmd is split on whitespace and the resulting words are passed to the
# list form of a pipe open, so no shell is involved in running the command.
#
# Dies if the command cannot be started or if it produced no output.
sub runcmd {
    my ($cmd) = @_;

    $cmd = "sudo hpacucli $cmd";
    my @argv = split ' ', $cmd;

    open(my $fh, '-|', @argv) or die("Cannot run $cmd: $!");
    my @lines = <$fh>;
    # The result of close (which reflects the command's exit status) is
    # deliberately not checked: the parser further down handles an
    # "Error: ..." line in the tool's output, which presumably comes with a
    # non-zero exit status -- TODO confirm against hpacucli.
    close $fh;

    die("no results from $cmd\n") if (scalar @lines == 0);
    return \@lines;
}

# record($newexit)
#
# Raises the global exit state to $newexit if its numeric code is higher
# than that of the current state; otherwise leaves it alone.  Note that by
# this ordering UNKNOWN (3) outranks CRITICAL (2).
#
# Dies if $newexit is not one of the keys of %CODE.
sub record {
    my ($newexit) = @_;

    die "code $newexit not defined\n" unless defined $CODE{$newexit};
    if ($CODE{$newexit} > $CODE{$EXITCODE}) {
        $EXITCODE = $newexit;
    }
    return;
}

# --- Discover controllers -------------------------------------------------
# Every non-empty line of "controller all show" must name a slot; anything
# else aborts the check (via die => UNKNOWN).
my $ctrlallshow = runcmd("controller all show");
my @controllers;
for my $line (@$ctrlallshow) {
    chomp $line;
    next if $line =~ /^$/;
    if ($line =~ /in Slot ([0-9]+) /) {
        push @controllers, $1;
        next;
    }
    die("Cannot read line '$line' gotten from hpacucli controller all show\n");
}

if (scalar @controllers == 0) {
    print "UNKNOWN: No smartarray controllers found with hpacucli\n";
    exit $CODE{'UNKNOWN'};
}

# --- Check every controller -----------------------------------------------
my @resultstr;    # one summary string per slot

# Slot identifiers are digit strings, so sort them numerically.
SLOT:
for my $slot (sort { $a <=> $b } @controllers) {
    my $pds = runcmd("controller slot=$slot pd all show");
    my @drives;          # IDs of all physical drives seen on this slot
    my $nodrives = 0;    # set when hpacucli reports no physical drives
    my %status;          # status text => list of drives/arrays/subsystems

    for my $line (@$pds) {
        chomp $line;
        next if $line =~ /^$/;
        next if $line =~ /^\S.*in Slot $slot/;      # controller header line
        next if $line =~ /^ *array [A-Z]$/;         # array heading, no state

        if ($line =~ /^ *(array [A-Z]) \(Failed\)$/) {
            record('CRITICAL');
            push @{$status{'Failed'}}, $1;
        } elsif ($line =~ /^Error: The specified controller does not have any physical drives on it.$/) {
            $nodrives = 1;
        } elsif ($line =~ /^ *physicaldrive (\S+) .* (OK|Predictive Failure|Failed|Rebuilding)(?:, spare)?\)$/) {
            my $drive = $1;
            my $state = $2;
            push @{$status{$state}}, $drive;

            if ($state eq 'OK') {
                # healthy, nothing to record
            } elsif ($state eq 'Predictive Failure' || $state eq 'Rebuilding') {
                record('WARNING');
            } elsif ($state eq 'Failed') {
                record('CRITICAL');
            } else {
                # Cannot happen with the alternation above; kept as a guard.
                record('UNKNOWN');
            }
            push @drives, $drive;
        } else {
            die("Cannot read line '$line' gotten from hpacucli controller slot=$slot pd all show\n");
        }
    }

    # Check that all drives have the proper transfer speed.
    # sometimes stuff breaks and they fall back to 10mb/sec.
    DRIVE:
    for my $drive (@drives) {
        # skip drives that are known to have failed
        next DRIVE
            if (exists $status{'Failed'} && grep { $drive eq $_ } @{$status{'Failed'}});

        my $type;
        if ($drive =~ /^[0-9]+:[0-9]+$/) {
            # scsi drives
            $type = 'SCSI';
        } elsif ($drive =~ /^[0-9]+I:[0-9]+:[0-9]+$/) {
            # SAS
            $type = 'SAS';
        } else {
            # Drive IDs of unknown form are never passed on to hpacucli.
            warn("Unknown diskdrive ID $drive\n");
            next DRIVE;
        }

        my $pd = runcmd("controller slot=$slot pd $drive show");

        # Drop everything up to and including the "physicaldrive" heading.
        while (defined $pd->[0] && $pd->[0] !~ /physicaldrive/) {
            shift @$pd;
        }
        shift @$pd;

        # Remaining lines are parsed as "Key: Value" pairs.
        my %value;
        for my $detail (@$pd) {
            if ($detail =~ m/^\s*(.*?):\s*(.*?)\s*$/) {
                $value{$1} = $2;
            }
        }

        my $key;         # name of the field holding the actual speed
        my $expected;    # value that field should have
        if ($type eq 'SCSI') {
            $key = 'Transfer Speed';
            if (!defined $value{'Transfer Mode'}) {
                record('WARNING');
                push @{$status{'unknown transfer mode'}}, $drive;
                next DRIVE;
            } elsif ($value{'Transfer Mode'} eq 'Ultra 3 Wide') {
                $expected = '160 MB/Sec';
            } elsif ($value{'Transfer Mode'} eq 'Ultra 320 Wide') {
                $expected = '320 MB/Sec';
            } else {
                record('WARNING');
                push @{$status{'unknown transfer mode'}}, $drive."(".$value{'Transfer Mode'}.")";
                next DRIVE;
            }
        } elsif ($type eq 'SAS') {
            $key = 'PHY Transfer Rate';
            # A missing 'PHY Count' field is treated like any value other
            # than '2'.
            if (defined $value{'PHY Count'} && $value{'PHY Count'} eq '2') {
                $expected = '3.0GBPS, Unknown';
            } else {
                $expected = '3.0GBPS';
            }
        } else {
            warn "Should not be here.  Do not know what to do with type '$type'\n";
            next DRIVE;
        }

        if (!defined $value{$key}) {
            record('WARNING');
            push @{$status{'unknown transfer speed'}}, $drive;
        } elsif ($value{$key} ne $expected) {
            record('WARNING');
            push @{$status{'bad transfer speed'}}, $drive."(".$value{$key}.")";
        }
    }

    if ($nodrives && scalar keys %status > 0) {
        # "No drives" error together with parsed drive/array states.
        push @resultstr, "Slot $slot: have no drives but status results?";
        record('UNKNOWN');
        next SLOT;
    } elsif ($nodrives) {
        push @resultstr, "Slot $slot: no drives";
        next SLOT;
    }

    # Controller subsystem status lines ("<subsystem> Status: <state>");
    # any state other than OK is a WARNING.
    my $cst = runcmd("controller slot=$slot show status");
    for my $line (@$cst) {
        chomp $line;
        next if $line =~ /^$/;
        next if $line =~ /^\S.*in Slot $slot/;

        if ($line =~ /^ *(.*) Status: (.*)$/) {
            my $system = $1;
            my $state  = $2;
            push @{$status{$state}}, $system;
            if ($state ne 'OK') {
                record('WARNING');
            }
        } else {
            die("Cannot read line '$line' gotten from hpacucli controller slot=$slot show status\n");
        }
    }

    # Sorted keys keep the summary text stable from run to run.
    my $summary = join(" - ",
        map { $_.": ".join(", ", @{$status{$_}}) } sort keys %status);
    push @resultstr, "Slot $slot: $summary";
}

print "$EXITCODE: ", join(" --- ", @resultstr), "\n";
exit $CODE{$EXITCODE};