summaryrefslogtreecommitdiff
path: root/munin/smart_
diff options
context:
space:
mode:
Diffstat (limited to 'munin/smart_')
-rwxr-xr-xmunin/smart_344
1 files changed, 344 insertions, 0 deletions
diff --git a/munin/smart_ b/munin/smart_
new file mode 100755
index 0000000..9d1d062
--- /dev/null
+++ b/munin/smart_
@@ -0,0 +1,344 @@
+#!/usr/bin/env python
+
+# taken from the debian munin-node package, version 1.2.4-1
+###########################################################
+
+# -*- encoding: iso-8859-1 -*-
+#
+# Wildcard-plugin to monitor S.M.A.R.T attribute values through smartctl,
+# which is part of smartmontools package:
+# http://smartmontools.sourceforge.net/
+#
+# To monitor a S.M.A.R.T device, link smart_<device> to this file.
+# E.g.
+# ln -s /usr/share/munin/plugins/smart_ /etc/munin/plugins/smart_hda
+# ...will monitor /dev/hda.
+#
+# Needs following minimal configuration in plugin-conf.d/munin-node:
+# [smart_*]
+# user root
+# group disk
+#
+# Parameters
+# smartpath - Specify path to smartctl program (Default: /usr/sbin/smartctl)
+# smartargs - Override '-a' argument passed to smartctl with '-A -i'+smartargs
+#
+# Parameters can be specified on a per-drive basis, eg:
+# [smart_hda]
+# user root
+# group disk
+# env.smartargs -H -c -l error -l selftest -l selective -d ata
+# env.smartpath /usr/local/sbin/smartctl
+#
+# [smart_twa0-1]
+# user root
+# group disk
+# env.smartargs -H -l error -d 3ware,1
+#
+# [smart_twa0-2]
+# user root
+# group disk
+# env.smartargs -H -l error -d 3ware,2
+#
+# Author: Nicolas Stransky <Nico@neo-lan.net>
+#
+# v1.0 22/08/2004 - First draft
+# v1.2 28/08/2004 - Clean up the code, add a verbose option
+# v1.3 14/11/2004 - Compatibility with python<2.2. See comments in the code
+# v1.4 17/11/2004 - Deal with non zero exit codes of smartctl
+# - config now prints the critical thresholds, as reported by smartctl
+# v1.5 18/11/2004 - Plot smartctl_exit_code bitmask
+# v1.6 21/11/2004 - Add autoconf and suggest capabilities
+# - smartctl path can be passed through "smartpath" environment variable
+# - Additional smartctl args can be passed through "smartargs" environment variable
+# v1.7 29/11/2004 - Add suggest capabilities for NetBSD, OpenBSD, FreeBSD and SunOS.
+# - Allow to override completely the smartctl arguments with "smartargs"
+# v1.8 16/02/2005 - Exit status field now only triggers warnings, not criticals.
+# v1.9 07/07/2005 - Allow to query several drives on the same 3ware card.
+# - Correct a bug when '-i' was not listed in smartargs
+# - Don't fail if no value was obtained for hard drive model
+# v1.10 19/08/2005 - smartctl_exit_code is now a numerical value
+#
+# Copyright (c) 2004,2005 Nicolas Stransky.
+#
+# Permission to use, copy, and modify this software with or without fee
+# is hereby granted, provided that this entire notice is included in
+# all source code copies of any software which is or includes a copy or
+# modification of this software.
+#
+# THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+# IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+# REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+# MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+# PURPOSE.
+#
+#
+# Magic markers
+#%# capabilities=autoconf suggest
+#%# family=auto
+
+import os, sys, string, pickle
+from math import log
+
+# Increase verbosity of the plugin by putting verbose=True (for
+# debugging purposes)
+# For python < 2.2 compliance, put verbose=0 instead of "False"
+verbose=False
+
+# Modify to your needs:
+statefiledir='/var/lib/munin/plugin-state/'
+
+
+def verboselog(s):
+ sys.stderr.write(plugin_name+': '+s+'\n')
+
+if not verbose :
+ verboselog = lambda s: None
+
+def read_values(hard_drive) :
+ num_exit_status=0
+ try :
+ verboselog('Reading S.M.A.R.T values')
+ os.putenv('LC_ALL','C')
+ smart_output=os.popen(os.getenv('smartpath','/usr/sbin/smartctl')+' '+os.getenv('smartargs','-a')+' -A -i /dev/'+hard_drive)
+ read_values=0
+ # For python < 2.2 compliance, switch comments on next 2 lines.
+ for l in smart_output :
+ #for l in smart_output.readlines() :
+ if l[:-1]=='' :
+ read_values=0
+ elif l[:13]=='Device Model:' or l[:7]=='Device:' :
+ model_list=string.split(string.split(l,':')[1])
+ try: model_list.remove('Version')
+ except : None
+ model=string.join(model_list)
+ if read_values==1 :
+ smart_attribute=string.split(l)
+ smart_values[string.replace(smart_attribute[1],'-','_')]={"value":smart_attribute[3],"threshold":smart_attribute[5]}
+ elif l[:18]=="ID# ATTRIBUTE_NAME" :
+ # Start reading the Attributes block
+ read_values=1
+ exit_status=smart_output.close()
+ if exit_status!=None :
+ # smartctl exit code is a bitmask, check man page.
+ num_exit_status=int(exit_status/256)
+ if num_exit_status<=2 :
+ verboselog('smartctl cannot access S.M.A.R.T values on drive '+hard_drive+'. Command exited with code '+str(num_exit_status)+' ('+str(exit_status/256)+')')
+ sys.exit(1)
+ else :
+ verboselog('smartctl exited with code '+str(num_exit_status)+'. '+hard_drive+' may be FAILING RIGHT NOW !')
+ except :
+ verboselog('Cannot access S.M.A.R.T values ! Check user rights or propper smartmontools installation. Quitting...')
+ sys.exit(1)
+ if smart_values=={} :
+ verboselog('Can\'t find any S.M.A.R.T value to plot ! Quitting...')
+ sys.exit(1)
+ smart_values["smartctl_exit_status"]={"value":str(num_exit_status),"threshold":"1"}
+ try: smart_values["model"]=model
+ # For some reason we may have no value for "model"
+ except : smart_values["model"]="unknown"
+
+def open_state_file(mode) :
+ return open(statefiledir+'/smart-'+string.join(hard_drive,"-")+'.state',mode)
+
+def update_state_file() :
+ try:
+ verboselog('Saving statefile')
+ pickle.dump(smart_values,open_state_file("w"))
+ except :
+ verboselog('Error trying to save state file ! Check access rights')
+
+def print_plugin_values() :
+ verboselog('Printing S.M.A.R.T values')
+ for key in smart_values.keys() :
+ if key=="model" : continue
+ print(key+".value "+smart_values[key]["value"])
+
+def print_config(hard_drive,smart_values) :
+ if os.path.exists(statefiledir+'/smart-'+string.join(hard_drive,"-")+'.state'):
+ try :
+ verboselog('Try to recall previous S.M.A.R.T attributes for '+string.join(hard_drive,","))
+ smart_values_state=pickle.load(open_state_file("r"))
+ except :
+ verboselog('Error opening existing state file !')
+ sys.exit(1)
+ else :
+ verboselog('No state file, reading S.M.A.R.T values for the first time')
+ read_values(hard_drive[0])
+ pickle.dump(smart_values,open_state_file("w"))
+ smart_values_state=smart_values
+
+ verboselog('Printing configuration')
+ print('graph_title S.M.A.R.T values for drive '+string.join(hard_drive,","))
+ print('graph_vlabel Attribute S.M.A.R.T value')
+ print('graph_args --base 1000 --lower-limit 0')
+ print('graph_category disk')
+ print('graph_info This graph shows the value of all S.M.A.R.T attributes of drive '+string.join(hard_drive,",")+' ('+smart_values_state['model']+'). smartctl_exit_status is the return value of smartctl. A non-zero return value indicates an error, a potential error, or a fault on the drive.')
+ attributes=smart_values_state.keys()
+ attributes.sort()
+ for key in attributes :
+ if key in ['smartctl_exit_status','model'] : continue
+ print(key+'.label '+key)
+ print(key+'.draw LINE2')
+ # Following line can be commented out if necessary. Only useful for warning reports about S.M.A.R.T values
+ print(key+'.critical '+smart_values_state[key]["threshold"]+':')
+ # Let's place smartctl_exit_status at the end
+ print('smartctl_exit_status.label smartctl_exit_status')
+ print('smartctl_exit_status.draw LINE2')
+ # Following line can be commented out if necessary. Only useful for warning reports about S.M.A.R.T values
+ print('smartctl_exit_status.warning '+smart_values_state['smartctl_exit_status']["threshold"])
+
+def get_hard_drive_name() :
+ try :
+ hard_drive=[plugin_name[string.rindex(plugin_name,'_')+1:]]
+ if os.uname()[0]=="SunOS" :
+ try :
+ # if hard_drive name starts with "rdsk" or "rmt", try to reconstruct the path
+ if hard_drive[0][0:4]=="rdsk":
+ hard_drive[0]=os.path.join("rdsk",hard_drive[0][4:])
+ elif hard_drive[0][0:3]=="rmt":
+ hard_drive[0]=os.path.join("rmt",hard_drive[0][3:])
+ except :
+ verboselog('Failed to find SunOS hard_drive')
+ # For 3ware cards, we have to set multiple plugins for the same hard drive name.
+ # Let's see if we find a '-' in the drive name.
+ if hard_drive[0].find('-')!=-1:
+ # Put the drive name and it's number in a list
+ hard_drive=[hard_drive[0][:string.rindex(hard_drive[0],'-')],hard_drive[0][string.rindex(hard_drive[0],'-')+1:]]
+ # Chech that the drive exists in /dev
+ if not os.path.exists('/dev/'+hard_drive[0]):
+ verboselog('/dev/'+hard_drive[0]+' not found !')
+ sys.exit(1)
+ return(hard_drive)
+ except :
+ verboselog('No S.M.A.R.T device name found in plugin\'s symlink !')
+ sys.exit(1)
+
+def find_smart_drives() :
+ # Try to autodetect Linux, *BSD, SunOS drives. Don't try to autodetect drives on a 3Ware card.
+ drives=[]
+ if os.uname()[0]=="Linux" :
+ if os.path.exists('/sys/block/'):
+ # Running 2.6, yeah
+ try :
+ for drive in os.listdir('/sys/block/') :
+ if drive[:2] in ['md','fd','lo','ra','dm'] : continue # Ignore MD, Floppy, loop , RAM and LVM devices.
+ try :
+ read_values(drive)
+ drives.append(drive)
+ except :
+ continue
+ except :
+ verboselog('Failed to list devices in /sys/block')
+ else :
+ verboselog('Not running linux2.6, failing back to /proc/partitions')
+ try :
+ partitions=open('/proc/partitions','r')
+ L=partitions.readlines()
+ for l in L :
+ words=string.split(l)
+ if len(words)==0 or words[0][0] not in string.digits : continue
+ if words[0] in ['1','9','58','254'] : continue # Ignore RAM, md, LVM and LVM2 devices
+ if words[-1][-1] not in string.digits :
+ try :
+ read_values(words[-1])
+ drives.append(words[-1])
+ except :
+ continue
+ verboselog('Found drives in /proc/partitions ! '+str(drives))
+ except :
+ verboselog('Failed to list devices in /proc/partitions')
+ elif os.uname()[0]=="OpenBSD" :
+ try :
+ sysctl_kerndisks=os.popen('sysctl hw.disknames')
+ kerndisks=string.strip(sysctl_kerndisks.readline())
+ for drive in string.split(kerndisks[string.rindex(kerndisks,'=')+1:],',') :
+ if drive[:2] in ['md','cd','fd'] : continue # Ignore Memory Disks, CD-ROM drives and Floppy
+ try :
+ read_values(drive+'c')
+ drives.append(drive+'c')
+ except :
+ continue
+ except :
+ verboselog('Failed to list OpenBSD disks')
+ elif os.uname()[0]=="FreeBSD" :
+ try :
+ sysctl_kerndisks=os.popen('sysctl kern.disks')
+ kerndisks=string.strip(sysctl_kerndisks.readline())
+ for drive in string.split(kerndisks)[1:] :
+ if drive[:2] in ['md','cd','fd'] : continue # Ignore Memory Disks, CD-ROM drives and Floppy
+ try :
+ read_values(drive)
+ drives.append(drive)
+ except :
+ continue
+ except :
+ verboselog('Failed to list FreeBSD disks')
+ elif os.uname()[0]=="NetBSD" :
+ try :
+ sysctl_kerndisks=os.popen('sysctl hw.disknames')
+ kerndisks=string.strip(sysctl_kerndisks.readline())
+ for drive in string.split(kerndisks)[2:] :
+ if drive[:2] in ['md','cd','fd'] : continue # Ignore Memory Disks, CD-ROM drives and Floppy
+ try :
+ read_values(drive+'c')
+ drives.append(drive+'c')
+ except :
+ continue
+ except :
+ verboselog('Failed to list NetBSD disks')
+ elif os.uname()[0]=="SunOS" :
+ try :
+ from glob import glob
+ for drivepath in glob('/dev/rdsk/*s2') :
+ try :
+ drive=os.path.basename(drivepath)
+ read_values('rdsk'+drive)
+ drives.append('rdsk'+drive)
+ except :
+ continue
+ for drivepath in glob('/dev/rmt/*') :
+ try :
+ drive=os.path.basename(drivepath)
+ read_values('rmt'+drive)
+ drives.append('rmt'+drive)
+ except :
+ continue
+ except :
+ verboselog('Failed to list SunOS disks')
+ return(drives)
+
+### Main part ###
+
+smart_values={}
+model=''
+plugin_name=list(os.path.split(sys.argv[0]))[1]
+verboselog('plugins\' UID: '+str(os.geteuid())+' / plugins\' GID: '+str(os.getegid()))
+
+# Parse arguments
+if len(sys.argv)>1 :
+ if sys.argv[1]=="config" :
+ hard_drive=get_hard_drive_name()
+ print_config(hard_drive,smart_values)
+ sys.exit(0)
+ elif sys.argv[1]=="autoconf" :
+ if os.path.exists(os.getenv('smartpath','/usr/sbin/smartctl')) :
+ print('yes')
+ sys.exit(0)
+ else :
+ print('no (smartmontools not found)')
+ sys.exit(1)
+ elif sys.argv[1]=="suggest" :
+ for drive in find_smart_drives() :
+ print(drive)
+ sys.exit(0)
+ elif sys.argv[1]!="" :
+ verboselog('unknown argument "'+sys.argv[1]+'"')
+ sys.exit(1)
+
+# No argument given, doing the real job:
+hard_drive=get_hard_drive_name()
+read_values(hard_drive[0])
+update_state_file()
+print_plugin_values()
+