#!/usr/bin/env python
# taken from the debian munin-node package, version 1.2.4-1
###########################################################
# -*- encoding: iso-8859-1 -*-
#
# Wildcard-plugin to monitor S.M.A.R.T attribute values through smartctl,
# which is part of smartmontools package:
#         http://smartmontools.sourceforge.net/
#
# To monitor a S.M.A.R.T device, link smart_<device> to this file.
# E.g.
#    ln -s /usr/share/munin/plugins/smart_ /etc/munin/plugins/smart_hda
# ...will monitor /dev/hda.
#
# Needs following minimal configuration in plugin-conf.d/munin-node:
#   [smart_*]
#   user root
#   group disk
#
# Parameters
#   smartpath     - Specify path to smartctl program (Default: /usr/sbin/smartctl)
#   smartargs     - Override '-a' argument passed to smartctl with '-A -i'+smartargs
#
# Parameters can be specified on a per-drive basis, eg:
#   [smart_hda]
#   user root
#   group disk
#   env.smartargs -H -c -l error -l selftest -l selective -d ata
#   env.smartpath /usr/local/sbin/smartctl
#
#   [smart_twa0-1]
#   user root
#   group disk
#   env.smartargs -H -l error -d 3ware,1
#
#   [smart_twa0-2]
#   user root
#   group disk
#   env.smartargs -H -l error -d 3ware,2
#
# Author: Nicolas Stransky <Nico@neo-lan.net>
#
# v1.0 22/08/2004 - First draft
# v1.2 28/08/2004 - Clean up the code, add a verbose option
# v1.3 14/11/2004 - Compatibility with python<2.2. See comments in the code
# v1.4 17/11/2004 - Deal with non zero exit codes of smartctl
#               - config now prints the critical thresholds, as reported by smartctl
# v1.5 18/11/2004 - Plot smartctl_exit_code bitmask
# v1.6 21/11/2004 - Add autoconf and suggest capabilities
#               - smartctl path can be passed through "smartpath" environment variable
#               - Additional smartctl args can be passed through "smartargs" environment variable
# v1.7 29/11/2004 - Add suggest capabilities for NetBSD, OpenBSD, FreeBSD and SunOS.
#               - Allow to override completely the smartctl arguments with "smartargs"
# v1.8 16/02/2005 - Exit status field now only triggers warnings, not criticals.
# v1.9 07/07/2005 - Allow to query several drives on the same 3ware card.
#               - Correct a bug when '-i' was not listed in smartargs
#               - Don't fail if no value was obtained for hard drive model
# v1.10 19/08/2005 - smartctl_exit_code is now a numerical value
#
# Copyright (c) 2004,2005 Nicolas Stransky.
#
# Permission to use, copy, and modify this software with or without fee
# is hereby granted, provided that this entire notice is included in
# all source code copies of any software which is or includes a copy or
# modification of this software.
#
# THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
# IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
# REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
# MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
# PURPOSE.
#
#
# Magic markers
#%# capabilities=autoconf suggest
#%# family=auto

import os
import sys
import string
import pickle
from math import log

# Increase verbosity of the plugin by putting verbose=True (for
# debugging purposes)
# For python < 2.2 compliance, put verbose=0 instead of "False"
verbose = False

# Modify to your needs:
statefiledir = '/var/lib/munin/plugin-state/'


def verboselog(s):
    """Write the debug message *s* to stderr, prefixed with the plugin name."""
    sys.stderr.write(plugin_name + ': ' + s + '\n')


if not verbose:
    # Logging is disabled: replace the logger with a no-op.
    verboselog = lambda s: None


def read_values(hard_drive):
    """Run smartctl on /dev/<hard_drive> and fill the global ``smart_values``.

    ``smart_values`` is emptied first, so that values read for a previously
    probed drive can never be mistaken for values of this one.  Afterwards
    it maps every attribute name ('-' replaced by '_') to a dict with the
    "value" and "threshold" columns of the attribute table, plus the
    special keys "smartctl_exit_status" and "model".

    The program path and its leading arguments come from the ``smartpath``
    and ``smartargs`` environment variables.

    Calls ``sys.exit(1)`` when smartctl cannot be run, when it exits with a
    code of 2 or less, or when no attribute could be parsed.
    """
    smart_values.clear()
    num_exit_status = 0
    model = None
    try:
        verboselog('Reading S.M.A.R.T values')
        os.putenv('LC_ALL', 'C')
        # NOTE: the command is a shell string; the drive name, smartpath and
        # smartargs are administrator-controlled (symlink name / plugin conf).
        smart_output = os.popen(os.getenv('smartpath', '/usr/sbin/smartctl')
                                + ' ' + os.getenv('smartargs', '-a')
                                + ' -A -i /dev/' + hard_drive)
        in_attributes = False
        for line in smart_output:
            if not line.strip():
                # A blank line terminates the attributes table.
                in_attributes = False
            elif line.startswith('Device Model:') or line.startswith('Device:'):
                model_words = line.split(':')[1].split()
                if 'Version' in model_words:
                    model_words.remove('Version')
                model = ' '.join(model_words)
            if in_attributes:
                fields = line.split()
                if len(fields) < 6:
                    # Not a complete attribute row: ignore it.
                    continue
                smart_values[fields[1].replace('-', '_')] = {
                    "value": fields[3], "threshold": fields[5]}
            elif line.startswith('ID# ATTRIBUTE_NAME'):
                # Start reading the Attributes block
                in_attributes = True
        exit_status = smart_output.close()
    except Exception:
        verboselog('Cannot access S.M.A.R.T values ! Check user rights or propper smartmontools installation. Quitting...')
        sys.exit(1)

    if exit_status is not None:
        # close() reports the exit code shifted left by one byte.
        # smartctl exit code is a bitmask, check man page.
        num_exit_status = exit_status >> 8
        if num_exit_status <= 2:
            verboselog('smartctl cannot access S.M.A.R.T values on drive ' + hard_drive
                       + '. Command exited with code ' + str(num_exit_status)
                       + ' (' + str(num_exit_status) + ')')
            sys.exit(1)
        else:
            verboselog('smartctl exited with code ' + str(num_exit_status) + '. '
                       + hard_drive + ' may be FAILING RIGHT NOW !')

    if not smart_values:
        verboselog('Can\'t find any S.M.A.R.T value to plot ! Quitting...')
        sys.exit(1)
    smart_values["smartctl_exit_status"] = {"value": str(num_exit_status), "threshold": "1"}
    # For some reason we may have no value for "model"
    if model is None:
        model = "unknown"
    smart_values["model"] = model


def _state_file_path(drive):
    """Return the state file path for *drive* (the list built by get_hard_drive_name)."""
    return statefiledir + '/smart-' + '-'.join(drive) + '.state'


def _save_state(values):
    """Pickle *values* into the state file of the current drive and close it."""
    state_file = open_state_file("wb")
    try:
        pickle.dump(values, state_file)
    finally:
        state_file.close()


def open_state_file(mode):
    """Open the state file of the global ``hard_drive`` with *mode*.

    The file holds pickled data, so it is always opened in binary mode,
    even when the caller passes a plain "r" or "w".
    """
    if 'b' not in mode:
        mode = mode + 'b'
    return open(_state_file_path(hard_drive), mode)


def update_state_file():
    """Save the global ``smart_values`` to the state file (best effort)."""
    try:
        verboselog('Saving statefile')
        _save_state(smart_values)
    except Exception:
        verboselog('Error trying to save state file ! Check access rights')


def print_plugin_values():
    """Print one '<attribute>.value <value>' line per entry of ``smart_values``."""
    verboselog('Printing S.M.A.R.T values')
    for key in smart_values.keys():
        if key == "model":
            continue
        print(key + ".value " + smart_values[key]["value"])


def print_config(hard_drive, smart_values):
    """Print the munin graph configuration for *hard_drive*.

    The attribute list is taken from the state file when one exists;
    otherwise smartctl is queried and a state file is written (best effort).
    Exits with status 1 when an existing state file cannot be loaded.
    """
    if os.path.exists(_state_file_path(hard_drive)):
        try:
            verboselog('Try to recall previous S.M.A.R.T attributes for ' + ','.join(hard_drive))
            state_file = open_state_file("rb")
            try:
                # NOTE(security): pickle must only ever be fed this plugin's
                # own state file; never point statefiledir at untrusted data.
                smart_values_state = pickle.load(state_file)
            finally:
                state_file.close()
        except Exception:
            verboselog('Error opening existing state file !')
            sys.exit(1)
    else:
        verboselog('No state file, reading S.M.A.R.T values for the first time')
        read_values(hard_drive[0])
        try:
            _save_state(smart_values)
        except Exception:
            verboselog('Error trying to save state file ! Check access rights')
        smart_values_state = smart_values

    verboselog('Printing configuration')
    drive_label = ','.join(hard_drive)
    print('graph_title S.M.A.R.T values for drive ' + drive_label)
    print('graph_vlabel Attribute S.M.A.R.T value')
    print('graph_args --base 1000 --lower-limit 0')
    print('graph_category disk')
    print('graph_info This graph shows the value of all S.M.A.R.T attributes of drive ' + drive_label + ' (' + smart_values_state['model'] + '). smartctl_exit_status is the return value of smartctl. A non-zero return value indicates an error, a potential error, or a fault on the drive.')
    attributes = list(smart_values_state.keys())
    attributes.sort()
    for key in attributes:
        if key in ['smartctl_exit_status', 'model']:
            continue
        print(key + '.label ' + key)
        print(key + '.draw LINE2')
        # Following line can be commented out if necessary. Only useful for
        # warning reports about S.M.A.R.T values
        print(key + '.critical ' + smart_values_state[key]["threshold"] + ':')
    # Let's place smartctl_exit_status at the end
    print('smartctl_exit_status.label smartctl_exit_status')
    print('smartctl_exit_status.draw LINE2')
    # Following line can be commented out if necessary. Only useful for
    # warning reports about S.M.A.R.T values
    print('smartctl_exit_status.warning ' + smart_values_state['smartctl_exit_status']["threshold"])


def get_hard_drive_name():
    """Derive the drive to monitor from the plugin (symlink) name.

    Returns a one-element list ``[device]`` or, when the name contains a
    '-' (several plugins for the same 3ware device), the two-element list
    ``[device, number]``.  Exits with status 1 when the plugin name holds
    no device name or when /dev/<device> does not exist.
    """
    try:
        device = plugin_name[plugin_name.rindex('_') + 1:]
    except ValueError:
        device = ''
    if not device:
        verboselog('No S.M.A.R.T device name found in plugin\'s symlink !')
        sys.exit(1)

    if os.uname()[0] == "SunOS":
        # if hard_drive name starts with "rdsk" or "rmt", try to reconstruct the path
        if device[0:4] == "rdsk":
            device = os.path.join("rdsk", device[4:])
        elif device[0:3] == "rmt":
            device = os.path.join("rmt", device[3:])

    hard_drive = [device]
    # For 3ware cards, we have to set multiple plugins for the same hard drive name.
    # Let's see if we find a '-' in the drive name.
    if '-' in device:
        # Put the drive name and its number in a list
        dash = device.rindex('-')
        hard_drive = [device[:dash], device[dash + 1:]]

    # Check that the drive exists in /dev
    if not os.path.exists('/dev/' + hard_drive[0]):
        verboselog('/dev/' + hard_drive[0] + ' not found !')
        sys.exit(1)
    return hard_drive


def _probe_drive(drives, device, suggested_name=None):
    """Append the drive to *drives* if smartctl can read values from *device*.

    read_values() reports failure by calling sys.exit(), so SystemExit has
    to be caught here on purpose.  *suggested_name* is the name appended
    when it must differ from the probed device path.
    """
    try:
        read_values(device)
    except (Exception, SystemExit):
        return
    if suggested_name is None:
        suggested_name = device
    drives.append(suggested_name)


def _first_line_of(command):
    """Run the shell *command* and return its first output line, stripped."""
    pipe = os.popen(command)
    try:
        return pipe.readline().strip()
    finally:
        pipe.close()


def find_smart_drives():
    """Return the names of the drives smartctl can read, for 'suggest'.

    Try to autodetect Linux, *BSD, SunOS drives. Don't try to autodetect
    drives on a 3Ware card.
    """
    drives = []
    system = os.uname()[0]
    if system == "Linux":
        if os.path.exists('/sys/block/'):
            # Running 2.6, yeah
            try:
                for drive in os.listdir('/sys/block/'):
                    # Ignore MD, Floppy, loop , RAM and LVM devices.
                    if drive[:2] in ['md', 'fd', 'lo', 'ra', 'dm']:
                        continue
                    _probe_drive(drives, drive)
            except Exception:
                verboselog('Failed to list devices in /sys/block')
        else:
            verboselog('Not running linux2.6, failing back to /proc/partitions')
            try:
                partitions = open('/proc/partitions', 'r')
                try:
                    partition_lines = partitions.readlines()
                finally:
                    partitions.close()
                for line in partition_lines:
                    words = line.split()
                    if len(words) == 0 or words[0][0] not in string.digits:
                        continue
                    # Ignore RAM, md, LVM and LVM2 devices
                    if words[0] in ['1', '9', '58', '254']:
                        continue
                    # Only names that do not end with a digit are probed.
                    if words[-1][-1] not in string.digits:
                        _probe_drive(drives, words[-1])
                verboselog('Found drives in /proc/partitions ! ' + str(drives))
            except Exception:
                verboselog('Failed to list devices in /proc/partitions')
    elif system == "OpenBSD":
        try:
            kerndisks = _first_line_of('sysctl hw.disknames')
            for drive in kerndisks[kerndisks.rindex('=') + 1:].split(','):
                # Ignore Memory Disks, CD-ROM drives and Floppy
                if drive[:2] in ['md', 'cd', 'fd']:
                    continue
                _probe_drive(drives, drive + 'c')
        except Exception:
            verboselog('Failed to list OpenBSD disks')
    elif system == "FreeBSD":
        try:
            kerndisks = _first_line_of('sysctl kern.disks')
            for drive in kerndisks.split()[1:]:
                # Ignore Memory Disks, CD-ROM drives and Floppy
                if drive[:2] in ['md', 'cd', 'fd']:
                    continue
                _probe_drive(drives, drive)
        except Exception:
            verboselog('Failed to list FreeBSD disks')
    elif system == "NetBSD":
        try:
            kerndisks = _first_line_of('sysctl hw.disknames')
            for drive in kerndisks.split()[2:]:
                # Ignore Memory Disks, CD-ROM drives and Floppy
                if drive[:2] in ['md', 'cd', 'fd']:
                    continue
                _probe_drive(drives, drive + 'c')
        except Exception:
            verboselog('Failed to list NetBSD disks')
    elif system == "SunOS":
        try:
            from glob import glob
            # The suggested name is the directory name glued to the device
            # name (get_hard_drive_name() splits it again), but the probe
            # must use the real path below /dev.
            for drivepath in glob('/dev/rdsk/*s2'):
                drive = os.path.basename(drivepath)
                _probe_drive(drives, os.path.join('rdsk', drive), 'rdsk' + drive)
            for drivepath in glob('/dev/rmt/*'):
                drive = os.path.basename(drivepath)
                _probe_drive(drives, os.path.join('rmt', drive), 'rmt' + drive)
        except Exception:
            verboselog('Failed to list SunOS disks')
    return drives


### Main part ###

# Shared state: filled by read_values(), read by the printing/saving helpers.
smart_values = {}
model = ''
plugin_name = os.path.basename(sys.argv[0])

if __name__ == "__main__":
    verboselog('plugins\' UID: ' + str(os.geteuid()) + ' / plugins\' GID: ' + str(os.getegid()))

    # Parse arguments
    if len(sys.argv) > 1:
        if sys.argv[1] == "config":
            # hard_drive stays a module global: open_state_file() reads it.
            hard_drive = get_hard_drive_name()
            print_config(hard_drive, smart_values)
            sys.exit(0)
        elif sys.argv[1] == "autoconf":
            if os.path.exists(os.getenv('smartpath', '/usr/sbin/smartctl')):
                print('yes')
                sys.exit(0)
            else:
                print('no (smartmontools not found)')
                sys.exit(1)
        elif sys.argv[1] == "suggest":
            for drive in find_smart_drives():
                print(drive)
            sys.exit(0)
        elif sys.argv[1] != "":
            verboselog('unknown argument "' + sys.argv[1] + '"')
            sys.exit(1)

    # No argument given, doing the real job:
    hard_drive = get_hard_drive_name()
    read_values(hard_drive[0])
    update_state_file()
    print_plugin_values()