diff options
-rwxr-xr-x | build-nagios | 395 |
1 files changed, 250 insertions, 145 deletions
diff --git a/build-nagios b/build-nagios index 2de7513..00691ad 100755 --- a/build-nagios +++ b/build-nagios @@ -9,8 +9,56 @@ SHORTORG="rela" CONTACTGROUP="weaselgroup" GENERATED_PREFIX="/etc/NOREPLY/generated/nagios/" +OUT_NRPE_CONFFILE = GENERATED_PREFIX+"nrpe_#{ ORG }.cfg" +OUT_AUTO_HOSTS = GENERATED_PREFIX+"auto-hosts.cfg" +OUT_AUTO_HOSTGROUPS = GENERATED_PREFIX+"auto-hostgroups.cfg" +OUT_AUTO_SERVICES = GENERATED_PREFIX+"auto-services.cfg" +OUT_AUTO_DEPENDENCIES = GENERATED_PREFIX+"auto-dependencies.cfg" + MAX_CHECK_ATTEMPTS_DEFAULT=6 +NRPE_CHECKNAME="#{ ORG }_check_nrpe" # check that takes one argument: service name to be checked +NRPE_CHECKNAME_HOST="#{ ORG }_check_nrpe_host" # check that takes two arguments: relay host on which to run check, service name to be checked + +HOST_TEMPLATE_NAME='generic-host' # host templates that all our host definitions use +SERVICE_TEMPLATE_NAME='generic-service' # service templates that all our service definitions use +HOST_ALIVE_CHECK='check-host-alive' # host alive check if server is pingable +NRPE_PROCESS_SERVICE='process - nrpe' # nrpe checks will depend on this one + + +def warn (msg) + STDERR.puts msg +end +def set_if_unset(hash, key, value) + hash[key] = value unless hash.has_key?(key) +end +def set_complain_if_set(hash, key, value, type, name) + throw "#{type} definition '#{name}' has '#{key}' already defined" if hash.has_key?(key) + hash[key] = value +end + +# Make an array out of something. If there is nothing, create an empty array +# if it is just a string, make a list with just that element, if it already is +# an array keep it. 
+def ensure_array(something) + if (something == nil) + result = [] + elsif something.kind_of?(String) + result = [ something ] + elsif something.kind_of?(Array) + result = something + else + throw "Do now know how to make an array out of #{something}: " + something.to_yaml + end + return result +end + + +# This class keeps track of the checks done via NRPE and makes sure +# each gets a unique name. +# +# Unfortunately NRPE limits check names to some 30 characters, so +# we need to mangle service names near the end. class Nrpe def initialize @checks = {} @@ -49,27 +97,138 @@ class Nrpe return @checks end end - $nrpe = Nrpe.new() -def warn (msg) - STDERR.puts msg +# Prints the keys and values of hash to a file +# This is the function that prints the bodies of most of our +# host/service/etc definitions +# +# It skips over such keys as are listed in exclude_keys +# and also skips private keys (those starting with an underscore) +def print_block(fd, kind, hash, exclude_keys) + fd.puts "define #{kind} {" + hash.each_pair{ |key, value| + next if key[0,1] == '_' + next if exclude_keys.include? 
key + fd.puts " #{key} #{value}" + } + fd.puts "}" + fd.puts +end + + +# Add the service definition service to hosts +# f is the file for service definitions, deps the file for dependencies +def addService(hosts, service, f, deps) + + set_if_unset service, 'use' , SERVICE_TEMPLATE_NAME + set_complain_if_set service, 'host_name' , hosts.join(',') , 'Service', service['service_description'] + set_if_unset service, 'max_check_attempts', MAX_CHECK_ATTEMPTS_DEFAULT + + service['max_check_attempts'] = MAX_CHECK_ATTEMPTS_DEFAULT + service['max_check_attempts'] if service['max_check_attempts'] < 0 + + if service['nrpe'] + throw "We already have a check_command (#{service['check_command']}) but we are in the NRPE block (nrpe: #{service['nrpe']})."+ + " This should have been caught much earlier" if service.has_key?('check_command'); + + check = $nrpe.add(service['service_description'], service['nrpe']) + service['check_command'] = "#{ NRPE_CHECKNAME }!#{ check }" + + service['depends'] = ensure_array( service['depends'] ) + service['depends'] << NRPE_PROCESS_SERVICE unless service['service_description'] == NRPE_PROCESS_SERVICE # Depend on NRPE unless we are it + end + + print_block f, 'service', service, %w(nrpe runfrom remotecheck + depends + hosts hostgroups excludehosts excludehostgroups) + + if service['depends'] + service['depends'].each{ |prerequisite| + hosts.each{ |host| + prerequisite_host = host + pre = prerequisite + # split off a hostname if there's one + bananasplit = prerequisite.split(':') + if bananasplit.size == 2 + prerequisite_host = bananasplit[0] + pre = bananasplit[1] + elsif bananasplit.size > 2 + throw "Cannot prase prerequisite #{prerequisite} for service #{service['service_description']} into host:service" + end + dependency = { + 'host_name' => prerequisite_host, + 'service_description' => pre, + 'dependent_host_name' => host, + 'dependent_service_description' => service['service_description'], + 'execution_failure_criteria' => 'n', + 
+ 'notification_failure_criteria' => 'w,u,c' + }; + print_block deps, 'servicedependency', dependency, %w() + } + } + end +end + +# hostlists in services can be given both as single hosts and as hostgroups +# This function merges hostgroups and a simple list of hosts +# +# it also takes a prefix so that it can be used for excludelists as well +def merge_hosts_and_hostgroups(service, servers, hostgroups, prefix) + hosts = [] + hosts = service[prefix+'hosts'].delete(" \t").split(/,/) if service[prefix+'hosts'] + hosts.each{ |host| + throw "host #{host} does not exist - used in service #{service['service_description']}" unless servers[host] + }; + if service[prefix+'hostgroups'] + service[prefix+'hostgroups'].delete(" \t").split(/,/).each{ |hg| + throw "hostgroup #{hg} does not exist - used in service #{service['service_description']}" unless hostgroups[hg] + hosts = hosts.concat hostgroups[hg]['_memberlist'] + } + end + + return hosts end +# Figure out the hosts a given service applies to +# +# For a given service find the list of hosts minus excluded hosts that this service runs on +def find_hosts(service, servers, hostgroups) + hosts = merge_hosts_and_hostgroups service, servers, hostgroups, '' + excludehosts = merge_hosts_and_hostgroups service, servers, hostgroups, 'exclude' + + excludehosts.each{ |host| + if hosts.delete(host) == nil + throw "Cannot remove host #{host} from service #{service['service_description']}: it's not included anyway or excluded twice." 
+ end + } + return hosts +end + +############################################################################################# +############################################################################################# +############################################################################################# + +# Load the config config = YAML::load( File.open( 'nagios-master.cfg' ) ) -%w{auto-hosts.cfg auto-hostgroups.cfg auto-services.cfg auto-dependencies.cfg}.each{ |file| - File.delete(GENERATED_PREFIX+"#{file}") if FileTest.file?(GENERATED_PREFIX+"#{file}") +# Remove old created files +[ OUT_AUTO_HOSTS, OUT_AUTO_HOSTGROUPS, OUT_AUTO_SERVICES, OUT_AUTO_DEPENDENCIES, OUT_NRPE_CONFFILE ].each{ |file| + File.delete(file) if File.exist?(file) } +f_hosts = File.new(OUT_AUTO_HOSTS, "w") +f_hostgroups = File.new(OUT_AUTO_HOSTGROUPS, "w") +f_services = File.new(OUT_AUTO_SERVICES, "w") +f_dependencies = File.new(OUT_AUTO_DEPENDENCIES, "w") +f_nrpe = File.new(OUT_NRPE_CONFFILE, "w") + ################################# # create a few hostgroups ################################# - config['hostgroups'].each_value{ |hg| - hg['members'] = [] + hg['_memberlist'] = [] } config['servers'].each_pair{ |name, server| @@ -77,183 +236,129 @@ config['servers'].each_pair{ |name, server| server['hostgroups'].split(/,/).each{ |hg| hg.strip! 
throw "Hostgroup #{hg} is not defined" unless config['hostgroups'].has_key?(hg) - config['hostgroups'][hg]['members'] << name + config['hostgroups'][hg]['_memberlist'] << name }; } -# create all +# create the "all" and "pingable" hostgroups config['hostgroups']['all'] = {} config['hostgroups']['all']['alias'] = "all servers" -config['hostgroups']['all']['members'] = [] +config['hostgroups']['all']['_memberlist'] = [] config['hostgroups']['pingable'] = {} config['hostgroups']['pingable']['alias'] = "pingable servers" -config['hostgroups']['pingable']['members'] = [] +config['hostgroups']['pingable']['_memberlist'] = [] config['servers'].each_key{ |name| - config['hostgroups']['all']['members'] << name - config['hostgroups']['pingable']['members'] << name unless (config['servers'][name]['pingable'] == false) + config['hostgroups']['all']['_memberlist'] << name + config['hostgroups']['pingable']['_memberlist'] << name unless (config['servers'][name]['pingable'] == false) } - -####### -# Hosts and Hostgroups -f = File.new(GENERATED_PREFIX+"auto-hosts.cfg", "w") +############## +# HOSTS +############## config['servers'].each_pair{ |name, server| + # Formerly we used 'ip' instead of 'address' in our source file + # Handle this change but warn XXX if server.has_key?('ip') - STDERR.puts("Host definition for #{name} has an 'ip' field. Please use address instead"); + STDERR.puts("Host definition for #{name} has an 'ip' field. Please use 'address' instead"); server['address'] = server['ip']; server.delete('ip'); end - server['host_name'] = name - server['alias'] = name unless server.has_key?('alias') - server['check_command'] = 'check-host-alive' unless (server['pingable'] == false || server.has_key?('check_command')) - server['use'] = 'generic-host' unless server.has_key?('use'); - - f.puts "define host{" - server.each_pair{ |key, value| - next if %w(hostgroups pingable).include? 
key - f.puts " #{key} #{value}" - } - f.puts "}" - f.puts -} -f = File.new(GENERATED_PREFIX+"auto-hostgroups.cfg", "w") -config['hostgroups'].each_pair{ |name, hg| - next if hg['private'] - hg['contact_groups'] = CONTACTGROUP unless hg.has_key?('contact_groups') - f.puts "define hostgroup{" - f.puts " hostgroup_name #{ name }" - f.puts " members "+hg['members'].join(",") - hg.each_pair{ |key, value| - next if %w(members).include? key - f.puts " #{key} #{value}" - } - f.puts "}" + + set_complain_if_set server, 'host_name' , name, 'Host', name + set_if_unset server, 'alias' , name + set_if_unset server, 'use' , HOST_TEMPLATE_NAME + set_if_unset server, 'check_command', HOST_ALIVE_CHECK unless server['pingable'] == false + + print_block f_hosts, 'host', server, %w(hostgroups pingable) } -####### -# Services and Dependencies -def addService(hosts, service, f, deps) - hosts_comma = hosts.join(',') - - f.puts "define service{" - f.puts " use generic-service" - f.puts " host_name #{ hosts_comma }" - f.puts " service_description #{ service['name'] }" - f.puts " check_command #{ service['check'] }" if service['check'] - service['max_check_attempts']=MAX_CHECK_ATTEMPTS_DEFAULT unless service['max_check_attempts'] - service['max_check_attempts']=MAX_CHECK_ATTEMPTS_DEFAULT+service['max_check_attempts'] if service['max_check_attempts'] < 0 - if service['nrpe'] - check = $nrpe.add(service['name'], service['nrpe']) - f.puts " check_command #{ ORG }_check_nrpe!#{ check }" - service['depends'] = 'process - nrpe' unless service['depends'] or service['name'] == 'process - nrpe' - end - # put additional keys into services - service.each_pair{ |key, value| - # known keys: - next if %w(name check hosts nrpe remotecheck hostgroups excludehosts excludehostgroups depends runfrom).include? 
key - f.puts " #{key} #{value}" - } - f.puts "}" +############## +# HOSTGROUPS +############## +config['hostgroups'].each_pair{ |name, hg| + next if hg['private'] - if service['depends'] - service['depends'].each{ |prerequisite| - hosts.each{ |host| - prerequisite_host = host - pre = prerequisite - # split off a hostname if there's one - bananasplit = prerequisite.split(':') - if bananasplit.size == 2 - prerequisite_host = bananasplit[0] - pre = bananasplit[1] - elsif bananasplit.size > 2 - throw "Cannot prase prerequisite #{prerequisite} for service #{service['name']} into host:service" - end - deps.puts "define servicedependency{" - deps.puts " host_name #{ prerequisite_host }" - deps.puts " service_description #{ pre }" - deps.puts " dependent_host_name #{ host }" - deps.puts " dependent_service_description #{ service['name'] }" - deps.puts " execution_failure_criteria n" - deps.puts " notification_failure_criteria w,u,c" - deps.puts "}" - } - } - end -end + set_complain_if_set hg, 'hostgroup_name', name , 'Hostgroup', name + set_complain_if_set hg, 'members' , hg['_memberlist'].join(","), 'Hostgroup', name + set_if_unset hg, 'contact_groups', CONTACTGROUP + + print_block f_hostgroups, 'hostgroup', hg, %w() +} -f = File.new(GENERATED_PREFIX+"auto-services.cfg", "w") -deps = File.new(GENERATED_PREFIX+"auto-dependencies.cfg", "w") +############## +# SERVICES and DEPENDENCIES +############## config['services'].each{ |service| throw "Empty service or service not a hash" unless service.kind_of?(Hash) - throw "nrpe, check, and remotecheck are mutually exclusive in service #{service['name']}" if - (service['nrpe'] ? 1 : 0) + - (service['check'] ? 1 : 0) + - (service['remotecheck'] ? 
1 : 0) >= 2 - # Figure out the hosts this service applies to - service['hosts'] = "" unless service['hosts'] - service['hosts'].delete!(" \t") - hosts = service['hosts'].split(/,/) - if service['hostgroups'] then - service['hostgroups'].delete(" \t").split(/,/).each{ |hg| - unless config['hostgroups'][hg] - throw "hostgroup #{hg} does not exist- used in service #{service['name']}" - end - hosts = hosts.concat config['hostgroups'][hg]['members'] - } + # Both 'name' and 'service_description' are valid for a service's name + # Internally we only use service_description as that's nagios' official term + if service.has_key?('name') + throw "Service definition has both a name (#{service['name']})" + + "and a service_description (#{service['service_description']})" if service.has_key?('service_description') + #STDERR.puts("Service definition #{service['name']} has a 'name' field. Please use 'service_description' instead"); + service['service_description'] = service['name']; + service.delete('name'); end - excludehosts = [] - if service['excludehosts'] then - excludehosts = service['excludehosts'].delete(" \t").split(/,/) + # Both 'check' and 'check_command' are valid for a service's check command + # Internally we only use check_command as that's nagios' official term + if service.has_key?('check') + throw "Service definition has both a check (#{service['check']})" + + "and a check_command (#{service['check_command']})" if service.has_key?('check_command') + #STDERR.puts("Service definition #{service['service_description']} has a 'check' field. 
Please use 'check_command' instead"); + service['check_command'] = service['check']; + service.delete('check'); end - if service['excludehostgroups'] then - service['excludehostgroups'].delete(" \t").split(/,/).each{ |hg| - unless config['hostgroups'][hg] - throw "hostgroup #{hg} does not exist- used in service #{service['name']}" - end - excludehosts = excludehosts.concat config['hostgroups'][hg]['members'] - } - end - excludehosts.each{ |host| - if hosts.delete(host) == nil - throw "Cannot remove host #{host} from service #{service['name']}: it's not included anyway" - end - } - throw "no hosts for service #{service['name']}" if hosts.empty? - if service['runfrom'] - throw "need remotecheck with runfrom" unless service['remotecheck'] + hosts = find_hosts service, config['servers'], config['hostgroups'] + throw "no hosts for service #{service['service_description']}" if hosts.empty? + + throw "nrpe, check, and remotecheck are mutually exclusive in service #{service['service_description']}" if + (service['nrpe'] ? 1 : 0) + + (service['check_command'] ? 1 : 0) + + (service['remotecheck'] ? 1 : 0) >= 2 + + if service['runfrom'] && service['remotecheck'] + # If the service check is to be run from a remote monitor server ("relay") + # add that as an NRPE check to be run on the relay and make this + # service also depend on NRPE on the relay relay = service['runfrom'] hosts.each{ |host| - host_ip = (config['servers'][host] ? 
config['servers'][host]['ip'] : gateway_name_to_ip[host] ) - check = $nrpe.add("#{host}_#{service['name']}", service['remotecheck'].gsub(/\$HOSTADDRESS\$/, host_ip)) - service['check'] = "#{ ORG }_check_nrpe_host!#{ config['servers'][ relay ]['ip'] }!#{ check }" - - if (service['depends'] == nil) - service['depends'] = [] - elsif service['depends'].kind_of?(String) - d = [ service['depends'] ]; - service['depends'] = d - end - service['depends'] << "#{ relay }:process - nrpe"; - - addService( [ host ], service, f, deps) + host_ip = config['servers'][host]['address'] + throw "For some reason I do not have an address for #{host}. This shouldn't be." unless host_ip + + check = $nrpe.add("#{host}_#{service['service_description']}", service['remotecheck'].gsub(/\$HOSTADDRESS\$/, host_ip)) + service['check_command'] = "#{NRPE_CHECKNAME_HOST}!#{ config['servers'][ relay ]['address'] }!#{ check }" + + # Make sure dependencies are an array. If there are none, create an empty array + # if depends is just a string, make a list with just that element + service['depends'] = ensure_array( service['depends'] ) + + # And append this new dependency + service['depends'] << "#{ relay }:#{ NRPE_PROCESS_SERVICE }"; + + addService( [ host ], service, f_services, f_dependencies) } - else + elsif service['runfrom'] || service['remotecheck'] + throw "runfrom and remotecheck must either appear both or not at all in service #{service['service_description']}" throw "must not remotecheck without runfrom" if service['remotecheck'] - addService(hosts, service, f, deps) + else + addService(hosts, service, f_services, f_dependencies) end } -f = File.new(GENERATED_PREFIX+"nrpe_#{ ORG }.cfg", "w") + +############## +# NRPE config file +############## $nrpe.checks.each_pair{ |name, check| - f.puts "command[#{ name }]=#{ check }" + f_nrpe.puts "command[#{ name }]=#{ check }" } |