summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x build-nagios 395
1 files changed, 250 insertions, 145 deletions
diff --git a/build-nagios b/build-nagios
index 2de7513..00691ad 100755
--- a/build-nagios
+++ b/build-nagios
@@ -9,8 +9,56 @@ SHORTORG="rela"
CONTACTGROUP="weaselgroup"
GENERATED_PREFIX="/etc/NOREPLY/generated/nagios/"
+OUT_NRPE_CONFFILE = GENERATED_PREFIX+"nrpe_#{ ORG }.cfg"
+OUT_AUTO_HOSTS = GENERATED_PREFIX+"auto-hosts.cfg"
+OUT_AUTO_HOSTGROUPS = GENERATED_PREFIX+"auto-hostgroups.cfg"
+OUT_AUTO_SERVICES = GENERATED_PREFIX+"auto-services.cfg"
+OUT_AUTO_DEPENDENCIES = GENERATED_PREFIX+"auto-dependencies.cfg"
+
MAX_CHECK_ATTEMPTS_DEFAULT=6
+NRPE_CHECKNAME="#{ ORG }_check_nrpe" # check that takes one argument: service name to be checked
+NRPE_CHECKNAME_HOST="#{ ORG }_check_nrpe_host" # check that takes two arguments: relay host on which to run check, service name to be checked
+
+HOST_TEMPLATE_NAME='generic-host' # host template that all our host definitions use
+SERVICE_TEMPLATE_NAME='generic-service' # service template that all our service definitions use
+HOST_ALIVE_CHECK='check-host-alive' # host alive check if server is pingable
+NRPE_PROCESS_SERVICE='process - nrpe' # nrpe checks will depend on this one
+
+
+def warn (msg)
+ STDERR.puts msg
+end
+def set_if_unset(hash, key, value)
+ hash[key] = value unless hash.has_key?(key)
+end
+def set_complain_if_set(hash, key, value, type, name)
+ throw "#{type} definition '#{name}' has '#{key}' already defined" if hash.has_key?(key)
+ hash[key] = value
+end
+
+# Make an array out of something. If there is nothing, create an empty array;
+# if it is just a string, make a list with just that element; if it already is
+# an array, keep it.
+def ensure_array(something)
+ if (something == nil)
+ result = []
+ elsif something.kind_of?(String)
+ result = [ something ]
+ elsif something.kind_of?(Array)
+ result = something
+ else
+ throw "Do now know how to make an array out of #{something}: " + something.to_yaml
+ end
+ return result
+end
+
+
+# This class keeps track of the checks done via NRPE and makes sure
+# each gets a unique name.
+#
+# Unfortunately NRPE limits check names to some 30 characters, so
+# we need to mangle service names near the end.
class Nrpe
def initialize
@checks = {}
@@ -49,27 +97,138 @@ class Nrpe
return @checks
end
end
-
$nrpe = Nrpe.new()
-def warn (msg)
- STDERR.puts msg
+# Prints the keys and values of hash to a file.
+# This is the function that prints the bodies of most of our
+# host/service/etc definitions.
+#
+# It skips over any keys that are listed in exclude_keys
+# and also skips private keys (those starting with an underscore).
+def print_block(fd, kind, hash, exclude_keys)
+ fd.puts "define #{kind} {"
+ hash.each_pair{ |key, value|
+ next if key[0,1] == '_'
+ next if exclude_keys.include? key
+ fd.puts " #{key} #{value}"
+ }
+ fd.puts "}"
+ fd.puts
+end
+
+
+# Add the service definition service to hosts
+# f is the file for service definitions, deps the file for dependencies
+def addService(hosts, service, f, deps)
+
+ set_if_unset service, 'use' , SERVICE_TEMPLATE_NAME
+ set_complain_if_set service, 'host_name' , hosts.join(',') , 'Service', service['service_description']
+ set_if_unset service, 'max_check_attempts', MAX_CHECK_ATTEMPTS_DEFAULT
+
+ service['max_check_attempts'] = MAX_CHECK_ATTEMPTS_DEFAULT + service['max_check_attempts'] if service['max_check_attempts'] < 0
+
+ if service['nrpe']
+ throw "We already have a check_command (#{service['check_command']}) but we are in the NRPE block (nrpe: #{service['nrpe']})."+
+ " This should have been caught much earlier" if service.has_key?('check_command');
+
+ check = $nrpe.add(service['service_description'], service['nrpe'])
+ service['check_command'] = "#{ NRPE_CHECKNAME }!#{ check }"
+
+ service['depends'] = ensure_array( service['depends'] )
+ service['depends'] << NRPE_PROCESS_SERVICE unless service['service_description'] == NRPE_PROCESS_SERVICE # Depend on NRPE unless we are it
+ end
+
+ print_block f, 'service', service, %w(nrpe runfrom remotecheck
+ depends
+ hosts hostgroups excludehosts excludehostgroups)
+
+ if service['depends']
+ service['depends'].each{ |prerequisite|
+ hosts.each{ |host|
+ prerequisite_host = host
+ pre = prerequisite
+ # split off a hostname if there's one
+ bananasplit = prerequisite.split(':')
+ if bananasplit.size == 2
+ prerequisite_host = bananasplit[0]
+ pre = bananasplit[1]
+ elsif bananasplit.size > 2
+ throw "Cannot prase prerequisite #{prerequisite} for service #{service['service_description']} into host:service"
+ end
+ dependency = {
+ 'host_name' => prerequisite_host,
+ 'service_description' => pre,
+ 'dependent_host_name' => host,
+ 'dependent_service_description' => service['service_description'],
+ 'execution_failure_criteria' => 'n',
+ 'notification_failure_criteria' => 'w,u,c'
+ };
+ print_block deps, 'servicedependency', dependency, %w()
+ }
+ }
+ end
+end
+
+# Host lists in services can be given both as single hosts and as hostgroups.
+# This function merges the hostgroups and the simple list of hosts.
+#
+# It also takes a prefix so that it can be used for exclude lists as well.
+def merge_hosts_and_hostgroups(service, servers, hostgroups, prefix)
+ hosts = []
+ hosts = service[prefix+'hosts'].delete(" \t").split(/,/) if service[prefix+'hosts']
+ hosts.each{ |host|
+ throw "host #{host} does not exist - used in service #{service['service_description']}" unless servers[host]
+ };
+ if service[prefix+'hostgroups']
+ service[prefix+'hostgroups'].delete(" \t").split(/,/).each{ |hg|
+ throw "hostgroup #{hg} does not exist - used in service #{service['service_description']}" unless hostgroups[hg]
+ hosts = hosts.concat hostgroups[hg]['_memberlist']
+ }
+ end
+
+ return hosts
end
+# Figure out the hosts a given service applies to
+#
+# For a given service find the list of hosts minus excluded hosts that this service runs on
+def find_hosts(service, servers, hostgroups)
+ hosts = merge_hosts_and_hostgroups service, servers, hostgroups, ''
+ excludehosts = merge_hosts_and_hostgroups service, servers, hostgroups, 'exclude'
+
+ excludehosts.each{ |host|
+ if hosts.delete(host) == nil
+ throw "Cannot remove host #{host} from service #{service['service_description']}: it's not included anyway or excluded twice."
+ end
+ }
+ return hosts
+end
+
+#############################################################################################
+#############################################################################################
+#############################################################################################
+
+# Load the config
config = YAML::load( File.open( 'nagios-master.cfg' ) )
-%w{auto-hosts.cfg auto-hostgroups.cfg auto-services.cfg auto-dependencies.cfg}.each{ |file|
- File.delete(GENERATED_PREFIX+"#{file}") if FileTest.file?(GENERATED_PREFIX+"#{file}")
+# Remove previously generated files
+[ OUT_AUTO_HOSTS, OUT_AUTO_HOSTGROUPS, OUT_AUTO_SERVICES, OUT_AUTO_DEPENDENCIES, OUT_NRPE_CONFFILE ].each{ |file|
+ File.delete(file) if File.exist?(file)
}
+f_hosts = File.new(OUT_AUTO_HOSTS, "w")
+f_hostgroups = File.new(OUT_AUTO_HOSTGROUPS, "w")
+f_services = File.new(OUT_AUTO_SERVICES, "w")
+f_dependencies = File.new(OUT_AUTO_DEPENDENCIES, "w")
+f_nrpe = File.new(OUT_NRPE_CONFFILE, "w")
+
#################################
# create a few hostgroups
#################################
-
config['hostgroups'].each_value{ |hg|
- hg['members'] = []
+ hg['_memberlist'] = []
}
config['servers'].each_pair{ |name, server|
@@ -77,183 +236,129 @@ config['servers'].each_pair{ |name, server|
server['hostgroups'].split(/,/).each{ |hg|
hg.strip!
throw "Hostgroup #{hg} is not defined" unless config['hostgroups'].has_key?(hg)
- config['hostgroups'][hg]['members'] << name
+ config['hostgroups'][hg]['_memberlist'] << name
};
}
-# create all
+# create the "all" and "pingable" hostgroups
config['hostgroups']['all'] = {}
config['hostgroups']['all']['alias'] = "all servers"
-config['hostgroups']['all']['members'] = []
+config['hostgroups']['all']['_memberlist'] = []
config['hostgroups']['pingable'] = {}
config['hostgroups']['pingable']['alias'] = "pingable servers"
-config['hostgroups']['pingable']['members'] = []
+config['hostgroups']['pingable']['_memberlist'] = []
config['servers'].each_key{ |name|
- config['hostgroups']['all']['members'] << name
- config['hostgroups']['pingable']['members'] << name unless (config['servers'][name]['pingable'] == false)
+ config['hostgroups']['all']['_memberlist'] << name
+ config['hostgroups']['pingable']['_memberlist'] << name unless (config['servers'][name]['pingable'] == false)
}
-
-#######
-# Hosts and Hostgroups
-f = File.new(GENERATED_PREFIX+"auto-hosts.cfg", "w")
+##############
+# HOSTS
+##############
config['servers'].each_pair{ |name, server|
+ # Formerly we used 'ip' instead of 'address' in our source file
+ # Handle this change but warn XXX
if server.has_key?('ip')
- STDERR.puts("Host definition for #{name} has an 'ip' field. Please use address instead");
+ STDERR.puts("Host definition for #{name} has an 'ip' field. Please use 'address' instead");
server['address'] = server['ip'];
server.delete('ip');
end
- server['host_name'] = name
- server['alias'] = name unless server.has_key?('alias')
- server['check_command'] = 'check-host-alive' unless (server['pingable'] == false || server.has_key?('check_command'))
- server['use'] = 'generic-host' unless server.has_key?('use');
-
- f.puts "define host{"
- server.each_pair{ |key, value|
- next if %w(hostgroups pingable).include? key
- f.puts " #{key} #{value}"
- }
- f.puts "}"
- f.puts
-}
-f = File.new(GENERATED_PREFIX+"auto-hostgroups.cfg", "w")
-config['hostgroups'].each_pair{ |name, hg|
- next if hg['private']
- hg['contact_groups'] = CONTACTGROUP unless hg.has_key?('contact_groups')
- f.puts "define hostgroup{"
- f.puts " hostgroup_name #{ name }"
- f.puts " members "+hg['members'].join(",")
- hg.each_pair{ |key, value|
- next if %w(members).include? key
- f.puts " #{key} #{value}"
- }
- f.puts "}"
+
+ set_complain_if_set server, 'host_name' , name, 'Host', name
+ set_if_unset server, 'alias' , name
+ set_if_unset server, 'use' , HOST_TEMPLATE_NAME
+ set_if_unset server, 'check_command', HOST_ALIVE_CHECK unless server['pingable'] == false
+
+ print_block f_hosts, 'host', server, %w(hostgroups pingable)
}
-#######
-# Services and Dependencies
-def addService(hosts, service, f, deps)
- hosts_comma = hosts.join(',')
-
- f.puts "define service{"
- f.puts " use generic-service"
- f.puts " host_name #{ hosts_comma }"
- f.puts " service_description #{ service['name'] }"
- f.puts " check_command #{ service['check'] }" if service['check']
- service['max_check_attempts']=MAX_CHECK_ATTEMPTS_DEFAULT unless service['max_check_attempts']
- service['max_check_attempts']=MAX_CHECK_ATTEMPTS_DEFAULT+service['max_check_attempts'] if service['max_check_attempts'] < 0
- if service['nrpe']
- check = $nrpe.add(service['name'], service['nrpe'])
- f.puts " check_command #{ ORG }_check_nrpe!#{ check }"
- service['depends'] = 'process - nrpe' unless service['depends'] or service['name'] == 'process - nrpe'
- end
- # put additional keys into services
- service.each_pair{ |key, value|
- # known keys:
- next if %w(name check hosts nrpe remotecheck hostgroups excludehosts excludehostgroups depends runfrom).include? key
- f.puts " #{key} #{value}"
- }
- f.puts "}"
+##############
+# HOSTGROUPS
+##############
+config['hostgroups'].each_pair{ |name, hg|
+ next if hg['private']
- if service['depends']
- service['depends'].each{ |prerequisite|
- hosts.each{ |host|
- prerequisite_host = host
- pre = prerequisite
- # split off a hostname if there's one
- bananasplit = prerequisite.split(':')
- if bananasplit.size == 2
- prerequisite_host = bananasplit[0]
- pre = bananasplit[1]
- elsif bananasplit.size > 2
- throw "Cannot prase prerequisite #{prerequisite} for service #{service['name']} into host:service"
- end
- deps.puts "define servicedependency{"
- deps.puts " host_name #{ prerequisite_host }"
- deps.puts " service_description #{ pre }"
- deps.puts " dependent_host_name #{ host }"
- deps.puts " dependent_service_description #{ service['name'] }"
- deps.puts " execution_failure_criteria n"
- deps.puts " notification_failure_criteria w,u,c"
- deps.puts "}"
- }
- }
- end
-end
+ set_complain_if_set hg, 'hostgroup_name', name , 'Hostgroup', name
+ set_complain_if_set hg, 'members' , hg['_memberlist'].join(","), 'Hostgroup', name
+ set_if_unset hg, 'contact_groups', CONTACTGROUP
+
+ print_block f_hostgroups, 'hostgroup', hg, %w()
+}
-f = File.new(GENERATED_PREFIX+"auto-services.cfg", "w")
-deps = File.new(GENERATED_PREFIX+"auto-dependencies.cfg", "w")
+##############
+# SERVICES and DEPENDENCIES
+##############
config['services'].each{ |service|
throw "Empty service or service not a hash" unless service.kind_of?(Hash)
- throw "nrpe, check, and remotecheck are mutually exclusive in service #{service['name']}" if
- (service['nrpe'] ? 1 : 0) +
- (service['check'] ? 1 : 0) +
- (service['remotecheck'] ? 1 : 0) >= 2
- # Figure out the hosts this service applies to
- service['hosts'] = "" unless service['hosts']
- service['hosts'].delete!(" \t")
- hosts = service['hosts'].split(/,/)
- if service['hostgroups'] then
- service['hostgroups'].delete(" \t").split(/,/).each{ |hg|
- unless config['hostgroups'][hg]
- throw "hostgroup #{hg} does not exist- used in service #{service['name']}"
- end
- hosts = hosts.concat config['hostgroups'][hg]['members']
- }
+ # Both 'name' and 'service_description' are valid for a service's name
+ # Internally we only use service_description as that's nagios' official term
+ if service.has_key?('name')
+ throw "Service definition has both a name (#{service['name']})" +
+ "and a service_description (#{service['service_description']})" if service.has_key?('service_description')
+ #STDERR.puts("Service definition #{service['name']} has a 'name' field. Please use 'service_description' instead");
+ service['service_description'] = service['name'];
+ service.delete('name');
end
- excludehosts = []
- if service['excludehosts'] then
- excludehosts = service['excludehosts'].delete(" \t").split(/,/)
+ # Both 'check' and 'check_command' are valid for a service's check command
+ # Internally we only use check_command as that's nagios' official term
+ if service.has_key?('check')
+ throw "Service definition has both a check (#{service['check']})" +
+ "and a check_command (#{service['check_command']})" if service.has_key?('check_command')
+ #STDERR.puts("Service definition #{service['service_description']} has a 'check' field. Please use 'check_command' instead");
+ service['check_command'] = service['check'];
+ service.delete('check');
end
- if service['excludehostgroups'] then
- service['excludehostgroups'].delete(" \t").split(/,/).each{ |hg|
- unless config['hostgroups'][hg]
- throw "hostgroup #{hg} does not exist- used in service #{service['name']}"
- end
- excludehosts = excludehosts.concat config['hostgroups'][hg]['members']
- }
- end
- excludehosts.each{ |host|
- if hosts.delete(host) == nil
- throw "Cannot remove host #{host} from service #{service['name']}: it's not included anyway"
- end
- }
- throw "no hosts for service #{service['name']}" if hosts.empty?
- if service['runfrom']
- throw "need remotecheck with runfrom" unless service['remotecheck']
+ hosts = find_hosts service, config['servers'], config['hostgroups']
+ throw "no hosts for service #{service['service_description']}" if hosts.empty?
+
+ throw "nrpe, check, and remotecheck are mutually exclusive in service #{service['service_description']}" if
+ (service['nrpe'] ? 1 : 0) +
+ (service['check_command'] ? 1 : 0) +
+ (service['remotecheck'] ? 1 : 0) >= 2
+
+ if service['runfrom'] && service['remotecheck']
+ # If the service check is to be run from a remote monitor server ("relay")
+ # add that as an NRPE check to be run on the relay and make this
+ # service also depend on NRPE on the relay
relay = service['runfrom']
hosts.each{ |host|
- host_ip = (config['servers'][host] ? config['servers'][host]['ip'] : gateway_name_to_ip[host] )
- check = $nrpe.add("#{host}_#{service['name']}", service['remotecheck'].gsub(/\$HOSTADDRESS\$/, host_ip))
- service['check'] = "#{ ORG }_check_nrpe_host!#{ config['servers'][ relay ]['ip'] }!#{ check }"
-
- if (service['depends'] == nil)
- service['depends'] = []
- elsif service['depends'].kind_of?(String)
- d = [ service['depends'] ];
- service['depends'] = d
- end
- service['depends'] << "#{ relay }:process - nrpe";
-
- addService( [ host ], service, f, deps)
+ host_ip = config['servers'][host]['address']
+ throw "For some reason I do not have an address for #{host}. This shouldn't be." unless host_ip
+
+ check = $nrpe.add("#{host}_#{service['service_description']}", service['remotecheck'].gsub(/\$HOSTADDRESS\$/, host_ip))
+ service['check_command'] = "#{NRPE_CHECKNAME_HOST}!#{ config['servers'][ relay ]['address'] }!#{ check }"
+
+ # Make sure dependencies are an array. If there are none, create an empty array
+ # if depends is just a string, make a list with just that element
+ service['depends'] = ensure_array( service['depends'] )
+
+ # And append this new dependency
+ service['depends'] << "#{ relay }:#{ NRPE_PROCESS_SERVICE }";
+
+ addService( [ host ], service, f_services, f_dependencies)
}
- else
+ elsif service['runfrom'] || service['remotecheck']
+ throw "runfrom and remotecheck must either appear both or not at all in service #{service['service_description']}"
throw "must not remotecheck without runfrom" if service['remotecheck']
- addService(hosts, service, f, deps)
+ else
+ addService(hosts, service, f_services, f_dependencies)
end
}
-f = File.new(GENERATED_PREFIX+"nrpe_#{ ORG }.cfg", "w")
+
+##############
+# NRPE config file
+##############
$nrpe.checks.each_pair{ |name, check|
- f.puts "command[#{ name }]=#{ check }"
+ f_nrpe.puts "command[#{ name }]=#{ check }"
}