summaryrefslogtreecommitdiff
path: root/build-nagios
blob: 14737ec86e6bebb291f8d7804363d8237c0bc3db (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
#!/usr/bin/ruby

# Copyright (c) 2004, 2005 Peter Palfrader <peter@palfrader.org>

require "yaml"

# Organisation name and its short form; both are used as prefixes in generated names
ORG = "relativity"
SHORTORG = "rela"
# Contact group assigned to hostgroups that do not name one themselves
CONTACTGROUP = "weaselgroup"
# Directory (with trailing slash) that all generated files are written to
GENERATED_PREFIX = "/etc/NOREPLY/generated/nagios/"

# Maps each kind of output to the path of the file generated for it
nagios_filename = {}
%w(hosts hostgroups services dependencies hostextinfo serviceextinfo).each do |kind|
	nagios_filename[kind] = "#{GENERATED_PREFIX}auto-#{kind}.cfg"
end
nagios_filename['nrpe'] = "#{GENERATED_PREFIX}nrpe_#{ORG}.cfg"


# max_check_attempts for services that do not set one; negative per-service
# values are taken relative to this default
MAX_CHECK_ATTEMPTS_DEFAULT = 6

NRPE_CHECKNAME      = "#{ORG}_check_nrpe"       # check that takes one argument:  service name to be checked
NRPE_CHECKNAME_HOST = "#{ORG}_check_nrpe_host"  # check that takes two arguments: relay host on which to run check, service name to be checked

HOST_TEMPLATE_NAME    = 'generic-host'          # host template that all our host definitions use
SERVICE_TEMPLATE_NAME = 'generic-service'       # service template that all our service definitions use
HOST_ALIVE_CHECK      = 'check-host-alive'      # host alive check if server is pingable
NRPE_PROCESS_SERVICE  = 'process - nrpe'        # nrpe checks will depend on this one


# Print msg, followed by a newline, on standard error.
# Being defined at top level under this name, it is what a bare `warn` call
# in this script resolves to instead of Kernel#warn.
def warn(msg)
	STDERR.puts(msg)
end
# Give hash[key] a default: store value only when hash has no entry for key
# yet (an existing entry is kept even if its value is nil).
#
# Returns value when it was stored, nil when the key was already present.
def set_if_unset(hash, key, value)
	return nil if hash.key?(key)

	hash[key] = value
end
# Store value under key in hash, refusing to overwrite an existing entry.
#
# hash  - hash to modify in place
# key   - key that must not be present yet
# value - value to store
# type  - kind of definition (e.g. 'Host', 'Service'); only used in the error message
# name  - name of the definition; only used in the error message
#
# Returns value.  Raises RuntimeError if hash already has key.
def set_complain_if_set(hash, key, value, type, name)
	# raise rather than throw: throw is a non-local jump to a matching catch
	# and, with none around, only shows up as an "uncaught throw" error.
	raise "#{type} definition '#{name}' has '#{key}' already defined" if hash.has_key?(key)
	hash[key] = value
end

# Make an array out of something.
#
# nil becomes an empty array, a string becomes a one-element array, and an
# array is returned as is (the very same object, not a copy).
#
# Raises RuntimeError for anything else.
def ensure_array(something)
	case something
	when nil    then []
	when String then [ something ]
	when Array  then something
	else
		# raise rather than throw: there is no catch this could jump to
		raise "Do not know how to make an array out of #{something}: " + something.to_yaml
	end
end


# Registry of the checks that are run via NRPE.  Every distinct check
# command line is stored once, under a unique generated command name.
#
# Unfortunately NRPE limits check names to some 30 characters, so service
# names are cut off and, where that causes a clash, get a short hash appended.
class Nrpe
	# Hash mapping generated command name => check command line
	attr_reader :checks

	def initialize
		@checks = {}
	end

	# Build a command name for check that is not registered yet.
	#
	# Runs of characters other than ASCII letters in name are squeezed into a
	# single underscore, "process" is shortened to "ps", and the result is cut
	# to 19 characters and prefixed with SHORTORG.  If that name is taken, five
	# characters of a crypt() hash of the check are appended, re-hashing with
	# extra padding until the name is free.
	def make_name( name, check )
		sanitized = name.tr_s("^a-zA-Z", "_").gsub("process", "ps")
		base = "#{ SHORTORG }_" + sanitized[0,19]

		suffix  = ''
		padding = ''
		while @checks.has_key?(base + suffix)
			# hash it, so that we don't lose uniqueness by cutting it off;
			# 5 chars are enough
			suffix = (check + padding).crypt("$1$")[-5,5]
			suffix.tr!("/", "_")
			padding += ' ' # change the input a bit so the next hash differs
		end

		base + suffix      # max of 32 or so chars
	end

	# Register check for the service called name and return its command name.
	# A check that is already registered is not added again; the name it
	# was first registered under is returned instead.
	def add( name, check )
		known = @checks.find { |_, registered| registered == check }
		return known.first if known

		fresh = make_name(name, check)
		@checks[ fresh ] = check
		fresh
	end
end
$nrpe = Nrpe.new


# Writes one "define <kind> { ... }" block to fd, with one line per
# key/value pair of hash, followed by an empty line.
# This is the function that prints the bodies of most of our
# host/service/etc definitions.
#
# Keys listed in exclude_keys are left out, and so are private keys
# (those starting with an underscore).
def print_block(fd, kind, hash, exclude_keys)
	fd.puts "define #{kind} {"
	hash.each do |key, value|
		hidden = key[0,1] == '_' || exclude_keys.include?(key)
		fd.puts "	#{key}		#{value}" unless hidden
	end
	fd.puts "}"
	fd.puts
end


# Write the service definition service, applied to hosts, together with its
# service dependencies and its serviceextinfo block.
#
# hosts   - array of host names the service is attached to
# service - hash describing the service; it is modified in place (defaults,
#           host_name, check_command and NRPE dependency for NRPE checks,
#           'depends' turned into an array)
# files   - hash of open output files; the 'services', 'dependencies' and
#           'serviceextinfo' entries are written to
#
# Fails if the service already has a host_name, if an NRPE service also has
# a check_command, or if a prerequisite is not of the form [host:]service.
def addService(hosts, service, files)

	set_if_unset        service, 'use'               , SERVICE_TEMPLATE_NAME
	set_complain_if_set service, 'host_name'         , hosts.join(',')      , 'Service', service['service_description']
	set_if_unset        service, 'max_check_attempts', MAX_CHECK_ATTEMPTS_DEFAULT

	# A negative max_check_attempts is taken relative to the default
	service['max_check_attempts'] = MAX_CHECK_ATTEMPTS_DEFAULT + service['max_check_attempts'] if service['max_check_attempts'] < 0

	# 'depends' may be given as a single string.  Turn it into a list for every
	# kind of service, so that the dependency loop below can iterate over it
	# (String has no each).
	service['depends'] = ensure_array( service['depends'] ) if service['depends']

	if service['nrpe']
		raise "We already have a check_command (#{service['check_command']}) but we are in the NRPE block (nrpe: #{service['nrpe']})."+
			"  This should have been caught much earlier" if service.has_key?('check_command')

		check = $nrpe.add(service['service_description'], service['nrpe'])
		service['check_command'] = "#{ NRPE_CHECKNAME }!#{ check }"

		service['depends'] = ensure_array( service['depends'] )
		service['depends'] << NRPE_PROCESS_SERVICE unless service['service_description'] == NRPE_PROCESS_SERVICE  # Depend on NRPE unless we are it
	end

	# These keys steer the generator only and are not part of the service definition
	print_block files['services'], 'service', service, %w(nrpe runfrom remotecheck
	                                                      depends
	                                                      hosts hostgroups excludehosts excludehostgroups)

	if service['depends']
		# One servicedependency block per prerequisite and host
		service['depends'].each{ |prerequisite|
			hosts.each{ |host|
				# A prerequisite is either "service" (on the same host)
				# or "host:service"
				prerequisite_host = host
				pre = prerequisite
				parts = prerequisite.split(':')
				if parts.size == 2
					prerequisite_host = parts[0]
					pre = parts[1]
				elsif parts.size > 2
					raise "Cannot parse prerequisite #{prerequisite} for service #{service['service_description']} into host:service"
				end
				dependency = {
					'host_name'                     => prerequisite_host,
					'service_description'           => pre,
					'dependent_host_name'           => host,
					'dependent_service_description' => service['service_description'],
					'execution_failure_criteria'    => 'n',
					'notification_failure_criteria' => 'w,u,c'
				}
				print_block files['dependencies'], 'servicedependency', dependency, %w()
			}
		}
	end


	set_complain_if_set service['_extinfo'], 'service_description' , service['service_description'], 'serviceextinfo', service['service_description']
	set_complain_if_set service['_extinfo'], 'host_name'           , hosts.join(',')               , 'serviceextinfo', service['service_description']

	print_block files['serviceextinfo'], 'serviceextinfo', service['_extinfo'], %w()
end

# Host lists in services can be given both as single hosts and as hostgroups.
# This function merges the members of the listed hostgroups and the plain
# list of hosts into one array.
#
# service    - service hash; its "<prefix>hosts" and "<prefix>hostgroups"
#              entries (comma separated strings) are read
# servers    - hash of all known servers, keyed by host name
# hostgroups - hash of all known hostgroups, keyed by name; each needs a
#              '_memberlist' array
# prefix     - '' for the normal lists, or e.g. 'exclude' so that the function
#              can be used for the exclude lists as well
#
# Returns a new array of host names: the listed hosts first, then the members
# of each listed hostgroup.  Hosts that are named several times appear
# several times.  Raises RuntimeError for an unknown host or hostgroup.
def merge_hosts_and_hostgroups(service, servers, hostgroups, prefix)
	hosts = []
	hosts = service[prefix+'hosts'].split(/,/).map{ |x| x.strip } if service[prefix+'hosts']
	hosts.each{ |host|
		# raise rather than throw: there is no catch this could jump to
		raise "host #{host} does not exist - used in service #{service['service_description']}" unless servers[host]
	}
	if service[prefix+'hostgroups']
		service[prefix+'hostgroups'].split(/,/).map{ |x| x.strip }.each{ |hg|
			raise "hostgroup #{hg} does not exist - used in service #{service['service_description']}" unless hostgroups[hg]
			# concat copies the members into our own array; the memberlist is not changed
			hosts.concat hostgroups[hg]['_memberlist']
		}
	end

	return hosts
end

# Figure out the hosts a given service applies to.
#
# The hosts and hostgroups of the service are merged into one list, from which
# the hosts given by its excludehosts and excludehostgroups are removed.
#
# Returns the array of remaining host names.  Raises RuntimeError if a host
# that is to be excluded is not (or no longer) in the list.
def find_hosts(service, servers, hostgroups)
	hosts        = merge_hosts_and_hostgroups service, servers, hostgroups, ''
	excludehosts = merge_hosts_and_hostgroups service, servers, hostgroups, 'exclude'

	excludehosts.each{ |host|
		# Array#delete returns nil when there was nothing to remove
		if hosts.delete(host).nil?
			# raise rather than throw: there is no catch this could jump to
			raise "Cannot remove host #{host} from service #{service['service_description']}: it's not included anyway or excluded twice."
		end
	}

	return hosts
end

# Move every entry of hash whose key starts with "extinfo-" into the
# sub-hash hash['_extinfo'], dropping that prefix from the key.
# hash['_extinfo'] is always created (and replaces any previous value),
# even when there is nothing to move.
def split_away_extinfo(hash)
	extinfo = {}
	hash['_extinfo'] = extinfo
	prefix = 'extinfo-'
	# iterate over a snapshot of the keys, as entries are deleted on the way
	hash.keys.each do |key|
		next unless key[0, 8] == prefix

		extinfo[ key[8, key.length-8] ] = hash.delete(key)
	end
end


#############################################################################################
#############################################################################################
#############################################################################################

# Load the config
# The block form of File.open closes the config file again once it is parsed.
config = File.open( 'nagios-master.cfg' ) { |cfg| YAML::load( cfg ) }

# Open all output files for writing; this also truncates the files
# generated by a previous run.
files = {}
nagios_filename.each_pair{ |name, filename|
	files[name] = File.new(filename, "w")
}

#################################
# create a few hostgroups
#################################
# create the "all" and "pingable" hostgroups
config['hostgroups']['all'] = {}
config['hostgroups']['all']['alias'] = "all servers"
config['hostgroups']['pingable'] = {}
config['hostgroups']['pingable']['alias'] = "pingable servers"

# Every hostgroup gets its extinfo split off and starts with no members
config['hostgroups'].each_pair{ |name, hg|
	# raise rather than throw (here and below): there is no catch to jump to
	raise "Empty hostgroup or hostgroup #{name} not a hash" unless hg.kind_of?(Hash)
	split_away_extinfo hg

	hg['_memberlist'] = []
}

# Enter every server into the member lists of its hostgroups
config['servers'].each_pair{ |name, server|
	raise "Empty server or server #{name} not a hash" unless server.kind_of?(Hash)

	split_away_extinfo server

	raise "No hostgroups defined for #{name}" unless server['hostgroups']
	server['_hostgroups'] = server['hostgroups'].split(/,/).map{ |x| x.strip }
	# Every server is in "all", and in "pingable" unless it says pingable: false
	server['_hostgroups'] << 'all'
	server['_hostgroups'] << 'pingable' unless server['pingable'] == false

	server['_hostgroups'].each{ |hg|
		raise "Hostgroup #{hg} is not defined" unless config['hostgroups'].has_key?(hg)
		config['hostgroups'][hg]['_memberlist'] << name
	}
}

##############
# HOSTS
##############
# Write a host and a hostextinfo definition for every server
config['servers'].each_pair do |host, server|
	# Formerly we used 'ip' instead of 'address' in our source file
	# Handle this change but warn					XXX
	if server.has_key?('ip')
		STDERR.puts("Host definition for #{host} has an 'ip' field.  Please use 'address' instead")
		server['address'] = server.delete('ip')
	end

	set_complain_if_set server, 'host_name', host, 'Host', host
	set_if_unset        server, 'alias',     host
	set_if_unset        server, 'use',       HOST_TEMPLATE_NAME
	# Hosts marked pingable: false get no default alive check
	set_if_unset        server, 'check_command', HOST_ALIVE_CHECK if server['pingable'] != false

	print_block files['hosts'], 'host', server, %w(hostgroups pingable)



	# Handle hostextinfo: the first hostgroup of a server provides the
	# defaults for whatever the server does not set itself
	extinfo = server['_extinfo']
	group_extinfo = config['hostgroups'][ server['_hostgroups'].first ]['_extinfo']
	group_extinfo.each_pair do |key, value|
		# substitute hostname into the notes_url
		value = sprintf(value, host) if key == 'notes_url'

		set_if_unset extinfo, key, value
	end

	set_complain_if_set extinfo, 'host_name', host, 'hostextinfo', host
	# The icon also serves as vrml and statusmap image unless those are set
	if extinfo.has_key?('icon_image')
		set_if_unset extinfo, 'vrml_image',      extinfo['icon_image']
		set_if_unset extinfo, 'statusmap_image', extinfo['icon_image']
	end

	print_block files['hostextinfo'], 'hostextinfo', extinfo, %w()
end



##############
# HOSTGROUPS
##############
# Write a hostgroup definition for every hostgroup that is not marked private
config['hostgroups'].each_pair do |group_name, group|
	unless group['private']
		set_complain_if_set group, 'hostgroup_name', group_name, 'Hostgroup', group_name
		members = group['_memberlist'].join(",")
		set_complain_if_set group, 'members', members, 'Hostgroup', group_name
		set_if_unset        group, 'contact_groups', CONTACTGROUP

		print_block files['hostgroups'], 'hostgroup', group, %w()
	end
end


##############
# SERVICES and DEPENDENCIES
##############
config['services'].each{ |service|
	# raise rather than throw (here and below): there is no catch to jump to
	raise "Empty service or service not a hash" unless service.kind_of?(Hash)

	split_away_extinfo service


	# Both 'name' and 'service_description' are valid for a service's name
	# Internally we only use service_description as that's nagios' official term
	if service.has_key?('name')
		raise "Service definition has both a name (#{service['name']}) " +
		      "and a service_description (#{service['service_description']})" if service.has_key?('service_description')
		#STDERR.puts("Service definition #{service['name']} has a 'name' field.  Please use 'service_description' instead");
		service['service_description'] = service.delete('name')
	end
	# Both 'check' and 'check_command' are valid for a service's check command
	# Internally we only use check_command as that's nagios' official term
	if service.has_key?('check')
		raise "Service definition has both a check (#{service['check']}) " +
		      "and a check_command (#{service['check_command']})" if service.has_key?('check_command')
		#STDERR.puts("Service definition #{service['service_description']} has a 'check' field.  Please use 'check_command' instead");
		service['check_command'] = service.delete('check')
	end


	hosts = find_hosts service, config['servers'], config['hostgroups']
	raise "no hosts for service #{service['service_description']}" if hosts.empty?

	# A service is checked in exactly one way
	check_sources = %w(nrpe check_command remotecheck).select{ |key| service[key] }
	raise "nrpe, check, and remotecheck are mutually exclusive in service #{service['service_description']}" if check_sources.size >= 2

	if service['runfrom'] && service['remotecheck']
		# If the service check is to be run from a remote monitor server ("relay")
		# add that as an NRPE check to be run on the relay and make this
		# service also depend on NRPE on the relay
		relay = service['runfrom']
		relay_server = config['servers'][ relay ]
		raise "relay host #{relay} does not exist - used in service #{service['service_description']}" unless relay_server
		relay_ip = relay_server['address']
		raise "For some reason I do not have an address for relay #{relay}.  This shouldn't be." unless relay_ip

		hosts.each{ |host|
			# Every host gets its own service definition, so work on a deep
			# copy; a Marshal round trip copies the nested hashes and arrays too
			hostservice = Marshal.load( Marshal.dump( service ) )
			host_ip = config['servers'][host]['address']
			raise "For some reason I do not have an address for #{host}.  This shouldn't be." unless host_ip

			check = $nrpe.add("#{host}_#{hostservice['service_description']}", hostservice['remotecheck'].gsub(/\$HOSTADDRESS\$/, host_ip))
			hostservice['check_command'] = "#{NRPE_CHECKNAME_HOST}!#{ relay_ip }!#{ check }"

			# Make sure dependencies are an array.  If there are none, create an empty array
			# if depends is just a string, make a list with just that element
			hostservice['depends'] = ensure_array( hostservice['depends'] )
			# And append this new dependency
			hostservice['depends'] << "#{ relay }:#{ NRPE_PROCESS_SERVICE }"

			addService( [ host ], hostservice, files)
		}
	elsif service['runfrom'] || service['remotecheck']
		raise "runfrom and remotecheck must either appear both or not at all in service #{service['service_description']}"
	else
		addService(hosts, service, files)
	end
}


##############
# NRPE config file
##############
# One "command[<name>]=<check>" line for every check registered in $nrpe
nrpe_config = files['nrpe']
$nrpe.checks.each do |command_name, command_line|
	nrpe_config.puts "command[#{command_name}]=#{command_line}"
end