#!/usr/bin/ruby

require 'rmail'
require 'yaml'

# Re-raise an exception that happens inside any thread (runcmd starts
# helper threads for its pipe I/O) in the main thread, instead of letting
# it go unnoticed until the thread is joined.
Thread.abort_on_exception = true

# Verify that +d+ looks like a maildir: the directory itself and its
# "new", "cur" and "tmp" subdirectories must all exist.
#
# d:: path of the directory to check
#
# Returns true when all four directories exist.
# Raises RuntimeError ("<d> is not a maildir") otherwise.
def check_maildir(d)
	raise "#{d} is not a maildir" unless FileTest.directory?(d)
	%w[new cur tmp].each do |subdir|
		raise "#{d} is not a maildir" unless FileTest.directory?(d+"/"+subdir)
	end
	true
end

# Usually only mails that are tagged as spam by both SA (spamassassin)
# and bogofilter are automatically discarded.
# Mails with an SA score over this value are however discarded even
# if bogofilter is unsure or says ham.
SA_SCORE_SUFFICIENT = 10.0


# Maildirs that are needed regardless of the selected action.
OUTBOX    = "mail/outbox"
SPAMLEARN = "mail/spam-learn"
HAMLEARN  = "mail/ham-learn"
check_maildir OUTBOX
check_maildir SPAMLEARN
check_maildir HAMLEARN

# Passwords used for the 'Approved' header, looked up by request address
# (see approve). The block form closes the file again after parsing; an
# empty file (for which YAML::load returns a false value) is treated as
# "no passwords known" instead of breaking later lookups.
APPROVE_PASSWORD = File.open( 'mailman-passwords.yaml' ) { |f| YAML::load( f ) } || {}

# Select the mode of operation from the first command line argument and
# set up (and verify) the maildirs that mode works on. Anything other
# than ham, spam or classify prints a usage line and exits with status 1.
case ARGV[0]
when "spam"
	ACTION = "spam"
	MAILIN = "mail/spam-in"
	check_maildir MAILIN
when "ham"
	ACTION = "ham"
	MAILIN = "mail/ham-in"
	check_maildir MAILIN
when "classify"
	ACTION = "classify"
	MAILIN = "mail/mailman-moderator-requests"
	OUTBOX_HUMANS = "mail/outbox-tolistmods"
	MAIL_PROCESSED_SPAM = "mail/mailman-moderator-requests-spam"
	MAIL_PROCESSED_HAM = "mail/mailman-moderator-requests-ham"
	MAIL_PROCESSED_FORWARDED = "mail/mailman-moderator-requests-forwarded"
	[
		MAILIN,
		OUTBOX_HUMANS,
		MAIL_PROCESSED_SPAM,
		MAIL_PROCESSED_HAM,
		MAIL_PROCESSED_FORWARDED
	].each do |maildir|
		check_maildir maildir
	end
else
	STDERR.puts "Usage: #{$0} ham|spam|classify"
	exit 1
end



# Domain the mailing lists live in; used to sanity check moderation mails.
DOMAIN = 'lists.oftc.net'
# Sender address of every mail this script generates.
FROM = "listmod@blackhole.oftc.net"
# Host name used as part of maildir file names. chomp only strips a
# trailing newline, so the name stays intact should the command ever
# print it without one.
HOSTNAME = `hostname`.chomp
# Recipient of the error reports generated by approve.
ERRORSTO = FROM

# Process-wide sequence generator: every call to Counter.value hands out
# the next integer, starting at 1.
class Counter
	@issued = 0

	def self.value
		@issued += 1
	end
end

# Build a file name for a new maildir entry of the form
# "<epoch seconds>.<pid>_<sequence number>.<hostname>".
def uniqueName
	timestamp = Time.now.to_i
	pid       = $$
	sequence  = Counter.value
	"#{timestamp}.#{pid}_#{sequence}.#{HOSTNAME}"
end

# Deliver +msg+ into the maildir +md+: the message is written to a fresh
# file below tmp/, hard-linked into new/ under the same name, and the
# tmp/ name is removed afterwards.
#
# md::  path of the maildir
# msg:: the message; written with puts
#
# Returns the path of the file in new/.
def store_in_maildir(md, msg)
	basename  = uniqueName
	tmp_path  = md + "/tmp/" + basename
	new_path  = md + "/new/" + basename

	File.open(tmp_path, "w", 0600) { |io| io.puts msg }
	File.link(tmp_path, new_path)
	File.unlink(tmp_path)

	new_path
end

# Queue a "Re: confirm <cookie>" reply to +request_address+ in OUTBOX.
# When +password+ is given it is sent along in an 'Approved' header;
# without it the reply carries no such header.
#
# Returns the path of the queued mail (see store_in_maildir).
def handle_mailman_queued_mail(cookie, request_address, password)
	reply = RMail::Message.new
	fields = [
		['From',    FROM],
		['To',      request_address],
		['Subject', "Re: confirm #{cookie}"]
	]
	fields << ['Approved', password] if password
	fields.each { |name, value| reply.header[name] = value }
	reply.body = ''
	store_in_maildir(OUTBOX, reply)
end
# Answer the held-message confirmation for +cookie+ without a password,
# i.e. queue the reply without an 'Approved' header.
def discard(cookie, request_address)
	no_password = nil
	handle_mailman_queued_mail(cookie, request_address, no_password)
end
# Answer the held-message confirmation for +cookie+ with the password
# stored for +request_address+ in APPROVE_PASSWORD.
#
# If no password is known for that address, an error report is queued to
# ERRORSTO instead and nil is returned.
def approve(cookie, request_address)
	password = APPROVE_PASSWORD[request_address]

	if password
		handle_mailman_queued_mail(cookie, request_address, password)
	else
		report = RMail::Message.new
		report.header['From'] = FROM
		report.header['To'] = ERRORSTO
		report.header['Subject'] = "approving #{cookie} failed"
		report.body = "Sorry, this script does not have the admin/moderator password for\n#{request_address}."
		store_in_maildir(OUTBOX, report)
		nil
	end
end


# Run an external command, feed it +input+ on stdin and collect what it
# writes to stdout and stderr.
#
# command:: array with the program and its arguments, handed to exec
# input::   object printed to the command's stdin
#
# Returns [exitstatus, stdout_lines, stderr_lines]. When the command
# cannot be executed at all, the exit status is 127 and a description of
# the failure is found in stderr_lines.
def runcmd(command, input)
	rdin , wrin  = IO.pipe
	rdout, wrout = IO.pipe
	rderr, wrerr = IO.pipe

	pid = fork
	unless pid
		# child
		wrin.close
		rdout.close
		rderr.close
		STDIN.reopen rdin
		STDOUT.reopen wrout
		STDERR.reopen wrerr
		rdin.close
		wrout.close
		wrerr.close
		begin
			exec(*command)
		rescue Exception => e
			STDERR.puts "could not exec '#{command.join(' ')}': #{e}"
		ensure
			# The child must never return into the caller's code: it is a
			# copy of the whole script and would carry on processing mail.
			# exit! also skips at_exit handlers inherited from the parent.
			exit! 127
		end
	end
	rdin.close
	wrout.close
	wrerr.close

	# Write unbuffered so that a failed write leaves nothing behind that
	# close would try to flush again.
	wrin.sync = true
	tin = Thread.new do
		begin
			wrin.print input
		rescue Errno::EPIPE
			# The command went away without reading all of its input; its
			# exit status and stderr are reported to the caller as usual.
		ensure
			wrin.close
		end
	end
	tout = Thread.new { rdout.readlines }
	terr = Thread.new { rderr.readlines }
	tin.join
	out = tout.value
	err = terr.value
	rdout.close
	rderr.close
	Process.wait pid

	exitstatus = $?.exitstatus

	[exitstatus, out, err]
end

# Run +command+ via runcmd and insist on a clean run.
#
# command:: array with the program and its arguments
# input::   object fed to the command's stdin
#
# Returns the lines the command wrote to stdout.
# Raises RuntimeError when the exit status is not 0 or when anything was
# written to stderr.
def runnoerrors(command, input)
	exitstatus, out, err = runcmd(command, input)
	cmd = command.join(' ')
	raise "command '#{cmd}' returned with non-zero exit status #{exitstatus}" if exitstatus != 0
	raise "command '#{cmd}' returned with output on stderr: #{err.join}" unless err.empty?

	out
end

# Have spamassassin (spamc -R) rate +message+.
#
# The first output line has to start with the score. A score below 1.0 is
# classified "Ham", one above 6.0 "Spam", everything in between "Unsure".
#
# Returns [classification, remaining spamc output as one string, score].
# Raises RuntimeError when no score is found in the first output line
# (or when spamc itself fails, see runnoerrors).
def sa_check(message)
	out = runnoerrors(['spamc', '-R'], message)
	line1 = out.shift
	matchdata = /^([-0-9.]+)/.match line1
	raise "Could not find score in spamassassin output line1: '#{line1}'" unless matchdata
	score = matchdata[1].to_f

	c = if score < 1.0
		"Ham"
	elsif score > 6.0
		"Spam"
	else
		"Unsure"
	end

	[c, out.join, score]
end

# Have bogofilter classify +message+.
#
# The classification is the word following "X-Bogosity: " in the first
# output line.
#
# Returns [classification, complete bogofilter output as one string].
# Raises RuntimeError when the first output line carries no
# classification (or when bogofilter itself fails, see runnoerrors).
def bogo_check(message)
	out = runnoerrors(['bogofilter', '-l', '-e', '-v', '-v'], message)
	line1 = out[0]
	matchdata = /^X-Bogosity: ([a-zA-Z0-9]+)/.match line1
	raise "Could not find classfication in bogofilter output line1: '#{line1}'" unless matchdata
	c = matchdata[1]

	[c, out.join]
end


# Rename "Delivered-To:" header fields of the mail +text+ to
# "X-OLD-Delivered-To:". Only lines before the first empty line (the
# header) are touched. Returns the rewritten mail as a string.
def rename_delivered_to_headers(text)
	in_header = true
	rewritten = text.split(/\n/).map do |line|
		in_header = false if line == ""
		in_header ? line.gsub(/^Delivered-To:/, 'X-OLD-Delivered-To:') : line
	end
	rewritten.join("\n")
end

# Handle one Mailman moderation request stored in the file +filename+.
#
# The mail has to consist of three MIME parts: Mailman's explanation, the
# held message and the confirmation message whose From address and
# "confirm <cookie>" subject are needed to answer the request. Depending
# on ACTION the held message is
# ham::      stored for ham learning and approved,
# spam::     stored for spam learning and discarded,
# classify:: rated by spamassassin and bogofilter and then approved,
#            discarded or forwarded to the human moderators.
#
# Raises RuntimeError when the mail does not look as expected.
def process_mail(filename)
	message = File.open(filename) { |f| RMail::Parser.read(f) }

	# some sanity checks
	# A fourth, leading part is the spam info added by an earlier classify
	# run of this script; drop it again.
	message.body.shift if message.body.length == 4
	raise "Mailman moderation mails are expected to have 3 mime parts (or 4 if with spam info from our script)" unless message.body.length == 3
	expected_types = ['text/plain; charset="us-ascii"', 'message/rfc822', 'message/rfc822']
	expected_types.each_with_index do |expected, i|
		actual = message.body[i].header['Content-Type']
		raise "Mime Part #{i} does have an unexpected content type: #{actual}" unless actual == expected
	end
	explanation_body = message.body[0].body
	held_part = RMail::Parser.read( message.body[1].body )
	discard_part = RMail::Parser.read( message.body[2].body )

	# more sanity checks
	raise "Did not find 'As list administrator, your..' boilerplate in mail" unless explanation_body =~ /^As list administrator, your authorization is requested for the/
	raise "Did not find listname in mail" unless explanation_body =~ /^ *List: *.*@#{DOMAIN}/
	raise "discard_part does have an unexpected content type: #{discard_part.header['Content-Type']}" unless discard_part.header['Content-Type'] == 'text/plain; charset="us-ascii"'

	request_address = discard_part.header['From']
	raise "discard_part does not have a from address" unless request_address

	# Require at least one hex digit: an empty cookie is no cookie.
	matchdata = /confirm ([0-9a-f]+)/.match discard_part.header['Subject']
	raise "Could not find cookie in discard_part" unless matchdata
	cookie = matchdata[1]

	case ACTION
	when "ham"
		store_in_maildir(HAMLEARN, held_part)
		approve(cookie, request_address)
	when "spam"
		store_in_maildir(SPAMLEARN, held_part)
		discard(cookie, request_address)
	when "classify"
		sa_class  , sa_text  , sa_score = sa_check(held_part)
		bogo_class, bogo_text = bogo_check(held_part)

		# Prepend the verdicts of both filters as an additional MIME part.
		spam_info = RMail::Message.new()
		spam_info.header['Content-Type'] = 'text/plain; charset="us-ascii"'
		spam_info.header['MIME-Version'] = '1.0'
		spam_info.header['Content-Transfer-Encoding'] = '8bit'
		spam_info.body = sa_text + "\n\n" + bogo_text
		message.body.unshift spam_info

		if sa_class == "Ham" && bogo_class == "Ham"
			store_in_maildir(MAIL_PROCESSED_HAM, message)
			approve(cookie, request_address)
		elsif sa_class == "Spam" && bogo_class == "Spam"
			store_in_maildir(MAIL_PROCESSED_SPAM, message)
			discard(cookie, request_address)
		elsif sa_class == "Spam" && sa_score > SA_SCORE_SUFFICIENT # but bogo did not match
			store_in_maildir(SPAMLEARN, held_part) # so we let it learn it
			store_in_maildir(MAIL_PROCESSED_SPAM, message)
			discard(cookie, request_address)
		else
			# The filters disagree or are unsure: let the humans decide.
			store_in_maildir(MAIL_PROCESSED_FORWARDED, message)

			# rewrite Delivered-To to X-OLD-Delivered-To to avoid mail loop warnings.
			store_in_maildir(OUTBOX_HUMANS, rename_delivered_to_headers(message.to_s))
		end
	else
		STDERR.puts "Unknown action #{ACTION}!"
		exit 1
	end
end

# Process every mail waiting in new/ of the input maildir. A mail is
# removed only after it was processed successfully; on an error the
# problem is reported on stderr, the mail stays where it is, and
# processing continues with the next one.
#
# Only StandardError is rescued so that an interrupt or a deliberate
# exit (e.g. the "Unknown action" exit in process_mail) still stops
# the script.
Dir[MAILIN+"/new/*"].each do |filename|
	begin
		process_mail filename
		File.unlink filename
	rescue StandardError => e
		STDERR.puts "Error when processing #{filename}: #{e}"
	end
end