#!/usr/bin/ruby

# Copyright (c) 2006, 2007 Peter Palfrader <peter@palfrader.org>
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

require 'rmail'
require 'yaml'

Thread.abort_on_exception = true

# Verifies that +d+ looks like a maildir: the directory itself and its
# new/, cur/ and tmp/ subdirectories must all exist.
#
# Returns true on success.
# Raises RuntimeError ("<d> is not a maildir") as soon as one of the four
# directories is missing.
def check_maildir(d)
	# raise, not throw: throw belongs to catch/throw control flow and only
	# produces an "uncaught throw" error wrapping the intended message.
	[d, d+"/new", d+"/cur", d+"/tmp"].each do |dir|
		raise "#{d} is not a maildir" unless FileTest.directory?(dir)
	end
	true
end

# Usually only mails that are tagged as spam by both SA and bogofilter
# are automatically discarded.
# Mails with an SA score over this threshold are, however, discarded even
# if bogofilter is unsure or says ham.
SA_SCORE_SUFFICIENT = 10.0


# Maildirs needed by every action (paths are relative to the current
# working directory). Each one is checked for the new/, cur/ and tmp/
# subdirectories before any mail is touched.
OUTBOX    = "mail/outbox"
SPAMLEARN = "mail/spam-learn"
HAMLEARN  = "mail/ham-learn"
ERRORBOX  = "mail/errors"
check_maildir OUTBOX
check_maildir ERRORBOX
check_maildir SPAMLEARN
check_maildir HAMLEARN
# Table that approve() indexes by the list's request address to find the
# password for the Approved: header.
# The block form of File.open closes the file handle once it has been
# parsed. An empty file loads as a falsy value; fall back to an empty table
# so that lookups simply miss instead of failing on a non-Hash.
APPROVE_PASSWORD = File.open( 'mailman-passwords.yaml' ) { |f| YAML::load( f ) } || {}


# Select the mode of operation from the first command line argument and
# define the input maildir (plus the extra maildirs that only "classify"
# needs). Every maildir is verified right away; anything other than
# ham, spam or classify prints the usage line and exits with status 1.
case ARGV[0]
when "spam"
	ACTION    = "spam"
	MAILIN    = "mail/spam-in"
	check_maildir MAILIN
when "ham"
	ACTION    = "ham"
	MAILIN    = "mail/ham-in"
	check_maildir MAILIN
when "classify"
	ACTION    = "classify"
	MAILIN    = "mail/mailman-moderator-requests"
	OUTBOX_HUMANS = "mail/outbox-tolistmods"
	MAIL_PROCESSED_SPAM       = "mail/mailman-moderator-requests-spam"
	MAIL_PROCESSED_HAM        = "mail/mailman-moderator-requests-ham"
	MAIL_PROCESSED_FORWARDED  = "mail/mailman-moderator-requests-forwarded"
	[ MAILIN,
	  OUTBOX_HUMANS,
	  MAIL_PROCESSED_SPAM,
	  MAIL_PROCESSED_HAM,
	  MAIL_PROCESSED_FORWARDED ].each { |maildir| check_maildir maildir }
else
	STDERR.puts "Usage: #{$0} ham|spam|classify"
	exit 1
end



# DOMAIN is written into the X-Listbot-Domain header and is the domain the
# "List:" line of a moderation request must match.
DOMAIN='lists.oftc.net'
# Sender address of every mail this script generates.
FROM="listmod@blackhole.oftc.net"
# Local host name, used as the last component of maildir file names.
# chomp strips only a trailing line terminator; chop would also eat a real
# character if the command output ever lacked the final newline.
HOSTNAME=`hostname`.chomp
# Recipient of error reports and acknowledgements.
ERRORSTO=FROM

# Sequence number generator shared by the whole process; it feeds the
# per-process serial part of maildir file names.
class Counter
	@@counter = 0

	# Advances the shared counter by one and returns the new value
	# (1 on the first call).
	def self.value
		@@counter = @@counter + 1
	end
end

# Returns a fresh maildir file name of the form
#   <unix time>.<pid>_<serial>.<hostname>
# where <serial> comes from Counter.value and therefore differs on every
# call within one process.
def uniqueName
	timestamp = Time.now.to_i
	pid       = $$
	serial    = Counter.value
	"#{timestamp}.#{pid}_#{serial}.#{HOSTNAME}"
end

# Moves the mail file +from+ into the new/ directory of the maildir
# +tofolder+ under a freshly generated unique name.
#
# The move is done as hard link + unlink, so an already existing target is
# never overwritten (File.link fails instead).
#
# Returns the path of the file in its new location.
def move_mail_file(from, tofolder)
	destination = tofolder + "/new/" + uniqueName
	File.link(from, destination)
	File.unlink(from)
	return destination
end

# Delivers +msg+ into the maildir +md+.
#
# The message is written (via puts, so its to_s form plus a final newline if
# one is missing) to a uniquely named file in tmp/ with mode 0600,
# hard-linked into new/ and then removed from tmp/.
#
# Returns the path of the delivered file in new/.
# Raises whatever the underlying file operations raise; in that case the
# temporary file is cleaned up instead of being left behind in tmp/.
def store_in_maildir(md, msg)
	fn = uniqueName
	tmp_path = md+"/tmp/"+fn
	new_path = md+"/new/"+fn
	begin
		File.open(tmp_path, "w", 0600) do |f|
			f.puts msg
		end
		File.link(tmp_path, new_path)
	ensure
		# Runs on success and on failure alike, so a failed write or link
		# does not orphan the temporary file.
		File.unlink(tmp_path) if File.exist?(tmp_path)
	end
	new_path
end

# Queues a "Re: confirm <cookie>" reply to the list's request address in
# OUTBOX.
#
# cookie          - confirmation cookie taken from the moderation request
# request_address - address the reply is sent to
# password        - value for the Approved: header; the header is left out
#                   when this is nil/false (discard() passes nil, approve()
#                   passes the list password)
#
# Returns the path of the queued mail as reported by store_in_maildir.
def handle_mailman_queued_mail(cookie, request_address, password)
	reply = RMail::Message.new()
	[
		['From',                 FROM],
		['To',                   request_address],
		['X-Listbot-Domain',     DOMAIN],
		['X-Listbot-Type',       'Mailman-Request'],
		['X-List-Administrivia', 'yes'],
		['Subject',              "Re: confirm #{cookie}"],
	].each do |field, value|
		reply.header[field] = value
	end
	reply.header['Approved'] = password if password
	reply.body = ''
	store_in_maildir(OUTBOX, reply)
end
# Queues the confirmation reply for +cookie+ without an Approved: header.
# Returns what handle_mailman_queued_mail returns.
def discard(cookie, request_address)
	no_password = nil
	handle_mailman_queued_mail(cookie, request_address, no_password)
end
# Queues the confirmation reply for +cookie+ with the list's password in
# the Approved: header.
#
# The password is looked up in APPROVE_PASSWORD by +request_address+. When
# there is no entry, no reply is queued; an error mail to ERRORSTO is
# stored in OUTBOX instead and nil is returned.
def approve(cookie, request_address)
	password = APPROVE_PASSWORD[request_address]
	if password
		handle_mailman_queued_mail(cookie, request_address, password)
	else
		report = RMail::Message.new()
		report.header['From'] = FROM
		report.header['To'] = ERRORSTO
		report.header['X-Listbot-Domain'] = DOMAIN
		report.header['X-Listbot-Type'] = 'Error'
		report.header['X-List-Administrivia'] = 'yes'
		report.header['Subject'] = "approving #{cookie} failed"
		report.body = 'Sorry, this script does not have the admin/moderator password for'+"\n#{request_address}."
		store_in_maildir(OUTBOX, report)
		nil
	end
end


# Runs an external command, feeding it +input+ on stdin and collecting its
# stdout and stderr.
#
# command - Array with the program name followed by its arguments (passed
#           to exec as separate arguments)
# input   - object written to the command's stdin with IO#print
#
# Returns [exitstatus, stdout_lines, stderr_lines]; the two line arrays are
# what IO#readlines yields. When the command cannot be executed at all, the
# exit status is 127 and the reason is reported on the captured stderr.
def runcmd(command, input)
	rdin , wrin  = IO.pipe
	rdout, wrout = IO.pipe
	rderr, wrerr = IO.pipe

	pid = fork
	unless pid
		# child
		begin
			wrin.close
			rdout.close
			rderr.close
			STDIN.reopen rdin
			STDOUT.reopen wrout
			STDERR.reopen wrerr
			exec(*command)
		rescue Exception => e
			# exec raises instead of returning when it fails. Report the
			# problem (through the stderr pipe if it is already in place).
			STDERR.puts "could not execute '#{command.join(' ')}': #{e}"
		end
		# Never fall back into the parent's code path: without this the
		# child would carry on processing mails as a second copy of the
		# script. exit! also skips at_exit handlers inherited from the parent.
		exit! 127
	end
	rdin.close
	wrout.close
	wrerr.close

	# Feed stdin and drain both output pipes concurrently so that neither
	# side can block on a full pipe.
	tin  = Thread.new do
		begin
			wrin.print input
		rescue Errno::EPIPE
			# The command exited without reading all of its input; its
			# exit status and stderr tell the caller what happened.
		ensure
			wrin.close
		end
	end
	tout = Thread.new { rdout.readlines }
	terr = Thread.new { rderr.readlines }
	tin.join
	out = tout.value
	err = terr.value
	# Release the read ends right away instead of leaking two descriptors
	# per call until the garbage collector gets to them.
	rdout.close
	rderr.close
	Process.wait pid

	exitstatus = $?.exitstatus

	[exitstatus, out, err]
end

# Runs +command+ (an Array, see runcmd) with +input+ on stdin and insists
# on a clean run.
#
# Returns the command's stdout as an array of lines.
# Raises RuntimeError when the exit status is not 0 or when the command
# wrote anything to stderr.
def runnoerrors(command, input)
	exitstatus, out, err = runcmd(command, input)
	cmd = command.join(' ')
	# raise, not throw: these are errors, not catch/throw control flow.
	raise "command '#{cmd}' returned with non-zero exit status #{exitstatus}" if exitstatus != 0
	raise "command '#{cmd}' returned with output on stderr: #{err.join}" unless err.empty?

	out
end

# Classifies +message+ with SpamAssassin by piping it through `spamc -R`.
#
# Returns [classification, report, score]:
#   classification - "Ham" for a score below 1.0, "Spam" for a score above
#                    6.0, "Unsure" otherwise
#   report         - the spamc output after its first line
#   score          - the number at the start of the first output line
# Messages whose string form is longer than 250000 are not checked at all
# and come back as "Unsure" with score 0.
#
# Raises RuntimeError when the first output line does not start with a
# score, and whatever runnoerrors raises when spamc fails.
def sa_check(message)
	if message.to_s.length > 250000
		return ["Unsure", "File over 250000 Bytes in size, SA check skipped", 0]
	end

	out = runnoerrors(['spamc', '-R'], message)
	line1 = out.shift
	# to_s: with no output at all line1 is nil, which must end in the
	# error below rather than in a type error from the match.
	matchdata = /^([-0-9.]+)/.match(line1.to_s)
	raise "Could not find score in spamassassin output line1: '#{line1}'" unless matchdata and matchdata[1]
	score = matchdata[1].to_f

	c = (score < 1.0) ? "Ham" :
	    (score > 6.0) ? "Spam" :
	                    "Unsure"

	[c, out.join, score]
end

# Classifies +message+ by piping it through `bogofilter -l -e -v -v`.
#
# Returns [classification, output]: the word following "X-Bogosity: " on
# the first output line, and the complete output joined into one string.
#
# Raises RuntimeError when the first output line is not an X-Bogosity
# line, and whatever runnoerrors raises when bogofilter fails.
def bogo_check(message)
	out = runnoerrors(['bogofilter', '-l', '-e', '-v', '-v'], message)
	line1 = out[0]
	# to_s: with no output at all line1 is nil, which must end in the
	# error below rather than in a type error from the match.
	matchdata = /^X-Bogosity: ([a-zA-Z0-9]+)/.match(line1.to_s)
	raise "Could not find classfication in bogofilter output line1: '#{line1}'" unless matchdata and matchdata[1]
	c = matchdata[1]

	[c, out.join]
end

# Collects who resent a moderation request and when, for use in the
# acknowledgement mail.
#
# message - object whose #header can be indexed by field name
#
# Returns a Hash with the keys
#   'origmsgid' (Message-Id), 'from' (Resent-From), 'date' (Resent-Date),
#   'msgid' (Resent-Message-Id) and 'header' (the whole header as a string).
# Raises RuntimeError naming the first of those four header fields that is
# missing.
def fetch_resent_info(message)
	info = {
		'origmsgid' => message.header['Message-Id'],
		'from'      => message.header['Resent-From'],
		'date'      => message.header['Resent-Date'],
		'msgid'     => message.header['Resent-Message-Id'],
		'header'    => message.header.to_s }

	# raise, not throw: a missing field is an error, not catch/throw
	# control flow. The order determines which field gets reported first.
	[ ['origmsgid', 'Message-Id'],
	  ['from',      'Resent-From'],
	  ['date',      'Resent-Date'],
	  ['msgid',     'Resent-Message-Id'] ].each do |key, field|
		raise "Did not find #{field} header in mail" unless info[key]
	end

	info
end

# Queues an acknowledgement mail to ERRORSTO in OUTBOX, stating which
# +action+ was taken and by whom.
#
# action      - word describing the decision (process_mail passes
#               'APPROVED' or 'DISCARDED')
# resent_info - Hash as returned by fetch_resent_info
#
# Returns the path of the queued mail as reported by store_in_maildir.
def send_ack(action, resent_info)
	moderator   = resent_info['from']
	original_id = resent_info['origmsgid']

	ack = RMail::Message.new()
	ack.header['From'] = FROM
	ack.header['To'] = ERRORSTO
	ack.header['Subject'] = "#{action} by #{moderator}"
	ack.header['X-Listbot-Domain'] = DOMAIN
	ack.header['X-Listbot-Type'] = 'Acknowledgement'
	ack.header['X-List-Administrivia'] = 'yes'
	# Thread the acknowledgement below the original request.
	ack.header['In-Reply-To'] = original_id
	ack.header['References'] = original_id+' '+resent_info['msgid']

	summary = "On #{resent_info['date']} #{moderator} #{action} this message.\n\nRequest headers follow:\n"
	ack.body = summary + resent_info['header']
	store_in_maildir(OUTBOX, ack)
end

# Helper for process_mail: turns the raw text of a moderation request into
# the text forwarded to the human moderators.
#
# In the header section (everything before the first empty line)
# Delivered-To: fields are renamed to X-OLD-Delivered-To: to avoid mail
# loop warnings, and three X-List*/X-Listbot-* marker fields are put in
# front. The empty line separating header and body is kept, as is the body.
#
# Returns the new text without a trailing newline.
def listbot_forward_text(raw)
	lines = raw.split(/\n/)
	header_end = lines.index("") || lines.size
	header = lines[0...header_end].map do |line|
		line.sub(/^Delivered-To:/, 'X-OLD-Delivered-To:')
	end
	# From the separator line onwards, untouched. Dropping the separator
	# would glue the first body line onto the header section.
	remainder = lines[header_end..-1]
	markers = [
		'X-List-Administrivia: yes',
		'X-Listbot-Type: Moderator-Request',
		"X-Listbot-Domain: #{DOMAIN}" ]
	(markers + header + remainder).join("\n")
end

# Handles one Mailman moderation request stored in the file +filename+,
# according to the global ACTION:
#
#   "ham"      - store the held mail in HAMLEARN, approve it, acknowledge
#   "spam"     - store the held mail in SPAMLEARN, discard it, acknowledge
#   "classify" - ask SpamAssassin and bogofilter; approve when both say
#                Ham, discard when both say Spam or when SpamAssassin says
#                Spam with a score above SA_SCORE_SUFFICIENT, otherwise
#                forward the request to the human moderators
#
# The request must consist of three MIME parts (explanation text, held
# mail, confirmation mail); a fourth leading part, as added by an earlier
# "classify" run, is dropped first.
#
# Raises RuntimeError when the mail does not have the expected structure;
# errors from the helpers it calls propagate. An unknown ACTION terminates
# the script with status 1.
def process_mail(filename)
	message = File.open(filename) { |f| RMail::Parser.read(f) }
	# some sanity checks
	message.body.shift if message.body.length == 4
	raise "Mailman moderation mails are expected to have 3 mime parts (or 4 if with spam info from our script)" unless message.body.length == 3
	['text/plain; charset="us-ascii"', 'message/rfc822', 'message/rfc822'].each_with_index do |expected, i|
		found = message.body[i].header['Content-Type']
		raise "Mime Part #{i} does have an unexpected content type: #{found}" unless found == expected
	end
	explanation_body = message.body[0].body
	held_part = RMail::Parser.read( message.body[1].body )
	discard_part = RMail::Parser.read( message.body[2].body )

	# more sanity checks
	raise "Did not find 'As list administrator, your..' boilerplate in mail" unless explanation_body =~ /^As list administrator, your authorization is requested for the/
	# DOMAIN is escaped so that its dots only match literal dots.
	raise "Did not find listname in mail" unless explanation_body =~ /^ *List: *.*@#{Regexp.escape(DOMAIN)}/
	raise "discard_part does have an unexpected content type: #{discard_part.header['Content-Type']}" unless discard_part.header['Content-Type'] == 'text/plain; charset="us-ascii"'

	request_address = discard_part.header['From']
	raise "discard_part does not have a from address" unless request_address

	# At least one hex digit is required: an empty cookie would otherwise
	# pass the check and produce a useless "Re: confirm " reply.
	matchdata = /confirm ([0-9a-f]+)/.match(discard_part.header['Subject'].to_s)
	raise "Could not find cookie in discard_part" unless matchdata and matchdata[1]
	cookie = matchdata[1]


	case ACTION
	when "ham"
		resent_info = fetch_resent_info(message)
		store_in_maildir(HAMLEARN, held_part)
		approve(cookie, request_address)
		send_ack('APPROVED', resent_info)
	when "spam"
		resent_info = fetch_resent_info(message)
		store_in_maildir(SPAMLEARN, held_part)
		discard(cookie, request_address)
		send_ack('DISCARDED', resent_info)
	when "classify"
		sa_class  , sa_text  , sa_score = sa_check(held_part)
		bogo_class, bogo_text = bogo_check(held_part)

		# Prepend both filter reports as an additional MIME part; this is
		# the fourth part that is dropped again at the top of this method.
		spam_info = RMail::Message.new()
		spam_info.header['Content-Type'] = 'text/plain; charset="us-ascii"'
		spam_info.header['MIME-Version'] = '1.0'
		spam_info.header['Content-Transfer-Encoding'] = '8bit'
		spam_info.body = sa_text + "\n\n" + bogo_text
		message.body.unshift spam_info

		if sa_class == "Ham" and bogo_class == "Ham"
			store_in_maildir(MAIL_PROCESSED_HAM, message)
			approve(cookie, request_address)
		elsif sa_class == "Spam" and bogo_class == "Spam"
			store_in_maildir(MAIL_PROCESSED_SPAM, message)
			discard(cookie, request_address)
		elsif sa_class == "Spam" and sa_score > SA_SCORE_SUFFICIENT # but bogo did not match
			store_in_maildir(SPAMLEARN, held_part) # so we let it learn it
			store_in_maildir(MAIL_PROCESSED_SPAM, message)
			discard(cookie, request_address)
		else
			store_in_maildir(MAIL_PROCESSED_FORWARDED, message)
			store_in_maildir(OUTBOX_HUMANS, listbot_forward_text(message.to_s))
		end
	else
		STDERR.puts "Unknown action #{ACTION}!"
		exit 1
	end
end

# Main loop: process every mail waiting in new/ of the input maildir.
#
# A mail that was handled successfully is deleted. When handling fails the
# mail is moved to ERRORBOX and a report with the error message and
# backtrace is queued in OUTBOX for ERRORSTO; if even that fails, both
# errors are printed to stderr. In either case the loop goes on with the
# next mail.
Dir[MAILIN+"/new/*"].each do |filename|
	begin
		process_mail filename
		File.unlink filename
	rescue SystemExit, SignalException
		# A deliberate exit (process_mail calls exit on an unknown action)
		# or a signal such as an interrupt must end the script. Treating
		# them as a problem with this particular mail would move it to the
		# error box and carry on with the next one.
		raise
	rescue Exception => e
		begin
			newname = move_mail_file(filename, ERRORBOX)
			mail_error = RMail::Message.new()
			mail_error.header['From'] = FROM
			mail_error.header['To'] = ERRORSTO
			mail_error.header['X-Listbot-Domain'] = DOMAIN
			mail_error.header['X-Listbot-Type'] = 'Error'
			mail_error.header['X-List-Administrivia'] = 'yes'
			mail_error.header['Subject'] = "handling of #{filename} failed"
			mail_error.body = "Processing of #{filename} failed: #{e}\n" +
			                  "Moved to #{newname}\n" +
					  "\n" +
					  "complete backtrace:\n"+
					  e.backtrace.join("\n")
			store_in_maildir(OUTBOX, mail_error)
		rescue Exception => e2
			# Reporting by mail failed as well; stderr is the last resort.
			STDERR.puts "Error when processing #{filename}: #{e}"
			STDERR.puts "During error handling we encountered a new problem: #{e2}"
			STDERR.puts "backtrace of e:"
			STDERR.puts e.backtrace.join("\n")
			STDERR.puts
			STDERR.puts "backtrace of e2:"
			STDERR.puts e2.backtrace.join("\n")
		end
	end
end