From eef706ac7e1dfdaa6e61108f9e2cffba611ebda8 Mon Sep 17 00:00:00 2001 From: Peter Palfrader Date: Thu, 15 Jun 2006 21:10:19 +0000 Subject: Do spam classification right here git-svn-id: svn+ssh://asteria.noreply.org/svn/weaselutils/trunk@128 bc3d92e2-beff-0310-a7cd-cc87d7ac0ede --- split-mailman-mails-and-discard-and-save | 206 +++++++++++++++++++++++++------ 1 file changed, 167 insertions(+), 39 deletions(-) (limited to 'split-mailman-mails-and-discard-and-save') diff --git a/split-mailman-mails-and-discard-and-save b/split-mailman-mails-and-discard-and-save index a466fcb..61298cc 100755 --- a/split-mailman-mails-and-discard-and-save +++ b/split-mailman-mails-and-discard-and-save @@ -3,23 +3,54 @@ require 'rmail' require 'yaml' +Thread.abort_on_exception = true + +def check_maildir(d) + throw "#{d} is not a maildir" unless FileTest.directory?(d) + throw "#{d} is not a maildir" unless FileTest.directory?(d+"/new") + throw "#{d} is not a maildir" unless FileTest.directory?(d+"/cur") + throw "#{d} is not a maildir" unless FileTest.directory?(d+"/tmp") + true +end + + + + +OUTBOX="mail/outbox" +check_maildir OUTBOX +APPROVE_PASSWORD = YAML::load( File.open( 'mailman-passwords.yaml' ) ) + if ARGV[0] == "spam" - MAILIN="mail/spam-in" - MAILLEARN="mail/spam-learn" - OUTBOX="mail/outbox" - DO_APPROVE = false - APPROVE_PASSWORD = nil + ACTION = "spam" + MAILIN = "mail/spam-in" + MAILLEARN = "mail/spam-learn" + check_maildir MAILIN + check_maildir MAILLEARN elsif ARGV[0] == "ham" - MAILIN="mail/ham-in" - MAILLEARN="mail/ham-learn" - OUTBOX="mail/outbox" - DO_APPROVE = true - APPROVE_PASSWORD = YAML::load( File.open( 'mailman-passwords.yaml' ) ) + ACTION = "ham" + MAILIN = "mail/ham-in" + MAILLEARN = "mail/ham-learn" + check_maildir MAILIN + check_maildir MAILLEARN +elsif ARGV[0] == "classify" + ACTION = "classify" + MAILIN = "mail/mailman-moderator-requests" + OUTBOX_HUMANS = "mail/outbox-tolistmods" + MAIL_PROCESSED_SPAM = "mail/mailman-moderator-requests-spam" + MAIL_PROCESSED_HAM = "mail/mailman-moderator-requests-ham" + MAIL_PROCESSED_FORWARDED = "mail/mailman-moderator-requests-forwarded" + check_maildir MAILIN + check_maildir OUTBOX_HUMANS + check_maildir MAIL_PROCESSED_SPAM + check_maildir MAIL_PROCESSED_HAM + check_maildir MAIL_PROCESSED_FORWARDED else STDERR.puts "Usage: #{$0} ham|spam" exit 1 end + + DOMAIN='lists.oftc.net' FROM="listmod@blackhole.oftc.net" HOSTNAME=`hostname`.chop @@ -41,14 +72,6 @@ def uniqueName HOSTNAME] end -def check_maildir(d) - throw "#{d} is not a maildir" unless FileTest.directory?(d) - throw "#{d} is not a maildir" unless FileTest.directory?(d+"/new") - throw "#{d} is not a maildir" unless FileTest.directory?(d+"/cur") - throw "#{d} is not a maildir" unless FileTest.directory?(d+"/tmp") - true -end - def store_in_maildir(md, msg) fn = uniqueName File.open(md+"/tmp/"+fn, "w", 0600) do |f| @@ -59,10 +82,106 @@ def store_in_maildir(md, msg) md+"/new/"+fn end +def handle_mailman_queued_mail(cookie, request_address, password) + mail_request = RMail::Message.new() + mail_request.header['From'] = FROM + mail_request.header['To'] = request_address + mail_request.header['Subject'] = "Re: confirm #{cookie}" + mail_request.header['Approved'] = password if password + mail_request.body = '' + store_in_maildir(OUTBOX, mail_request) +end +def discard(cookie, request_address) + handle_mailman_queued_mail(cookie, request_address, nil) +end +def approve(cookie, request_address) + unless APPROVE_PASSWORD[request_address] + mail_error = RMail::Message.new() + mail_error.header['From'] = FROM + mail_error.header['To'] = ERRORSTO + mail_error.header['Subject'] = "approving #{cookie} failed" + mail_error.body = 'Sorry, this script does not have the admin/moderator password for'+"\n#{request_address}." + store_in_maildir(OUTBOX, mail_error) + return + end + handle_mailman_queued_mail(cookie, request_address, APPROVE_PASSWORD[request_address]) +end + + +def runcmd(command, input) + rdin , wrin = IO.pipe + rdout, wrout = IO.pipe + rderr, wrerr = IO.pipe + + pid = fork + unless pid + # child + wrin.close + rdout.close + rderr.close + STDOUT.reopen wrout + STDERR.reopen wrerr + exec(*command) + throw("fell through exec(). WTF.") + end + rdin.close + wrout.close + wrerr.close + + out = [] + err = [] + tin = Thread.new { wrin.print input } + tout = Thread.new { out = rdout.readlines } + terr = Thread.new { err = rderr.readlines } + tin.join + tout.join + terr.join + Process.wait pid + + exitstatus = $?.exitstatus + + [exitstatus, out, err] +end + +def runnoerrors(command, input) + exitstatus, out, err = runcmd(command, input) + cmd = command.join(' ') + throw "command '#{cmd}' returned with non-zero exit status #{exitstatus}" + throw "command '#{cmd}' returned with output on stderr: #{err.join}" + + out +end + +def sa_check(message) + out = runnoerrors(['spamc', '-R'], message) + line1 = out.shift + matchdata = /^([-0-9.]+)/.match line1 + throw "Could not find score in spamassassin output line1: '#{line1}'" unless matchdata and matchdata[1] + score = matchdata[1].to_f + + c = (score < 1.0) ? "ham" : + (score > 6.0) ? "spam" : + "unsure" + + [c, out.join] +end + +def bogo_check(message) + out = runnoerrors(['bogofilter', '-l', '-e', '-v', '-v'], message) + line1 = out[0] + matchdata = /^X-Bogosity: ([a-zA-Z0-9]+)/.match line1 + throw "Could not find classfication in bogofilter output line1: '#{line1}'" unless matchdata and matchdata[1] + c = matchdata[1] + + [c, out.join] +end + + def process_mail(filename) message = File.open(filename) { |f| RMail::Parser.read(f) } # some sanity checks - throw "Mailman moderation mails are expected to have 3 mime parts" unless message.body.length == 3 + message.body.shift if message.body.length == 4 + throw "Mailman moderation mails are expected to have 3 mime parts (or 4 if with spam info from our script)" unless message.body.length == 3 throw "Mime Part 0 does have an unexpected content type: #{message.body[0].header['Content-Type']}" unless message.body[0].header['Content-Type'] == 'text/plain; charset="us-ascii"' throw "Mime Part 1 does have an unexpected content type: #{message.body[1].header['Content-Type']}" unless message.body[1].header['Content-Type'] == 'message/rfc822' throw "Mime Part 2 does have an unexpected content type: #{message.body[2].header['Content-Type']}" unless message.body[2].header['Content-Type'] == 'message/rfc822' @@ -82,30 +201,39 @@ def process_mail(filename) throw "Could not find cookie in discard_part" unless matchdata and matchdata[1] cookie = matchdata[1] - store_in_maildir(MAILLEARN, held_part) - - if DO_APPROVE and not APPROVE_PASSWORD[request_address] then - mail_error = RMail::Message.new() - mail_error.header['From'] = FROM - mail_error.header['To'] = ERRORSTO - mail_error.header['Subject'] = "approving #{cookie} failed" - mail_error.body = 'Sorry, this script does not have the admin/moderator password for'+"\n#{request_address}." - store_in_maildir(OUTBOX, mail_error) + + if ACTION == "ham" + store_in_maildir(MAILLEARN, held_part) + approve(cookie, request_address) + elsif ACTION == "spam" + store_in_maildir(MAILLEARN, held_part) + discard(cookie, request_address) + elsif ACTION == "classify" + sa_class , sa_text = sa_check(held_part) + bogo_class, bogo_text = bogo_check(held_part) + + if sa_class == "ham" and bogo_class == "ham" + store_in_maildir(MAIL_PROCESSED_HAM, message) + approve(cookie, request_address) + elsif sa_class == "spam" and bogo_class == "spam" + store_in_maildir(MAIL_PROCESSED_SPAM, message) + discard(cookie, request_address) + else + spam_info = RMail::Message.new() + spam_info.header['Content-Type'] = 'text/plain; charset="us-ascii"' + spam_info.header['MIME-Version'] = '1.0' + spam_info.header['Content-Transfer-Encoding'] = '8bit' + spam_info.body = sa_text + "\n\n" + bogo_text + message.body.unshift spam_info + + store_in_maildir(OUTBOX_HUMANS, message) + end else - mail_request = RMail::Message.new() - mail_request.header['From'] = FROM - mail_request.header['To'] = request_address - mail_request.header['Subject'] = "Re: confirm #{cookie}" - mail_request.header['Approved'] = APPROVE_PASSWORD[request_address] if DO_APPROVE - mail_request.body = '' - store_in_maildir(OUTBOX, mail_request) + STDERR.puts "Unknown action #{ACTION}!" + exit 1 end end -check_maildir MAILIN -check_maildir MAILLEARN -check_maildir OUTBOX - Dir[MAILIN+"/new/*"].each do |filename| begin process_mail filename -- cgit v1.2.3