From 90e27cc42fd410bd4cc28693214be37c1484aad6 Mon Sep 17 00:00:00 2001 From: Peter Palfrader Date: Thu, 15 Jun 2006 21:21:35 +0000 Subject: Do spam classification right here, 2 git-svn-id: svn+ssh://asteria.noreply.org/svn/weaselutils/trunk@129 bc3d92e2-beff-0310-a7cd-cc87d7ac0ede --- split-mailman-mails-and-discard-and-save | 41 ++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 18 deletions(-) (limited to 'split-mailman-mails-and-discard-and-save') diff --git a/split-mailman-mails-and-discard-and-save b/split-mailman-mails-and-discard-and-save index 61298cc..7fc175f 100755 --- a/split-mailman-mails-and-discard-and-save +++ b/split-mailman-mails-and-discard-and-save @@ -16,22 +16,22 @@ end -OUTBOX="mail/outbox" +OUTBOX = "mail/outbox" +SPAMLEARN = "mail/spam-learn" +HAMLEARN = "mail/ham-learn" check_maildir OUTBOX +check_maildir SPAMLEARN +check_maildir HAMLEARN APPROVE_PASSWORD = YAML::load( File.open( 'mailman-passwords.yaml' ) ) if ARGV[0] == "spam" ACTION = "spam" MAILIN = "mail/spam-in" - MAILLEARN = "mail/spam-learn" check_maildir MAILIN - check_maildir MAILLEARN elsif ARGV[0] == "ham" ACTION = "ham" MAILIN = "mail/ham-in" - MAILLEARN = "mail/ham-learn" check_maildir MAILIN - check_maildir MAILLEARN elsif ARGV[0] == "classify" ACTION = "classify" MAILIN = "mail/mailman-moderator-requests" @@ -45,7 +45,7 @@ elsif ARGV[0] == "classify" check_maildir MAIL_PROCESSED_HAM check_maildir MAIL_PROCESSED_FORWARDED else - STDERR.puts "Usage: #{$0} ham|spam" + STDERR.puts "Usage: #{$0} ham|spam|classify" exit 1 end @@ -119,6 +119,7 @@ def runcmd(command, input) wrin.close rdout.close rderr.close + STDIN.reopen rdin STDOUT.reopen wrout STDERR.reopen wrerr exec(*command) @@ -130,7 +131,7 @@ def runcmd(command, input) out = [] err = [] - tin = Thread.new { wrin.print input } + tin = Thread.new { wrin.print input; wrin.close } tout = Thread.new { out = rdout.readlines } terr = Thread.new { err = rderr.readlines } tin.join @@ -146,8 +147,8 @@ end def runnoerrors(command, input) exitstatus, out, err = runcmd(command, input) cmd = command.join(' ') - throw "command '#{cmd}' returned with non-zero exit status #{exitstatus}" - throw "command '#{cmd}' returned with output on stderr: #{err.join}" + throw "command '#{cmd}' returned with non-zero exit status #{exitstatus}" if exitstatus != 0 + throw "command '#{cmd}' returned with output on stderr: #{err.join}" if err.length > 0 out end @@ -159,11 +160,11 @@ def sa_check(message) throw "Could not find score in spamassassin output line1: '#{line1}'" unless matchdata and matchdata[1] score = matchdata[1].to_f - c = (score < 1.0) ? "ham" : - (score > 6.0) ? "spam" : - "unsure" + c = (score < 1.0) ? "Ham" : + (score > 6.0) ? "Spam" : + "Unsure" - [c, out.join] + [c, out.join, score] end def bogo_check(message) @@ -203,19 +204,23 @@ def process_mail(filename) if ACTION == "ham" - store_in_maildir(MAILLEARN, held_part) + store_in_maildir(HAMLEARN, held_part) approve(cookie, request_address) elsif ACTION == "spam" - store_in_maildir(MAILLEARN, held_part) + store_in_maildir(SPAMLEARN, held_part) discard(cookie, request_address) elsif ACTION == "classify" - sa_class , sa_text = sa_check(held_part) + sa_class , sa_text , sa_score = sa_check(held_part) bogo_class, bogo_text = bogo_check(held_part) - if sa_class == "ham" and bogo_class == "ham" + if sa_class == "Ham" and bogo_class == "Ham" store_in_maildir(MAIL_PROCESSED_HAM, message) approve(cookie, request_address) - elsif sa_class == "spam" and bogo_class == "spam" + elsif sa_class == "Spam" and bogo_class == "Spam" + store_in_maildir(MAIL_PROCESSED_SPAM, message) + discard(cookie, request_address) + elsif sa_class == "Spam" and sa_score > 10 # but bogo did not match + store_in_maildir(SPAMLEARN, held_part) # so we let it learn it store_in_maildir(MAIL_PROCESSED_SPAM, message) discard(cookie, request_address) else -- cgit v1.2.3