summary | refs | log | tree | commit | diff
path: root/split-mailman-mails-and-discard-and-save
diff options
context:
space:
mode:
author Peter Palfrader <peter@palfrader.org> 2006-06-15 21:21:35 +0000
committer weasel <weasel@bc3d92e2-beff-0310-a7cd-cc87d7ac0ede> 2006-06-15 21:21:35 +0000
commit 90e27cc42fd410bd4cc28693214be37c1484aad6 (patch)
tree 34e0c18d75f3e4db5267acc85ed1262292733ab3 /split-mailman-mails-and-discard-and-save
parent eef706ac7e1dfdaa6e61108f9e2cffba611ebda8 (diff)
Do spam classification right here, 2
git-svn-id: svn+ssh://asteria.noreply.org/svn/weaselutils/trunk@129 bc3d92e2-beff-0310-a7cd-cc87d7ac0ede
Diffstat (limited to 'split-mailman-mails-and-discard-and-save')
-rwxr-xr-x split-mailman-mails-and-discard-and-save 41
1 files changed, 23 insertions, 18 deletions
diff --git a/split-mailman-mails-and-discard-and-save b/split-mailman-mails-and-discard-and-save
index 61298cc..7fc175f 100755
--- a/split-mailman-mails-and-discard-and-save
+++ b/split-mailman-mails-and-discard-and-save
@@ -16,22 +16,22 @@ end
-OUTBOX="mail/outbox"
+OUTBOX = "mail/outbox"
+SPAMLEARN = "mail/spam-learn"
+HAMLEARN = "mail/ham-learn"
check_maildir OUTBOX
+check_maildir SPAMLEARN
+check_maildir HAMLEARN
APPROVE_PASSWORD = YAML::load( File.open( 'mailman-passwords.yaml' ) )
if ARGV[0] == "spam"
ACTION = "spam"
MAILIN = "mail/spam-in"
- MAILLEARN = "mail/spam-learn"
check_maildir MAILIN
- check_maildir MAILLEARN
elsif ARGV[0] == "ham"
ACTION = "ham"
MAILIN = "mail/ham-in"
- MAILLEARN = "mail/ham-learn"
check_maildir MAILIN
- check_maildir MAILLEARN
elsif ARGV[0] == "classify"
ACTION = "classify"
MAILIN = "mail/mailman-moderator-requests"
@@ -45,7 +45,7 @@ elsif ARGV[0] == "classify"
check_maildir MAIL_PROCESSED_HAM
check_maildir MAIL_PROCESSED_FORWARDED
else
- STDERR.puts "Usage: #{$0} ham|spam"
+ STDERR.puts "Usage: #{$0} ham|spam|classify"
exit 1
end
@@ -119,6 +119,7 @@ def runcmd(command, input)
wrin.close
rdout.close
rderr.close
+ STDIN.reopen rdin
STDOUT.reopen wrout
STDERR.reopen wrerr
exec(*command)
@@ -130,7 +131,7 @@ def runcmd(command, input)
out = []
err = []
- tin = Thread.new { wrin.print input }
+ tin = Thread.new { wrin.print input; wrin.close }
tout = Thread.new { out = rdout.readlines }
terr = Thread.new { err = rderr.readlines }
tin.join
@@ -146,8 +147,8 @@ end
def runnoerrors(command, input)
exitstatus, out, err = runcmd(command, input)
cmd = command.join(' ')
- throw "command '#{cmd}' returned with non-zero exit status #{exitstatus}"
- throw "command '#{cmd}' returned with output on stderr: #{err.join}"
+ throw "command '#{cmd}' returned with non-zero exit status #{exitstatus}" if exitstatus != 0
+ throw "command '#{cmd}' returned with output on stderr: #{err.join}" if err.length > 0
out
end
@@ -159,11 +160,11 @@ def sa_check(message)
throw "Could not find score in spamassassin output line1: '#{line1}'" unless matchdata and matchdata[1]
score = matchdata[1].to_f
- c = (score < 1.0) ? "ham" :
- (score > 6.0) ? "spam" :
- "unsure"
+ c = (score < 1.0) ? "Ham" :
+ (score > 6.0) ? "Spam" :
+ "Unsure"
- [c, out.join]
+ [c, out.join, score]
end
def bogo_check(message)
@@ -203,19 +204,23 @@ def process_mail(filename)
if ACTION == "ham"
- store_in_maildir(MAILLEARN, held_part)
+ store_in_maildir(HAMLEARN, held_part)
approve(cookie, request_address)
elsif ACTION == "spam"
- store_in_maildir(MAILLEARN, held_part)
+ store_in_maildir(SPAMLEARN, held_part)
discard(cookie, request_address)
elsif ACTION == "classify"
- sa_class , sa_text = sa_check(held_part)
+ sa_class , sa_text , sa_score = sa_check(held_part)
bogo_class, bogo_text = bogo_check(held_part)
- if sa_class == "ham" and bogo_class == "ham"
+ if sa_class == "Ham" and bogo_class == "Ham"
store_in_maildir(MAIL_PROCESSED_HAM, message)
approve(cookie, request_address)
- elsif sa_class == "spam" and bogo_class == "spam"
+ elsif sa_class == "Spam" and bogo_class == "Spam"
+ store_in_maildir(MAIL_PROCESSED_SPAM, message)
+ discard(cookie, request_address)
+ elsif sa_class == "Spam" and sa_score > 10 # but bogo did not match
+ store_in_maildir(SPAMLEARN, held_part) # so we let it learn it
store_in_maildir(MAIL_PROCESSED_SPAM, message)
discard(cookie, request_address)
else