summary | refs | log | tree | commit | diff
path: root/split-mailman-mails-and-discard-and-save
diff options
context:
space:
mode:
author Peter Palfrader <peter@palfrader.org> 2006-06-15 21:10:19 +0000
committer weasel <weasel@bc3d92e2-beff-0310-a7cd-cc87d7ac0ede> 2006-06-15 21:10:19 +0000
commit eef706ac7e1dfdaa6e61108f9e2cffba611ebda8 (patch)
tree 174edeb77e783d846d5aa1d2388748eb661137f0 /split-mailman-mails-and-discard-and-save
parent 488fdc0acb45c69bfba4957e020d6128cb62798d (diff)
Do spam classification right here
git-svn-id: svn+ssh://asteria.noreply.org/svn/weaselutils/trunk@128 bc3d92e2-beff-0310-a7cd-cc87d7ac0ede
Diffstat (limited to 'split-mailman-mails-and-discard-and-save')
-rwxr-xr-x split-mailman-mails-and-discard-and-save 206
1 files changed, 167 insertions, 39 deletions
diff --git a/split-mailman-mails-and-discard-and-save b/split-mailman-mails-and-discard-and-save
index a466fcb..61298cc 100755
--- a/split-mailman-mails-and-discard-and-save
+++ b/split-mailman-mails-and-discard-and-save
@@ -3,23 +3,54 @@
require 'rmail'
require 'yaml'
+Thread.abort_on_exception = true
+
+# Verify that +d+ looks like a maildir: the directory itself and its new/,
+# cur/ and tmp/ subdirectories must all exist.
+#
+# Returns true when all four directories are present.  Otherwise throws the
+# string "<d> is not a maildir" at the first directory that is missing.
+def check_maildir(d)
+  must_be_dir = lambda do |path|
+    throw "#{d} is not a maildir" unless FileTest.directory?(path)
+  end
+
+  must_be_dir.call(d)
+  %w[new cur tmp].each { |sub| must_be_dir.call(d + "/#{sub}") }
+  true
+end
+
+
+
+
+# Maildir that receives every message this script generates through
+# store_in_maildir(OUTBOX, ...): confirm replies and error notices.
+OUTBOX="mail/outbox"
+check_maildir OUTBOX
+# Block form of File.open so the password file is closed again as soon as it
+# has been parsed instead of staying open for the rest of the run.
+APPROVE_PASSWORD = File.open( 'mailman-passwords.yaml' ) { |f| YAML::load( f ) }
+
+# The first command line argument selects the mode; each mode defines ACTION
+# plus the maildirs it needs and checks them up front.
if ARGV[0] == "spam"
- MAILIN="mail/spam-in"
- MAILLEARN="mail/spam-learn"
- OUTBOX="mail/outbox"
- DO_APPROVE = false
- APPROVE_PASSWORD = nil
+ ACTION = "spam"
+ MAILIN = "mail/spam-in"
+ MAILLEARN = "mail/spam-learn"
+ check_maildir MAILIN
+ check_maildir MAILLEARN
elsif ARGV[0] == "ham"
- MAILIN="mail/ham-in"
- MAILLEARN="mail/ham-learn"
- OUTBOX="mail/outbox"
- DO_APPROVE = true
- APPROVE_PASSWORD = YAML::load( File.open( 'mailman-passwords.yaml' ) )
+ ACTION = "ham"
+ MAILIN = "mail/ham-in"
+ MAILLEARN = "mail/ham-learn"
+ check_maildir MAILIN
+ check_maildir MAILLEARN
+elsif ARGV[0] == "classify"
+ ACTION = "classify"
+ MAILIN = "mail/mailman-moderator-requests"
+ OUTBOX_HUMANS = "mail/outbox-tolistmods"
+ MAIL_PROCESSED_SPAM = "mail/mailman-moderator-requests-spam"
+ MAIL_PROCESSED_HAM = "mail/mailman-moderator-requests-ham"
+ MAIL_PROCESSED_FORWARDED = "mail/mailman-moderator-requests-forwarded"
+ check_maildir MAILIN
+ check_maildir OUTBOX_HUMANS
+ check_maildir MAIL_PROCESSED_SPAM
+ check_maildir MAIL_PROCESSED_HAM
+ check_maildir MAIL_PROCESSED_FORWARDED
else
- STDERR.puts "Usage: #{$0} ham|spam"
+ # The usage text has to list the "classify" mode accepted above as well.
+ STDERR.puts "Usage: #{$0} ham|spam|classify"
exit 1
end
+
+
DOMAIN='lists.oftc.net'
FROM="listmod@blackhole.oftc.net"
HOSTNAME=`hostname`.chop
@@ -41,14 +72,6 @@ def uniqueName
HOSTNAME]
end
-def check_maildir(d)
- throw "#{d} is not a maildir" unless FileTest.directory?(d)
- throw "#{d} is not a maildir" unless FileTest.directory?(d+"/new")
- throw "#{d} is not a maildir" unless FileTest.directory?(d+"/cur")
- throw "#{d} is not a maildir" unless FileTest.directory?(d+"/tmp")
- true
-end
-
def store_in_maildir(md, msg)
fn = uniqueName
File.open(md+"/tmp/"+fn, "w", 0600) do |f|
@@ -59,10 +82,106 @@ def store_in_maildir(md, msg)
md+"/new/"+fn
end
+# Queue a reply to a held-message confirmation request.
+#
+# Builds a message from FROM to +request_address+ with the subject
+# "Re: confirm <cookie>" and an empty body, and stores it in OUTBOX.  An
+# Approved header carrying +password+ is added only when +password+ is truthy.
+# Returns whatever store_in_maildir returns.
+def handle_mailman_queued_mail(cookie, request_address, password)
+  reply = RMail::Message.new()
+  [
+    ['From',    FROM],
+    ['To',      request_address],
+    ['Subject', "Re: confirm #{cookie}"]
+  ].each do |field, value|
+    reply.header[field] = value
+  end
+  if password
+    reply.header['Approved'] = password
+  end
+  reply.body = ''
+  store_in_maildir(OUTBOX, reply)
+end
+# Queue a confirm reply for +cookie+ to +request_address+ that carries no
+# Approved header (the password argument is nil).
+def discard(cookie, request_address)
+  no_password = nil
+  handle_mailman_queued_mail(cookie, request_address, no_password)
+end
+# Queue an approving confirm reply for +cookie+ to +request_address+.
+#
+# The password is looked up in APPROVE_PASSWORD under +request_address+.  When
+# one is found, the reply is queued with it and the result of
+# handle_mailman_queued_mail is returned.  When none is found, an error notice
+# addressed to ERRORSTO is stored in OUTBOX instead and nil is returned.
+def approve(cookie, request_address)
+  password = APPROVE_PASSWORD[request_address]
+  return handle_mailman_queued_mail(cookie, request_address, password) if password
+
+  notice = RMail::Message.new()
+  notice.header['From'] = FROM
+  notice.header['To'] = ERRORSTO
+  notice.header['Subject'] = "approving #{cookie} failed"
+  notice.body = 'Sorry, this script does not have the admin/moderator password for'+"\n#{request_address}."
+  store_in_maildir(OUTBOX, notice)
+  nil
+end
+
+
+# Run +command+ (an argv-style array handed to exec) in a child process, feed
+# it +input+ on its standard input and collect everything it writes.
+#
+# Returns [exitstatus, stdout_lines, stderr_lines].  exitstatus comes from
+# Process::Status#exitstatus and is therefore nil when the child did not exit
+# normally (e.g. was killed by a signal); it is 127 when exec itself failed.
+def runcmd(command, input)
+  rdin , wrin  = IO.pipe
+  rdout, wrout = IO.pipe
+  rderr, wrerr = IO.pipe
+
+  pid = fork
+  unless pid
+    # child
+    wrin.close
+    rdout.close
+    rderr.close
+    # Attach the read end of the input pipe to stdin; without this the command
+    # reads the parent's stdin and never sees +input+.
+    STDIN.reopen rdin
+    STDOUT.reopen wrout
+    STDERR.reopen wrerr
+    # The originals are redundant once duplicated onto fds 0-2.
+    rdin.close
+    wrout.close
+    wrerr.close
+    begin
+      exec(*command)
+    rescue SystemCallError => e
+      STDERR.puts "exec of '#{command.join(' ')}' failed: #{e.message}"
+    end
+    # exec only comes back on failure.  Leave immediately so the child never
+    # continues running the parent's code.
+    exit! 127
+  end
+  rdin.close
+  wrout.close
+  wrerr.close
+
+  out = []
+  err = []
+  # Writer and readers run concurrently so that neither side can block on a
+  # full pipe while the other is waiting.
+  tin = Thread.new do
+    begin
+      wrin.print input
+    rescue Errno::EPIPE
+      # The child went away before reading all of the input; its exit status
+      # and stderr output are reported to the caller below.
+    ensure
+      # Closing the write end is what delivers EOF to the child.
+      wrin.close
+    end
+  end
+  tout = Thread.new { out = rdout.readlines }
+  terr = Thread.new { err = rderr.readlines }
+  tin.join
+  tout.join
+  terr.join
+  rdout.close
+  rderr.close
+  Process.wait pid
+
+  exitstatus = $?.exitstatus
+
+  [exitstatus, out, err]
+end
+
+# Run +command+ with +input+ via runcmd and insist on a clean run.
+#
+# Returns the command's stdout as an array of lines.  Throws a descriptive
+# string when the exit status is not 0 or when anything was written to stderr.
+def runnoerrors(command, input)
+  exitstatus, out, err = runcmd(command, input)
+  cmd = command.join(' ')
+  # Both checks must be conditional; unguarded, the first throw fires on every
+  # call and the function can never return.
+  throw "command '#{cmd}' returned with non-zero exit status #{exitstatus}" unless exitstatus == 0
+  throw "command '#{cmd}' returned with output on stderr: #{err.join}" unless err.empty?
+
+  out
+end
+
+# Classify +message+ by piping it through `spamc -R`.
+#
+# The first output line must start with a number, which is taken as the score:
+# below 1.0 is "ham", above 6.0 is "spam", anything in between is "unsure".
+# Returns [classification, remaining_output], where remaining_output is the
+# output after the first line joined into one string.  Throws a string when no
+# score can be found in the first line.
+def sa_check(message)
+  report = runnoerrors(['spamc', '-R'], message)
+  first_line = report.shift
+  found = /^([-0-9.]+)/.match first_line
+  unless found and found[1]
+    throw "Could not find score in spamassassin output line1: '#{first_line}'"
+  end
+  score = found[1].to_f
+
+  verdict =
+    if score < 1.0
+      "ham"
+    elsif score > 6.0
+      "spam"
+    else
+      "unsure"
+    end
+
+  [verdict, report.join]
+end
+
+# Classify +message+ by piping it through `bogofilter -l -e -v -v`.
+#
+# The first output line must be an "X-Bogosity: <word>" header; <word> is used
+# as the classification.  Returns [classification, full_output], where
+# full_output is all output lines (including the first) joined into one
+# string.  Throws a string when the first line carries no such header.
+def bogo_check(message)
+  lines = runnoerrors(['bogofilter', '-l', '-e', '-v', '-v'], message)
+  first_line = lines[0]
+  found = /^X-Bogosity: ([a-zA-Z0-9]+)/.match first_line
+  unless found and found[1]
+    throw "Could not find classfication in bogofilter output line1: '#{first_line}'"
+  end
+
+  [found[1], lines.join]
+end
+
+
def process_mail(filename)
message = File.open(filename) { |f| RMail::Parser.read(f) }
# some sanity checks
- throw "Mailman moderation mails are expected to have 3 mime parts" unless message.body.length == 3
+ message.body.shift if message.body.length == 4
+ throw "Mailman moderation mails are expected to have 3 mime parts (or 4 if with spam info from our script)" unless message.body.length == 3
throw "Mime Part 0 does have an unexpected content type: #{message.body[0].header['Content-Type']}" unless message.body[0].header['Content-Type'] == 'text/plain; charset="us-ascii"'
throw "Mime Part 1 does have an unexpected content type: #{message.body[1].header['Content-Type']}" unless message.body[1].header['Content-Type'] == 'message/rfc822'
throw "Mime Part 2 does have an unexpected content type: #{message.body[2].header['Content-Type']}" unless message.body[2].header['Content-Type'] == 'message/rfc822'
@@ -82,30 +201,39 @@ def process_mail(filename)
throw "Could not find cookie in discard_part" unless matchdata and matchdata[1]
cookie = matchdata[1]
- store_in_maildir(MAILLEARN, held_part)
-
- if DO_APPROVE and not APPROVE_PASSWORD[request_address] then
- mail_error = RMail::Message.new()
- mail_error.header['From'] = FROM
- mail_error.header['To'] = ERRORSTO
- mail_error.header['Subject'] = "approving #{cookie} failed"
- mail_error.body = 'Sorry, this script does not have the admin/moderator password for'+"\n#{request_address}."
- store_in_maildir(OUTBOX, mail_error)
+
+ if ACTION == "ham"
+ store_in_maildir(MAILLEARN, held_part)
+ approve(cookie, request_address)
+ elsif ACTION == "spam"
+ store_in_maildir(MAILLEARN, held_part)
+ discard(cookie, request_address)
+ elsif ACTION == "classify"
+ sa_class , sa_text = sa_check(held_part)
+ bogo_class, bogo_text = bogo_check(held_part)
+
+ if sa_class == "ham" and bogo_class == "ham"
+ store_in_maildir(MAIL_PROCESSED_HAM, message)
+ approve(cookie, request_address)
+ elsif sa_class == "spam" and bogo_class == "spam"
+ store_in_maildir(MAIL_PROCESSED_SPAM, message)
+ discard(cookie, request_address)
+ else
+ spam_info = RMail::Message.new()
+ spam_info.header['Content-Type'] = 'text/plain; charset="us-ascii"'
+ spam_info.header['MIME-Version'] = '1.0'
+ spam_info.header['Content-Transfer-Encoding'] = '8bit'
+ spam_info.body = sa_text + "\n\n" + bogo_text
+ message.body.unshift spam_info
+
+ store_in_maildir(OUTBOX_HUMANS, message)
+ end
else
- mail_request = RMail::Message.new()
- mail_request.header['From'] = FROM
- mail_request.header['To'] = request_address
- mail_request.header['Subject'] = "Re: confirm #{cookie}"
- mail_request.header['Approved'] = APPROVE_PASSWORD[request_address] if DO_APPROVE
- mail_request.body = ''
- store_in_maildir(OUTBOX, mail_request)
+ STDERR.puts "Unknown action #{ACTION}!"
+ exit 1
end
end
-check_maildir MAILIN
-check_maildir MAILLEARN
-check_maildir OUTBOX
-
Dir[MAILIN+"/new/*"].each do |filename|
begin
process_mail filename