#!/usr/bin/python2
"""Mail delivery filter that classifies messages with the scripts in ~/.crm.

spamFilter() spools a message from stdin into a maildir below ~/Maildir,
hands a cleaned-up copy to ~/.crm/classify2.crm and links the message into
the 'new' directory of either the requested folder or the '.Spam?' folder.
learnMsg() feeds a message to ~/.crm/learn.crm to train one of the
per-category databases.

Requires Python 2.6 or 2.7.
"""

Id = """$Id: crm_mail.py 66 2003-06-04 16:27:23Z hopper $"""
URL = """$URL: file://localhost/home/hopper/src/svn/random-src/trunk/python/random/mailfilt/crm_mail.py $"""

import sys
import os
import os.path
import time
import socket
import errno
import exceptions
import email
import email.Generator
import email.Parser
import email.Errors
import email.Encoders
import pipes
import re

maildir = os.path.join(os.environ['HOME'], 'Maildir')
crmdir = os.path.join(os.environ['HOME'], '.crm')
hostname = socket.gethostname()
# 256-character identity map, used with str.translate() purely to delete bytes.
identity_table = ''.join(map(chr, xrange(256)))
pid = os.getpid()

# Largest message (in bytes read from stdin) that realSpamFilter() accepts.
MAX_MESSAGE_BYTES = 17000000


class MailError(exceptions.Exception):
    """Delivery failure.

    permanent -- true when retrying cannot help (spamFilter() maps this to
                 exit code 100 instead of 111).
    msg       -- human readable explanation, written to stderr.
    """

    def __init__(self, permanent, msg):
        exceptions.Exception.__init__(self, "Mail delivery error", msg)
        self.msg = msg
        self.permanent = permanent


def _shellCommand(argv, stdinpath):
    """Return an sh command line running argv with stdin read from stdinpath.

    Every word is quoted, so shell metacharacters in paths or category names
    are passed through literally instead of being interpreted.
    """
    words = [pipes.quote(arg) for arg in argv]
    return '%s < %s' % (' '.join(words), pipes.quote(stdinpath))


def _unlinkQuietly(paths):
    """Best-effort removal of every path; files that cannot be removed are skipped."""
    for path in paths:
        try:
            os.unlink(path)
        except OSError:
            pass


def convertPart(part):
    """Normalise one non-multipart message part in place.

    Text parts (including parts without a usable Content-Type, which the
    email package reports as text/plain) that use a transfer encoding other
    than 7bit/8bit are decoded, stripped of NUL bytes and re-labelled as
    7bit or 8bit.  The payload of every non-text part is replaced by an
    empty string.
    """
    # get_main_type() was removed from the email package in Python 2.5;
    # get_content_maintype() falls back to 'text' where the old call
    # returned None, which this function treated as text anyway.
    if part.get_content_maintype() == 'text':
        encoding = part.get('Content-Transfer-Encoding', '7bit')
        if encoding not in ('7bit', '8bit'):
            payload = part.get_payload(decode=1)
            payload = payload.translate(identity_table, '\0')
            del part['Content-Transfer-Encoding']
            part.set_payload(payload)
            email.Encoders.encode_7or8bit(part)
    else:
        part.set_payload('')


# NOTE(review): the pattern found in this file was "(?(?!\s)", which is not a
# valid regular expression and makes re.compile() raise at import time.  It
# looks as if markup stripping removed everything from the first "<" to the
# first ">".  The pattern below is a reconstruction (an HTML comment that is
# not delimited by whitespace on either side) -- confirm against the
# upstream source before relying on it.
badcomment_re = re.compile(r"(?<!\s)<!--.*?-->(?!\s)", re.M | re.S)


def stripEvilComments(part):
    """Remove badcomment_re matches from text/html, text/xml and text/xhtml parts."""
    if part.get_content_maintype() != 'text':
        return
    if part.get_content_subtype() in ('html', 'xml', 'xhtml'):
        payload = part.get_payload()
        part.set_payload(badcomment_re.sub('', payload))


def cleanupMessage(msgsrc, msgdst, savedest = None):
    """Parse a message, normalise its parts and write the result out.

    msgsrc   -- readable file object holding the raw message.
    msgdst   -- writable file object receiving the cleaned message.
    savedest -- optional second writable file object receiving the same text.

    Returns the parsed (and modified) message object.  Raises a permanent
    MailError when the message cannot be parsed.  The caller owns all three
    file objects and is responsible for closing them.
    """
    try:
        msg = email.Parser.Parser().parse(msgsrc)
    except email.Errors.MessageParseError as e:
        raise MailError(1, "Your mailer makes bad messages, so I'm refusing to deliver them.\n" + str(e))
    for part in msg.walk():
        if not part.is_multipart():
            convertPart(part)
            stripEvilComments(part)
    # Generator.__call__() was removed in Python 2.5; flatten() is the
    # method it used to alias.  The 0 disables "From " line mangling.
    email.Generator.Generator(msgdst, 0).flatten(msg)
    if savedest:
        email.Generator.Generator(savedest, 0).flatten(msg)
    return msg


def _cleanupToFiles(srcpath, dstpath, savepath):
    """Run cleanupMessage() between named files and close every handle.

    The files are closed before this returns, so an external program can
    read dstpath immediately afterwards.
    """
    with open(srcpath, 'r') as src:
        with open(dstpath, 'w') as dst:
            with open(savepath, 'w') as saved:
                return cleanupMessage(src, dst, saved)


def addToMaildir(directory):
    """Create an empty, uniquely named file in <directory>/tmp and return its path.

    The name is built from the current time, this process id and the host
    name.  If that name is already taken the function sleeps two seconds and
    tries again with a fresh timestamp.  Any other OSError (missing
    directory, permissions, ...) is propagated instead of being retried
    forever.
    """
    while True:
        fname = os.path.join(directory, 'tmp',
                             '%d.%d.%s' % (int(time.time()), pid, hostname))
        try:
            # O_EXCL makes "does it exist?" and "create it" one atomic step.
            fd = os.open(fname, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o666)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
            time.sleep(2)
        else:
            os.close(fd)
            return fname


def doClassify(cls, fname):
    """Run ~/.crm/classify2.crm on the file fname for category cls.

    The script is started with the paths of <cls>.nospam.css and
    <cls>.spam.css (both inside crmdir) as arguments and fname on stdin.

    Returns (isspam, details, verdict): verdict is the stripped first line
    of the script's output, details is the rest of the output, and isspam is
    1 when verdict equals the spam database path, otherwise 0.
    """
    clsnospam = os.path.join(crmdir, '%s.nospam.css' % cls)
    clsspam = os.path.join(crmdir, '%s.spam.css' % cls)
    crmpath = os.path.join(crmdir, 'classify2.crm')
    pipe = os.popen(_shellCommand([crmpath, clsnospam, clsspam], fname), 'r')
    try:
        verdict = pipe.readline().strip()
        details = pipe.read()
    finally:
        pipe.close()
    if verdict == clsspam:
        isspam = 1
    else:
        isspam = 0
    return (isspam, details, verdict)


statre = re.compile(r"P\(succ\):\s+([0-9.e+-]+),\s+P\(fail\):\s+([0-9.e+-]+)", re.M | re.S)


def _spoolStdin(fname, cls):
    """Write an X-Spam-Filter header followed by all of stdin to fname.

    Raises a permanent MailError once more than MAX_MESSAGE_BYTES have been
    read.  The file is closed on every path.
    """
    spool = open(fname, 'w')
    try:
        spool.write('X-Spam-Filter: unkn %s %d\n' % (cls, int(time.time())))
        count = 0
        chunk = sys.stdin.read(4096)
        while chunk:
            count += len(chunk)
            if count > MAX_MESSAGE_BYTES:
                raise MailError(1, "Message too big!")
            spool.write(chunk)
            chunk = sys.stdin.read(4096)
    finally:
        spool.close()


def realSpamFilter(cls, gooddest):
    """Deliver the message on stdin, classifying it for category cls.

    The message is spooled into <gooddest>/tmp, a cleaned copy is classified
    with doClassify(), and the spooled message is then hard-linked into the
    'new' directory of gooddest or, when classified as spam, of '.Spam?'.
    A one-line verdict is written to stderr.  All temporary files are
    removed afterwards, whether or not delivery succeeded.

    Paths are relative to maildir; the process working directory is changed.
    Raises MailError for oversized or unparseable messages; other errors
    (OSError etc.) propagate unchanged.
    """
    delfiles = []
    os.chdir(maildir)
    try:
        fname = addToMaildir(gooddest)
        delfiles.append(fname)
        fnametmp = fname + 'tmp'
        delfiles.append(fnametmp)
        _spoolStdin(fname, cls)
        # NOTE(review): '/tmp/ok.msg' is a fixed, predictable name in a
        # shared directory and is overwritten on every delivery; it looks
        # like a debugging aid.  Kept for compatibility -- consider removing.
        _cleanupToFiles(fname, fnametmp, '/tmp/ok.msg')
        spamtest = doClassify(cls, fnametmp)
        if spamtest[0]:
            # Reserve the same kind of unique name in the spam folder.
            fnamenew = addToMaildir('.Spam?')
            delfiles.append(fnamenew)
            sys.stderr.write("spam:_")
        else:
            fnamenew = fname
            sys.stderr.write("notspam:_")
        m = statre.search(spamtest[1])
        if not m:
            sys.stderr.write(spamtest[1])
        else:
            groups = m.groups()
            sys.stderr.write("(%s)_succ:_%s___fail:_%s\n" % (cls, groups[0], groups[1]))
        # <folder>/tmp/<name>  ->  <folder>/new/<name>
        fnamenew = os.path.normpath(os.path.join(os.path.dirname(fnamenew),
                                                 '../new',
                                                 os.path.basename(fnamenew)))
        os.link(fname, fnamenew)
    finally:
        _unlinkQuietly(delfiles)


def spamFilter(cls, gooddest):
    """Run realSpamFilter() and translate the outcome into an exit code.

    Returns 99 on success, 100 for a permanent MailError and 111 for a
    temporary MailError or any other exception (whose traceback is printed
    through sys.excepthook).
    NOTE(review): these values presumably follow the exit-code convention of
    the invoking mail delivery agent -- confirm.
    """
    try:
        realSpamFilter(cls, gooddest)
        return 99
    except MailError as e:
        if e.permanent:
            sys.stderr.write("Delivery failed: " + e.msg + "\n")
            return 100
        else:
            sys.stderr.write("Delivery delayed: " + e.msg + "\n")
            return 111
    except:
        # Deliberately catches everything: this is the outermost boundary
        # and must always hand back an exit code.
        sys.stderr.write("Delivery delayed for an unknown reason\n")
        einfo = sys.exc_info()
        sys.excepthook(einfo[0], einfo[1], einfo[2])
        return 111


# Value of the X-Spam-Filter header: optional tag, category, timestamp.
spamheader_re = re.compile(r'\s*(?:(maps|spam|unkn)\s+)?(\S+)\s+([0-9]+)')


def learnMsg(fname, isspam, category = None):
    """Train a category database with the message stored in fname.

    fname    -- path of the message; it is DELETED after successful training.
    isspam   -- true to train <category>.spam.css, false for
                <category>.nospam.css (both inside crmdir).
    category -- database category; when omitted it is taken from the
                message's X-Spam-Filter header.

    Raises ValueError when no category can be determined, RuntimeError when
    ~/.crm/learn.crm reports a non-zero status, and MailError when the
    message cannot be parsed.
    """
    tmproot = os.environ.get('TMPDIR', '/tmp')
    tmppath = os.path.join(tmproot,
                           "%d.%d.%d" % (os.getuid(), os.getpid(), time.time()))
    os.mkdir(tmppath, 0o700)
    delfiles = []
    try:
        tmpfile = os.path.join(tmppath, 'clean.msg')
        delfiles.append(tmpfile)
        # NOTE(review): predictable debug copy in a shared directory, as in
        # realSpamFilter(); kept for compatibility.
        okfile = os.path.join(tmproot, 'ok.msg')
        msg = _cleanupToFiles(fname, tmpfile, okfile)
        if (not category) and ('X-Spam-Filter' in msg):
            match = spamheader_re.match(msg['X-Spam-Filter'])
            if match:
                category = match.group(2)
        msg = None
        if not category:
            raise ValueError('category must have a value, or message must '
                             'have "X-Spam-Filter" header field.')
        dbname = '%s.%s.css' % (category, 'spam' if isspam else 'nospam')
        learncmd = os.path.join(crmdir, 'learn.crm')
        dbfile = os.path.join(crmdir, dbname)
        # The category may come from a header of the message itself, so the
        # command line must be quoted rather than interpolated verbatim.
        # NOTE(review): a category containing '/' still selects a database
        # outside crmdir -- decide whether that should be rejected.
        cmdline = _shellCommand([learncmd, dbfile], tmpfile)
        sys.stderr.write(cmdline + '\n')
        exitcode = os.system(cmdline)
        if exitcode != 0:
            raise RuntimeError(("Attempt to execute '%s' resulted in exit "
                                "code %d") % (cmdline, exitcode))
        # Only a successfully learned message is removed.
        delfiles.append(fname)
    finally:
        _unlinkQuietly(delfiles)
        try:
            os.rmdir(tmppath)
        except OSError:
            pass