#!/usr/bin/python
#-*- coding: utf-8 -*-

# Copyright 2010 Mir Calculate. http://www.calculate-linux.org
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

import Milter, tempfile, traceback
from time import strftime, time, localtime
# NOTE: the plain "import time" below rebinds the name "time" to the module,
# so time.mktime / time.strftime / time.localtime are module attributes.
import sys, os, datetime, time, email, types, re
import email.header
from email.utils import getaddresses,parsedate
from email import message_from_string
from os.path import join as pathjoin, exists as pathexists
from shutil import copy as copyfile, move as movefile
from optparse import OptionParser
from threading import Lock

# fix ecre in email module: the look-ahead also accepts , ) ( _ " after an
# encoded word. The group names mirror the ones used by the stdlib pattern.
email.header.ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
  \?=                   # literal ?=
  (?=[,)(_" \t]|$)      # whitespace or the end of the string
  ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)

# Body size limit (bytes); bigger letters are spilled to the error directory
MAXLETTERSIZE=1024*1024*512


def mkdir_force(path):
    """Make directory together with all missing parents.

    Return False if the directory already exists, otherwise create it and
    return True. Errors from os.mkdir (OSError) are propagated.
    """
    if pathexists(path):
        return False
    parent = os.path.split(path)[0]
    # an empty parent means "current directory": nothing to create, and
    # recursing on it would never terminate
    if parent and not pathexists(parent):
        mkdir_force(parent)
    # for a path with a trailing slash the parent IS the directory itself,
    # so it may already have been created by the recursive call
    if not pathexists(path):
        os.mkdir(path)
    return True


def _safe_dirname(name):
    """Make a single path component from a name taken from a letter.

    Addresses come from untrusted mail headers; path separators are removed
    (the same way _genfilename treats subjects) so that a name can never
    point outside of its parent directory.
    """
    name = name.replace('/', '')
    if os.sep != '/':
        name = name.replace(os.sep, '')
    if name in ("", ".", ".."):
        return "_"
    return name


class Logger:
    """Process-wide logger: every Logger() object is a proxy to one shared
    implementation object, attribute reads and writes are delegated to it."""

    class __impl:
        """Shared logger state and output methods"""
        debug = False
        lock = Lock()
        logfile = sys.stdout

        def printLOG(self,s):
            """Write time-stamped line to the log file.
            If the log is not writable the traceback goes to stderr."""
            with self.lock:
                try:
                    self.logfile.write(strftime("%Y/%m/%d %H:%M:%S ")+"%s\n"%s)
                    self.logfile.flush()
                except Exception:
                    sys.stderr.write(strftime("%Y%m%d%H%M%S ") +
                        "".join(traceback.format_exception(*sys.exc_info())))

        def printDEBUG(self,s):
            """Write message to the log only if debug is switched on"""
            if self.debug:
                self.printLOG("DEBUG:%s"%s)

        def printERROR(self,s):
            """Write message to stderr (as is, without time stamp)"""
            with self.lock:
                try:
                    sys.stderr.write("%s"%s)
                    sys.stderr.flush()
                except Exception:
                    # best effort: there is no other place to report to
                    pass

    __instance = None

    def __init__(self):
        if Logger.__instance is None:
            Logger.__instance = Logger.__impl()
        # write through __dict__ because __setattr__ delegates to the
        # shared object; the key is the mangled name of __instance
        self.__dict__['_Logger__instance'] = Logger.__instance

    def __getattr__(self,attr):
        return getattr(self.__instance, attr)

    def __setattr__(self,attr,value):
        return setattr(self.__instance, attr, value)


def dbg_except():
    """Debug function, for try-except block: log the current traceback"""
    Logger().printLOG(strftime("%Y%m%d%H%M%S ") +
        "".join(traceback.format_exception(*sys.exc_info())))


def strstime(format="%Y-%m-%d_%H:%M",seconds=0):
    """Format seconds since the epoch as local time string"""
    return strftime(format, localtime(seconds))


class Letter:
    """Letter: get info by different field"""
    reFrom = re.compile("(^|\n)\s*(От:|Отправитель:|From:)(\s*\n)?([^\n]+)")
    reForward = re.compile("Fwd?:")
    defaults = {'To':'nobody',
                'From':'nobody',
                'Subject':'nosubject',
                'Cc':'nobody',
                'Delivered-To':'nobody',
                'Date':'Mon, 1 Jan 2000 00:00:00 +0400 (MSD)'
               }

    def __init__(self,file=None,content=None,message=None):
        """Create letter from file (by name), content (string)
        or message (email message object)"""
        self.cacheSender = None
        self.cacheReceiver = None
        # letter by filename
        if file:
            with open(file,"r") as letter_file:
                content = letter_file.read()
            self.filepath = file
            self.mail = message_from_string(content)
        # letter by content
        elif content:
            self.filepath = "stdin"
            self.mail = message_from_string(content)
        # letter by message object ("is not None": a message without
        # headers has zero length and would be considered false)
        elif message is not None:
            self.filepath = "stdin"
            self.mail = message
        else:
            Logger().printDEBUG(
                "Wrong file or content or message: '%s', '%s', '%s'" %
                (str(file),str(content),str(message)))
            # keep the object usable: behave as an empty letter
            self.filepath = "stdin"
            self.mail = message_from_string("")

    def _getAddressesFrom(self,field):
        """Get (realname, address) pairs from specified header field"""
        return getaddresses([self.mail.get(field,"")])

    def _getEmailsFrom(self,field):
        """Get fixed list of lower-cased addresses from header field"""
        return self._fixEmails(
            [pair[1].lower() for pair in self._getAddressesFrom(field)])

    def getAllEmails(self):
        """Get all emails from all fields (To,From,Delivered-To,Cc)"""
        return self._fixEmails(self.getReceiverEmails() +
                               self.getSenderEmails())

    def _fixEmails(self,emails):
        """Discard from emails list wrong and duplicate emails
        (order of first appearance is kept)"""
        seen = set()
        fixed = []
        for address in emails:
            if "@" in address and address not in seen:
                seen.add(address)
                fixed.append(address)
        return fixed

    def getReceiverEmails(self):
        """Cache function for _getReceiverEmails"""
        # compare with None: an empty result is a valid cached value
        if self.cacheReceiver is None:
            self.cacheReceiver = self._getReceiverEmails()
            Logger().printDEBUG("Receiver:%s"%self.cacheReceiver)
        return self.cacheReceiver

    def _getReceiverEmails(self):
        """Get all emails from all receiver fields (To,Delivered-To,Cc).

        Resent-To / Delivered-To have priority over To + Cc; when both are
        present the one which comes first in the header is used."""
        resent_to = self.getResentTo()
        delivered_to = self.getDeliveredTo()
        if resent_to and delivered_to:
            # header names are case-insensitive, keys() keeps original case
            order_field = [key.lower() for key in self.mail.keys()]
            if order_field.index("delivered-to") < \
                order_field.index("resent-to"):
                receiver_emails = delivered_to
            else:
                receiver_emails = resent_to
        else:
            receiver_emails = resent_to or delivered_to or \
                              self.getTo()+self.getCc()
        return self._fixEmails(receiver_emails)

    def _getParts(self):
        """Get payload of (multipart) message as list of Letter"""
        return [Letter(message=payload)
                for payload in self.mail.get_payload()]

    def _isForwarded(self):
        """Detected is letter forwarded: subject matches reForward
        or there is no subject at all (true for parts of a letter)"""
        if "Subject" not in self.mail:
            return True
        return self.reForward.search(self.getSubject())

    def _getTextPlain(self):
        """Get content from body message (recoded to utf-8 if possible)"""
        letter_text = self.mail.get_payload(decode=True)
        charset = self.mail.get_charsets()
        # if charset specified and it is not utf-8
        try:
            if charset and charset[0] and \
                not charset[0].lower() in ['utf-8','utf8']:
                letter_text = letter_text.decode(charset[0]).encode('utf-8')
        except Exception as e:
            Logger().printDEBUG("wrong decode from %s: %s" % (charset[0], e))
        return letter_text

    def getSenderEmails(self):
        """Cache function for _getSenderEmails"""
        if self.cacheSender is None:
            self.cacheSender = self._getSenderEmails()
            Logger().printDEBUG("Sender:%s"%self.cacheSender)
        return self.cacheSender

    def _getSenderEmails(self):
        """Get all email from sender (Forwarding processed)"""
        if self._isForwarded():
            if self.mail.is_multipart():
                # get all emails from all parts
                emails = []
                for part in self._getParts():
                    emails = emails + part._getSenderEmails()
                # get last email from part or get From field of this letter
                return self._fixEmails(emails[-1:] or self.getFrom())
            if self.mail.get_content_type() == "text/plain":
                letter_text = self._getTextPlain()
                # search From in mail content
                match = self.reFrom.findall(letter_text or "")
                if match:
                    # the last "From:" line found, group 4 is its value
                    return self._fixEmails(
                        [pair[1].lower()
                         for pair in getaddresses([match[-1][3]])])
        # get sender by From field
        return self.getFrom()

    def getDeliveredTo(self):
        """Get 'Delivered-To' field"""
        return self._getEmailsFrom('Delivered-To')

    def getResentTo(self):
        """Get 'Resent-To' field"""
        return self._getEmailsFrom('Resent-To')

    def getCc(self):
        """Get 'Cc' field"""
        return self._getEmailsFrom('Cc')

    def getTo(self):
        """Get 'To' field"""
        return self._getEmailsFrom('To')

    def getFrom(self):
        """Get 'From' field"""
        return self._getEmailsFrom('From')

    def getSubject(self):
        """Get 'Subject' field (decoded, as utf-8 string).
        Default subject is returned if the field can not be decoded."""
        try:
            decodedSubject = email.header.decode_header(
                self.mail.get('Subject', self.defaults['Subject']))
            # decode each chunk by its charset and join to string
            return "".join(
                [chunk[0].decode(chunk[1] or "utf-8").encode('utf-8')
                 for chunk in decodedSubject])
        except Exception:
            Logger().printDEBUG("Wrong subject:%s" % self.mail.get("Subject"))
            return self.defaults['Subject']

    def getDate(self):
        """Get 'Date' field in format time.mktime.
        Default date is used if the field is missing or malformed."""
        default = self.defaults['Date']
        for value in (self.mail.get("Date", default), default):
            # parsedate returns None for a string it can not parse
            parsed = parsedate(value)
            if parsed is None:
                continue
            try:
                return time.mktime(parsed)
            except (OverflowError, ValueError):
                # date is out of range supported by the platform
                continue
        return 0.0

    # Get letter file name
    def getFile(self):
        """Get name of file the letter was read from ('stdin' if none)"""
        return self.filepath

    def _genfilename(self,suffix):
        """Generate new file name by datetime, suffix and subject"""
        maxfilename = 100
        filename = ("%s_%s%s" %
            (time.strftime("%Y-%m-%d_%H:%M",
                           time.localtime(self.getDate())),
             suffix, self.getSubject())).replace('/','')
        # cut by characters (not bytes) to keep utf-8 sequences whole
        return filename.decode('utf-8')[:maxfilename].encode('utf-8')

    def copyTo(self,destination,suffix="",filename=None):
        """Write letter into directory 'destination'.

        File name is 'filename' or generated by date, suffix and subject;
        number is appended to the name if such file already exists."""
        destination = pathjoin(destination,
                               filename or self._genfilename(suffix))
        # if destination file exists, then rename by addition number
        if pathexists(destination):
            i = 2
            while pathexists("%s%02d"%(destination,i)):
                i += 1
            destination = "%s%02d"%(destination,i)
        with open(destination,"w") as letter_file:
            letter_file.write(self.mail.as_string())
        # set mtime to letter file by letter date
        os.utime(destination, (self.getDate(),)*2)


class MailOpponent:
    """Mail opponent: folder with all letters from/to one email"""

    def __init__(self,parent,email,name=None):
        """Create opponent folder (named by 'name' or email) in mailbox
        'parent'; date is mtime of folder or 0 for the new one"""
        self.email = email
        self.parent = parent
        self.directory = _safe_dirname(name or self.email)
        if mkdir_force(self.getDirectory()):
            self.date = 0
        else:
            self.date = os.lstat(self.getDirectory()).st_mtime

    def rename(self,newname):
        """Rename opponent folder"""
        newname = _safe_dirname(newname)
        oldpath = self.getDirectory()
        if self.directory != newname:
            self.directory = newname
            movefile(oldpath,self.getDirectory())
            os.utime(self.getDirectory(),(self.date,)*2)

    def getDate(self):
        """Get date of the latest letter"""
        return self.date

    def renameByLatestLetter(self):
        """Rename folder of opponent by latest letter date (Date)_(email)"""
        self.rename("%s_%s" % (strstime("%Y-%m-%d_%H:%M",self.date),
                               self.email))

    def getDirectory(self):
        """Get full path to mail opponent"""
        return pathjoin(self.parent.getDirectory(),self.directory)

    def addLetter(self,letter):
        """Add letter to opponent folder"""
        # opponent is receiver: letter is outgoing for the mailbox
        if self.email in letter.getReceiverEmails():
            letter.copyTo(self.getDirectory(),suffix="Out:")
        else:
            letter.copyTo(self.getDirectory(),suffix="In:")
        self.date = max(self.date, letter.getDate())
        # writing the file changed mtime of folder, set it to letter date
        os.utime(self.getDirectory(),(self.date,)*2)


class MailBox:
    """Mail box. Keep mail sorted by opponents. (opponent union)"""
    reOpponent = re.compile('^(\d{4}-\d{2}-\d{2}_\d{2}:\d{2}_)(.*)$')

    def __init__(self,parent,emails):
        """Create mail box in specified place.

        parent      MailKeeper object
        emails      emails for this mailbox (first is the folder name)
        """
        self.parent = parent
        self.emails = emails
        self.ops = {}
        self.directory = _safe_dirname(emails[0])
        if mkdir_force(self.getDirectory()):
            self.date = 0
        else:
            # get all exists opponents
            for entry in os.listdir(self.getDirectory()):
                res = self.reOpponent.search(entry)
                if res:
                    # folder name is (date)_(email)
                    self.addOpponent(res.groups()[1], entry)
                else:
                    self.addOpponent(entry, entry)
            self.date = os.lstat(self.getDirectory()).st_mtime

    def fixMtime(self):
        """Fix mtime of mailbox directory by date (latest letter)"""
        os.utime(self.getDirectory(),(self.date,)*2)

    def getDirectory(self):
        """Get full path to mail box"""
        return pathjoin(self.parent.getDirectory(),self.directory)

    def addOpponent(self,email,name=None):
        """Create opponent for email (folder 'name' or email), return it"""
        self.ops[email] = MailOpponent(self, email, name)
        return self.ops[email]

    def addLetter(self,letter):
        """Add letter to mailbox."""
        # detect who is mailbox (sender or receiver)
        if set(self.emails) & set(letter.getSenderEmails()):
            # mailbox is sender
            opponents = letter.getReceiverEmails()
        else:
            # mailbox is receiver
            opponents = letter.getSenderEmails()
        # add letter for all opponents by email
        for address in opponents:
            op = self.ops.get(address,None) or self.addOpponent(address)
            op.addLetter(letter)
        self.date = max(self.date, letter.getDate())


class MailKeeper:
    """Object which keep mailboxes (mailboxes union)"""

    def __init__(self,storagedir=None,domains=None,errordir=None):
        """Create mail keeper.

        storagedir  root directory of mailboxes
        domains     own domains, mailboxes are created for their emails
        errordir    directory for letters which can not be processed
        """
        # root directory for mail keeper
        self.root = storagedir
        # directory for error letters
        self.errordir = errordir
        # dict of created mailboxes
        self.mailboxes = {}
        # create mail keeper directory
        mkdir_force(self.root)
        # create directory for error letters
        mkdir_force(self.errordir)
        #self.initMailBoxes()
        self.domains = [] if domains is None else domains
        # NOTE: domains are inserted into the pattern as is (as regex
        # fragments), any prefix before the domain is accepted
        self.reInDomain = re.compile(
            r"@([^@]+)?(%s)$"%"|".join(self.domains),re.S)
        # number for next error letter name
        self.iNum = 2
        if "unknown" not in self.mailboxes:
            self.mailboxes["unknown"] = [MailBox(self,["unknown"])]

    def getAllMailboxes(self):
        """Get all mailboxes of mail keeper"""
        return set([mailbox
                    for mailboxes in self.mailboxes.values()
                    for mailbox in mailboxes])

    def getDirectory(self):
        """Get full path to mail keeper"""
        return self.root

    def _getMailBoxByEmails(self,emails):
        """Get all mailboxes by emails"""
        return set([mailbox
                    for address in emails
                    for mailbox in self.mailboxes[address]])

    def createMailBoxByLetter(self,letter):
        """Create mailbox for each own-domain email of letter
        which has no mailbox yet"""
        for address in letter.getAllEmails():
            if self.reInDomain.search(address) and \
                address not in self.mailboxes:
                self.mailboxes[address] = [MailBox(self,[address])]

    def getMailBoxesByLetter(self,letter):
        """Get list of mailboxes whose email is mentioned in letter"""
        mb_emails = set(self.mailboxes.keys()) & set(letter.getAllEmails())
        return list(self._getMailBoxByEmails(mb_emails))

    def getErrorLetterName(self,name="letter"):
        """Get path of not existing file in error directory
        ('name' or 'name' with number)"""
        destination = pathjoin(self.errordir,name)
        # if destination file exists, then rename by addition number
        # (the same format must be used for checking and for the result)
        if pathexists(destination):
            while pathexists("%s%02d"%(destination,self.iNum)):
                self.iNum += 1
            destination = "%s%02d"%(destination,self.iNum)
        return destination

    def addLetter(self,letter):
        """Add letter by detect owner, creating mailbox, and redirect
        to mailbox. Letter is saved in error directory on any failure."""
        try:
            if self.domains:
                self.createMailBoxByLetter(letter)
            mboxs = self.getMailBoxesByLetter(letter)
            if mboxs:
                for mbox in mboxs:
                    mbox.addLetter(letter)
            else:
                Logger().printLOG("unknown letter:%s" % (letter.getFile()))
                self.mailboxes["unknown"][0].addLetter(letter)
        except Exception:
            dbg_except()
            # copyTo joins file name with directory itself, so only the
            # last component of error letter path is passed
            letter.copyTo(self.errordir,
                filename=os.path.basename(self.getErrorLetterName()))


class SortMilter(Milter.Milter):
    """Milter for sorting mail."""

    def __init__(self,mail_keeper,lock):
        """mail_keeper  MailKeeper object which receives the letters
        lock         lock shared by all connections"""
        self.tempname=self.mailfrom=self.connfrom=self.fp=None
        self.id = Milter.uniqueID()
        self.mail_keeper = mail_keeper
        self.lock = lock
        # state of message, reset by envfrom
        self.headers = []
        self.bodysize = 0
        self.mailbody = ""
        self.errorletter = None

    def _closeErrorLetter(self):
        """Close file of big letter if it is open"""
        if self.errorletter:
            try:
                self.errorletter.close()
            except Exception:
                dbg_except()
            self.errorletter = None

    def connect(self, hostname, unused, hostaddr):
        """Execute on setting up SMTP-connect"""
        return Milter.CONTINUE

    def hello(self,hostname):
        """Execute after command HELO"""
        return Milter.CONTINUE

    def envfrom(self, f, *s):
        """Execute after command MAIL FROM. Begin new message."""
        # file may be left open by unfinished previous message
        self._closeErrorLetter()
        self.headers = []
        self.bodysize = 0
        self.mailbody = ""
        return Milter.CONTINUE

    def envrcpt(self, to, *s):
        """Execute after command RCPT TO."""
        return Milter.CONTINUE

    def header(self, name, val):
        """Execute for each field of message header"""
        # Save field
        self.headers.append("%s: %s" % (name, val))
        return Milter.CONTINUE

    def eoh(self):
        """Execute at end of header"""
        self.mailbody = "\n".join(self.headers) + "\n\n"
        return Milter.CONTINUE

    def body(self, chunk):
        """Execute for all fragment of body of message"""
        self.bodysize += len(chunk)
        self.mailbody += chunk
        # don't process letter if it is big: move it to the error file
        if self.bodysize > MAXLETTERSIZE:
            with self.lock:
                try:
                    if not self.errorletter:
                        Logger().printLOG("received big letter")
                        self.errorletter = open(
                            self.mail_keeper.getErrorLetterName("bigletter"),
                            "w")
                    self.errorletter.write(self.mailbody)
                    self.mailbody = ""
                except Exception:
                    dbg_except()
        return Milter.CONTINUE

    def eom(self):
        """Execute at end of message"""
        if not self.errorletter:
            Logger().printDEBUG("processed letter")
            with self.lock:
                try:
                    letter = Letter(content=self.mailbody)
                    self.mail_keeper.addLetter(letter)
                    # send command "rename by latest letter" for all
                    # opponents in mail keeper
                    all_mailbox = \
                        self.mail_keeper.getMailBoxesByLetter(letter)
                    emails = letter.getAllEmails()
                    # rename opponent mail folder for emails of letter
                    for mailbox in all_mailbox:
                        # list(): rename does not change ops, but be safe
                        for op in list(mailbox.ops.values()):
                            if op.email in emails:
                                op.renameByLatestLetter()
                except Exception:
                    dbg_except()
        else:
            self._closeErrorLetter()
        return Milter.ACCEPT

    def abort(self):
        """Execute for abnormal close connection"""
        self._closeErrorLetter()
        return Milter.CONTINUE

    def close(self):
        """Execute after end of connect (include error disconnect)"""
        self._closeErrorLetter()
        return Milter.CONTINUE


def getMailFromFolder(dir):
    """Find all files in specified directory (recursive)
    and generate Letter for each of them"""
    for root, dirs, files in os.walk(dir):
        for name in files:
            yield Letter(os.path.join(root,name))


def runMilterMode(port,mail_keeper):
    """Run milter on localhost port 'port', letters go to mail_keeper.
    Return after milter shutdown."""
    os.chdir("/tmp/")
    socketname = "inet:%d@localhost" % port
    timeout = 600  # seconds
    # one lock for all connections: mail keeper is shared
    lock = Lock()
    Milter.factory = lambda : SortMilter(mail_keeper,lock)
    Milter.set_flags(Milter.CHGBODY + Milter.CHGHDRS + Milter.ADDHDRS +
                     Milter.DELRCPT + Milter.ADDRCPT)
    Logger().printLOG("""Sort Milter start""")
    try:
        Milter.runmilter("mainfilter", socketname, timeout)
    except Milter.milter.error:
        Logger().printLOG("""Failed to start Sort Milter""")
    Logger().printLOG("""Sort Milter shutdown""")


def main(argv):
    """Parse command line arguments 'argv' (without program name)
    and process letters or run milter"""
    parser = OptionParser()
    parser.add_option("--storage-dir",
                      action="store",
                      default="/var/calculate/mailbox",
                      dest="storage_dir",
                      metavar="DIR",
                      help="Mail storage directory [default: %default]")
    parser.add_option("--error-dir",
                      action="store",
                      default="/var/calculate/mailbox/error",
                      dest="error_dir",
                      metavar="DIR",
                      help="Directory for letters with error")
    parser.add_option("--domain",
                      action="append",
                      default=[],
                      dest="domains",
                      metavar="DOMAIN",
                      help="Owner mail domain")
    parser.add_option("--letters-dir",
                      action="append",
                      default = [],
                      dest="letters_dir",
                      metavar="DIR",
                      help="Directory which contains letter for performing")
    parser.add_option("--remove-success",
                      action="store_true",
                      default = False,
                      dest="remove_success",
                      help="Remove letters from directory if processed success")
    parser.add_option("--letter-file",
                      action="append",
                      default = [],
                      dest="letter_file",
                      metavar="FILE",
                      help="Letter file for performing")
    parser.add_option("--letter-stdin",
                      action="store_true",
                      default=False,
                      dest="letter_stdin",
                      help="Get letter by standard input stream")
    parser.add_option("--log",
                      action="store",
                      dest="log",
                      default="-",
                      help="log destination (stdout by default)")
    parser.add_option("--milter-port","-m",
                      action="store",
                      dest="milter_port",
                      metavar="PORT",
                      help="port for milter mode")
    parser.add_option("--debug",
                      action="store_true",
                      dest="debug",
                      default=False,
                      help="print debug messages")
    # parse the arguments given by caller, not sys.argv implicitly
    options, args = parser.parse_args(argv)
    Logger().debug = options.debug
    if options.log == "-":
        Logger().logfile = sys.stdout
    else:
        Logger().logfile = open(options.log,"a")
    # create mail keeper by storage_dir parameter; paths are made absolute
    # because milter mode changes the current directory
    mail_keeper = MailKeeper(storagedir=os.path.abspath(options.storage_dir),
                             domains=options.domains,
                             errordir=os.path.abspath(options.error_dir))
    # work at milter mode on port specified by milter_port
    if options.milter_port:
        try:
            port = int(options.milter_port)
        except ValueError:
            # parser.error prints the message and exits
            parser.error("wrong milter port")
        runMilterMode(port, mail_keeper)
    else:
        # process all letter specified by letter-file option
        for lettername in options.letter_file:
            if pathexists(lettername):
                mail_keeper.addLetter( Letter(file=lettername) )
            else:
                Logger().printERROR("file not found:%s\n"%lettername)
        # process all mail directories specified by letters-dir option
        for directory in options.letters_dir:
            for letter in getMailFromFolder(directory):
                try:
                    Logger().printDEBUG("processed file:%s"%letter.getFile())
                    mail_keeper.addLetter(letter)
                    if options.remove_success:
                        Logger().printDEBUG("removed file:%s"%letter.getFile())
                        os.remove(letter.getFile())
                except Exception:
                    dbg_except()
        # process standard input stream if specified letter-stdin option
        if options.letter_stdin:
            letter_string = sys.stdin.read()
            mail_keeper.addLetter(Letter(content=letter_string))


if __name__ == "__main__":
    main(sys.argv[1:])