You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
calculate-utils-2.1-server/scripts/sortmilter

683 lines
25 KiB

#!/usr/bin/python
#-*- coding: utf-8 -*-
# Copyright 2010 Mir Calculate. http://www.calculate-linux.org
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import Milter, tempfile, traceback
from time import strftime, time, localtime
import sys, os, datetime, time, email, types, re
import email.header
from email.utils import getaddresses,parsedate
from email import message_from_string
from os.path import join as pathjoin, exists as pathexists
from shutil import copy as copyfile, move as movefile
from optparse import OptionParser
from threading import Lock
from functools import reduce
# Replace the encoded-word pattern used by email.header.decode_header().
# The trailing lookahead of this pattern accepts , ) ( _ " in addition to
# space, tab and end of line, so an encoded word directly followed by one of
# those characters is still recognised. (The in-pattern comment on the last
# pattern line mentions only whitespace/end of string; the character class is
# what actually applies.)
email.header.ecre = re.compile(r'''
=\? # literal =?
(?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
\? # literal ?
(?P<encoding>[qb]) # either a "q" or a "b", case insensitive
\? # literal ?
(?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string
\?= # literal ?=
(?=[,)(_" \t]|$) # whitespace or the end of the string
''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)
# Body size limit (1024*1024*512 = 512 MiB). SortMilter.body() compares the
# accumulated body size against it and spools larger letters to an error file
# instead of sorting them.
MAXLETTERSIZE=1024*1024*512
def mkdir_force(path):
    """Create directory *path* together with any missing parents.

    Returns False when *path* already exists (nothing is created) and
    True when the directory was created by this call.

    Raises OSError when the directory cannot be created.
    """
    if pathexists(path):
        return False
    try:
        # makedirs also copes with relative paths that have no directory
        # component, where walking up with os.path.split() never terminates
        os.makedirs(path)
    except OSError:
        # somebody else may have created it between the check and makedirs
        if os.path.isdir(path):
            return False
        raise
    return True
class Logger:
    """Shared logger: every Logger() object forwards to one implementation.

    Attribute reads and writes on any Logger() are redirected to the single
    shared implementation object, so setting ``Logger().debug`` or
    ``Logger().logfile`` affects all later Logger() calls.
    """

    class __impl:
        """State and output routines shared by all Logger() objects."""
        # when True, printDEBUG messages are written to the log
        debug = False
        # held while a message is being written
        lock = Lock()
        # file-like object receiving printLOG output
        logfile = sys.stdout

        def printLOG(self,s):
            """Write s to the log file, prefixed with a timestamp."""
            # "with" releases the lock even when the fallback write fails
            with self.lock:
                try:
                    self.logfile.write(strftime("%Y/%m/%d %H:%M:%S ")+"%s\n"%s)
                    self.logfile.flush()
                except Exception:
                    sys.stderr.write(strftime("%Y%m%d%H%M%S ") +
                        "".join(traceback.format_exception(*sys.exc_info())))

        def printDEBUG(self,s):
            """Write s to the log with a DEBUG: prefix if debug is enabled."""
            if self.debug:
                self.printLOG("DEBUG:%s"%s)

        def printERROR(self,s):
            """Write s to stderr as is (no timestamp, no newline added)."""
            with self.lock:
                try:
                    sys.stderr.write("%s"%s)
                    sys.stderr.flush()
                except Exception:
                    # best effort: there is nowhere left to report a failure
                    # of stderr itself
                    pass

    __instance = None

    def __init__(self):
        if Logger.__instance is None:
            Logger.__instance = Logger.__impl()
        # written through __dict__ to bypass __setattr__; the key is the
        # name-mangled form of self.__instance used by the methods below
        self.__dict__['_Logger__instance'] = Logger.__instance

    def __getattr__(self,attr):
        return getattr(self.__instance, attr)

    def __setattr__(self,attr,value):
        return setattr(self.__instance, attr, value)
def dbg_except():
    """Log the traceback of the exception currently being handled.

    Meant to be called from inside an ``except`` block.
    """
    trace_text = "".join(traceback.format_exception(*sys.exc_info()))
    stamp = strftime("%Y%m%d%H%M%S ")
    Logger().printLOG(stamp + trace_text)
def strstime(format="%Y-%m-%d_%H:%M",seconds=0):
    """Render *seconds* since the epoch as local time using *format*."""
    moment = localtime(seconds)
    return strftime(format, moment)
class Letter:
    """A mail message with helpers to extract addresses, subject and date."""
    # "From:" line (Russian or English) inside a forwarded text body;
    # group 4 is the rest of that line, i.e. the address text
    reFrom = re.compile(r"(^|\n)\s*(От:|Отправитель:|From:)(\s*\n)?([^\n]+)")
    reForward = re.compile("Fwd?:")
    # values used when the corresponding header is absent
    defaults = {'To':'nobody',
                'From':'nobody',
                'Subject':'nosubject',
                'Cc':'nobody',
                'Delivered-To':'nobody',
                'Date':'Mon, 1 Jan 2000 00:00:00 +0400 (MSD)'
                }

    def __init__(self,file=None,content=None,message=None):
        """Create a letter from a file name, a string or a message object.

        file     path of a file holding the raw letter
        content  raw letter text
        message  already parsed email message object
        The first one given (in this order) wins. When none is usable an
        empty letter is created and a debug message is logged.
        """
        self.cacheSender = None
        self.cacheReceiver = None
        # letter by filename
        if file:
            # surrogateescape keeps undecodable bytes instead of failing
            with open(file, "r", errors="surrogateescape") as letter_file:
                content = letter_file.read()
            self.filepath = file
            self.mail = message_from_string(content)
        # letter by content
        elif content:
            self.filepath = "stdin"
            self.mail = message_from_string(content)
        # letter by message object; a message without headers has len() == 0
        # and is therefore falsy, so compare against None explicitly
        elif message is not None:
            self.filepath = "stdin"
            self.mail = message
        else:
            Logger().printDEBUG(
                "Wrong file or content or message: '%s', '%s', '%s'" %
                (str(file), str(content), str(message)))
            self.filepath = "stdin"
            self.mail = message_from_string("")

    def _getAddressesFrom(self,field):
        """Get (name, address) pairs parsed from header *field*."""
        return getaddresses([self.mail.get(field,"")])

    def _getLoweredAddresses(self,field):
        """Get valid lower-cased addresses of header *field*."""
        return self._fixEmails([pair[1].lower()
                                for pair in self._getAddressesFrom(field)])

    def getAllEmails(self):
        """Get all emails from all fields (To,From,Delivered-To,Cc)"""
        return self._fixEmails(self.getReceiverEmails() +
                               self.getSenderEmails())

    def _fixEmails(self,emails):
        """Discard wrong (no "@") and duplicate emails, keeping the order.

        The order matters: _getSenderEmails picks the last address found.
        """
        seen = set()
        unique = []
        for address in emails:
            if "@" in address and address not in seen:
                seen.add(address)
                unique.append(address)
        return unique

    def getReceiverEmails(self):
        """Cache function for _getReceiverEmails"""
        if self.cacheReceiver is None:
            self.cacheReceiver = self._getReceiverEmails()
            Logger().printDEBUG("Receiver:%s"%self.cacheReceiver)
        return self.cacheReceiver

    def _getReceiverEmails(self):
        """Get receiver emails.

        Resent-To / Delivered-To take precedence over To + Cc. When both
        are present, the one appearing first in the header block is used.
        """
        resent_to = self.getResentTo()
        delivered_to = self.getDeliveredTo()
        if resent_to and delivered_to:
            # keys() lists header names in message order with their original
            # spelling, so compare case-insensitively
            order_field = [name.lower() for name in self.mail.keys()]
            if order_field.index("delivered-to") < \
               order_field.index("resent-to"):
                receiver_emails = delivered_to
            else:
                receiver_emails = resent_to
        else:
            receiver_emails = resent_to or delivered_to or \
                              self.getTo()+self.getCc()
        return self._fixEmails(receiver_emails)

    def _getParts(self):
        """Get the parts of a multipart message as Letter objects"""
        return [Letter(message=payload) for payload in self.mail.get_payload()]

    def _isForwarded(self):
        """Detect forwarding: subject contains Fw:/Fwd: or is missing."""
        if "Subject" not in self.mail:
            return True
        return bool(self.reForward.search(self.getSubject()))

    def _getTextPlain(self):
        """Get the body as a string, or None if the payload is not a string"""
        letter_text = self.mail.get_payload()
        if isinstance(letter_text, str):
            return letter_text
        Logger().printDEBUG("wrong content type (expected plain text)")
        return None

    def getSenderEmails(self):
        """Cache function for _getSenderEmails"""
        if self.cacheSender is None:
            self.cacheSender = self._getSenderEmails()
            Logger().printDEBUG("Sender:%s" % self.cacheSender)
        return self.cacheSender

    def _getSenderEmails(self):
        """Get sender emails (forwarding is taken into account)"""
        if self._isForwarded():
            if self.mail.is_multipart():
                # collect sender emails of every part, in part order
                emails = []
                for part in self._getParts():
                    emails.extend(part._getSenderEmails())
                # last email found in the parts, else From of this letter
                return self._fixEmails(emails[-1:] or self.getFrom())
            if self.mail.get_content_type() == "text/plain":
                letter_text = self._getTextPlain()
                # search "From:" lines in the mail text
                match = self.reFrom.findall(letter_text) if letter_text else []
                if match:
                    quoted = self._fixEmails([pair[1].lower()
                        for pair in getaddresses([match[-1][3]])])
                    # a quoted line without an address must not hide
                    # the real sender
                    if quoted:
                        return quoted
        # get sender by From field
        return self.getFrom()

    def getDeliveredTo(self):
        """Get 'Delivered-To' field"""
        return self._getLoweredAddresses('Delivered-To')

    def getResentTo(self):
        """Get 'Resent-To' field"""
        return self._getLoweredAddresses('Resent-To')

    def getCc(self):
        """Get 'Cc' field"""
        return self._getLoweredAddresses('Cc')

    def getTo(self):
        """Get 'To' field"""
        return self._getLoweredAddresses('To')

    def getFrom(self):
        """Get 'From' field"""
        return self._getLoweredAddresses('From')

    def getSubject(self):
        """Get the decoded 'Subject' field (default value on any failure)"""
        try:
            raw_subject = self.mail.get('Subject', self.defaults['Subject'])
            fragments = []
            for text, charset in email.header.decode_header(raw_subject):
                # decode_header returns bytes for every fragment as soon as
                # one encoded word is present, also for the plain ones
                if isinstance(text, bytes):
                    text = text.decode(charset or "utf-8", "replace")
                fragments.append(text)
            return "".join(fragments)
        except Exception:
            Logger().printDEBUG("Wrong subject:%s" % self.mail.get("Subject"))
            return self.defaults['Subject']

    def getDate(self):
        """Get 'Date' field as seconds (time.mktime).

        Falls back to the default date when the header cannot be parsed
        or converted.
        """
        fallback = parsedate(self.defaults['Date'])
        parsed = parsedate(self.mail.get("Date",self.defaults['Date']))
        try:
            return time.mktime(parsed or fallback)
        except (OverflowError, ValueError):
            return time.mktime(fallback)

    def getFile(self):
        """Get letter file name"""
        return self.filepath

    def _genfilename(self,suffix):
        """Generate new file name by datetime, suffix and subject"""
        maxfilename = 100
        stamp = time.strftime("%Y-%m-%d_%H:%M", time.localtime(self.getDate()))
        filename = ("%s_%s%s" % (stamp, suffix,
                                 self.getSubject())).replace('/', '')
        return filename[:maxfilename]

    def copyTo(self, destination, suffix="", filename=None):
        """Write the letter into directory *destination*.

        The file name is *filename* or is generated from date, *suffix*
        and subject. An existing file is never overwritten: a two-digit
        counter starting at 02 is appended instead. The file mtime is set
        to the letter date.
        """
        destination = pathjoin(destination,
                               filename or self._genfilename(suffix))
        # if destination file exists, then rename by addition number
        if pathexists(destination):
            i = 2
            while pathexists("%s%02d" % (destination, i)):
                i += 1
            destination = "%s%02d" % (destination, i)
        # surrogateescape writes back bytes kept from an undecodable source
        with open(destination, "w", errors="surrogateescape") as out:
            out.write(self.mail.as_string())
        # set mtime to letter file by letter date
        os.utime(destination, (self.getDate(),) * 2)
class MailOpponent:
    """Correspondent of a mailbox; owns one directory inside that mailbox."""

    def __init__(self,parent,email,name=None):
        """Bind the opponent to its directory, creating it when missing.

        parent  object whose getDirectory() gives the containing directory
        email   address of the opponent
        name    directory name (the email is used when not given)
        """
        self.email = email
        self.parent = parent
        self.directory = name if name else self.email
        created = mkdir_force(self.getDirectory())
        # a fresh directory has no letters yet; otherwise reuse its mtime
        self.date = 0 if created else os.lstat(self.getDirectory()).st_mtime

    def rename(self,newname):
        """Move the opponent directory to *newname*, keeping its date"""
        if self.directory == newname:
            return
        previous = self.getDirectory()
        self.directory = newname
        target = self.getDirectory()
        movefile(previous, target)
        os.utime(target, (self.date, self.date))

    def getDate(self):
        """Get the date of the latest letter known for this opponent"""
        return self.date

    def renameByLatestLetter(self):
        """Name the directory '<date of latest letter>_<email>'"""
        stamp = strstime("%Y-%m-%d_%H:%M",self.date)
        self.rename("%s_%s" % (stamp, self.email))

    def getDirectory(self):
        """Get full path of the opponent directory"""
        base = self.parent.getDirectory()
        return pathjoin(base, self.directory)

    def addLetter(self,letter):
        """Store the letter in the opponent directory.

        The file is marked "Out:" when the opponent is among the receivers
        of the letter, "In:" otherwise.
        """
        outgoing = self.email in letter.getReceiverEmails()
        letter.copyTo(self.getDirectory(),
                      suffix="Out:" if outgoing else "In:")
        self.date = max(self.date, letter.getDate())
        os.utime(self.getDirectory(), (self.date, self.date))
class MailBox:
    """Mail box. Keeps mail sorted by opponents (a set of opponents)."""
    # opponent directory renamed by date: "YYYY-MM-DD_HH:MM_<email>";
    # group 2 is the email
    reOpponent = re.compile(r'^(\d{4}-\d{2}-\d{2}_\d{2}:\d{2}_)(.*)$')

    def __init__(self,parent,emails):
        """Create mail box in specified place.

        parent   MailKeeper object
        emails   emails for this mailbox; the first one names the directory
        Opponent directories already present in an existing mailbox
        directory are registered.
        """
        self.parent = parent
        self.emails = emails
        self.ops = {}
        self.directory = emails[0]
        if mkdir_force(self.getDirectory()):
            self.date = 0
        else:
            # register all existing opponents
            for d in os.listdir(self.getDirectory()):
                # opponents are directories; ignore stray plain files
                if not os.path.isdir(pathjoin(self.getDirectory(), d)):
                    continue
                res = self.reOpponent.search(d)
                if res:
                    self.addOpponent(res.groups()[1], d)
                else:
                    self.addOpponent(d, d)
            self.date = os.lstat(self.getDirectory()).st_mtime

    def fixMtime(self):
        """Set mtime of the mailbox directory to the date of latest letter"""
        os.utime(self.getDirectory(),(self.date,) * 2)

    def getDirectory(self):
        """Get full path to mail box"""
        return pathjoin(self.parent.getDirectory(),self.directory)

    def addOpponent(self,email,name=None):
        """Create, register and return the opponent for *email*.

        name is the directory name of the opponent (see MailOpponent).
        """
        self.ops[email] = MailOpponent(self, email, name)
        return self.ops[email]

    def addLetter(self,letter):
        """Add letter to mailbox: store it for every opponent of the letter."""
        # detect who is mailbox (sender or receiver)
        if set(self.emails) & set(letter.getSenderEmails()):
            # mailbox is sender, opponents are the receivers
            addresses = letter.getReceiverEmails()
        else:
            # mailbox is receiver, opponents are the senders
            addresses = letter.getSenderEmails()
        # add letter for all opponents by email
        for address in addresses:
            op = self.ops.get(address) or self.addOpponent(address)
            op.addLetter(letter)
        self.date = max(self.date, letter.getDate())
class MailKeeper:
    """Object which keeps mailboxes (a set of mailboxes)"""

    def __init__(self,storagedir=None, domains=None, errordir=None):
        """Create the storage and error directories and the keeper state.

        storagedir  root directory of the mail storage
        domains     own mail domains; mailboxes are created for addresses
                    in these domains (and their subdomains)
        errordir    directory for letters which could not be sorted
        """
        # root directory for mail keeper
        self.root = storagedir
        # directory for error letters
        self.errordir = errordir
        # dict of created mailboxes: email -> list of MailBox
        self.mailboxes = {}
        # create mail keeper directory
        mkdir_force(self.root)
        # create directory for error letters
        mkdir_force(self.errordir)
        self.domains = domains if domains is not None else []
        self.reInDomain = self._compileDomainRegex(self.domains)
        # next number tried by getErrorLetterName
        self.iNum = 2
        if "unknown" not in self.mailboxes:
            self.mailboxes["unknown"] = [MailBox(self,["unknown"])]

    @staticmethod
    def _compileDomainRegex(domains):
        """Compile a pattern matching addresses of *domains* and subdomains.

        Domains are escaped (a "." matches only a dot) and a subdomain must
        be separated by a dot, so "x@notexample.com" does not match the
        domain "example.com".
        """
        alternatives = "|".join(re.escape(domain) for domain in domains)
        return re.compile(r"@(?:[^@]+\.)?(?:%s)$" % alternatives,
                          re.S | re.I)

    def getAllMailboxes(self):
        """Get all mailboxes of mail keeper"""
        return set([mailbox for mailboxes in self.mailboxes.values()
                    for mailbox in mailboxes])

    def getDirectory(self):
        """Get full path to mail keeper"""
        return self.root

    def _getMailBoxByEmails(self,emails):
        """Get all mailboxes by emails"""
        return set([mailbox for email in emails
                    for mailbox in self.mailboxes[email]])

    def createMailBoxByLetter(self,letter):
        """Create mailboxes for own-domain emails of the letter if missing"""
        for address in letter.getAllEmails():
            if self.reInDomain.search(address) and \
               address not in self.mailboxes:
                self.mailboxes[address] = [MailBox(self,[address])]

    def getMailBoxesByLetter(self,letter):
        """Get list of existing mailboxes whose email occurs in the letter"""
        mb_emails = set(self.mailboxes.keys()) & set(letter.getAllEmails())
        return list(self._getMailBoxByEmails(mb_emails))

    def getErrorLetterName(self,name="letter"):
        """Get a not yet existing path for *name* in the error directory.

        When the plain name is taken, a two-digit counter is appended; the
        counter only grows during the lifetime of the keeper.
        """
        destination = pathjoin(self.errordir,name)
        # if destination file exists, then rename by addition number
        if pathexists(destination):
            # probe with the same format that builds the result, otherwise
            # the probe never sees the files created earlier
            while pathexists("%s%02d"%(destination,self.iNum)):
                self.iNum += 1
            destination = "%s%02d"%(destination,self.iNum)
        return destination

    def addLetter(self,letter):
        """Add letter: detect owners, create mailboxes, pass to mailboxes.

        A letter matching no mailbox goes to the "unknown" mailbox. When
        sorting fails the error is logged and the letter is copied to the
        error directory.
        """
        try:
            if self.domains:
                self.createMailBoxByLetter(letter)
            mboxs = self.getMailBoxesByLetter(letter)
            if mboxs:
                for mbox in mboxs:
                    mbox.addLetter(letter)
            else:
                Logger().printLOG("unknown letter:%s" % (letter.getFile()))
                self.mailboxes["unknown"][0].addLetter(letter)
        except Exception:
            dbg_except()
            # pass directory and name separately: joining the error
            # directory with the full path doubles a relative errordir
            error_path = self.getErrorLetterName()
            letter.copyTo(os.path.dirname(error_path),
                          filename=os.path.basename(error_path))
class SortMilter(Milter.Milter):
    """Milter for sorting mail.

    Collects headers and body of every message and hands the complete
    letter to the mail keeper at end of message. Messages whose body grows
    beyond MAXLETTERSIZE are spooled to a file in the error directory and
    are not sorted. Every message is accepted.
    """

    def __init__(self,mail_keeper,lock):
        """mail_keeper  MailKeeper receiving the letters
        lock         lock held while the mail keeper is used
        """
        self.tempname=self.mailfrom=self.connfrom=self.fp=None
        self.id = Milter.uniqueID()
        self.mail_keeper = mail_keeper
        self.lock = lock
        # per-message state, reset by envfrom
        self.headers = []
        self.bodysize = 0
        self.mailbody = ""
        self.errorletter = None

    def _closeErrorLetter(self):
        """Close the spool file of an oversized letter, if one is open"""
        if self.errorletter:
            try:
                self.errorletter.close()
            except Exception:
                dbg_except()
            self.errorletter = None

    def connect(self, hostname, unused, hostaddr):
        """Execute on setting up SMTP-connect"""
        return Milter.CONTINUE

    def hello(self,hostname):
        """Execute after command HELO"""
        return Milter.CONTINUE

    def envfrom(self, f, *s):
        """Execute after command MAIL FROM. Begin new message."""
        # a spool file left over from an unfinished message is closed first
        self._closeErrorLetter()
        self.headers = []
        self.bodysize = 0
        self.mailbody = ""
        return Milter.CONTINUE

    def envrcpt(self, to, *s):
        """Execute after command RCPT TO."""
        return Milter.CONTINUE

    def header(self, name, val):
        """Execute for each field of message header"""
        # Save field
        self.headers.append("%s: %s" % (name, val))
        return Milter.CONTINUE

    def eoh(self):
        """Execute at end of header"""
        self.mailbody = "\n".join(self.headers) + "\n\n"
        return Milter.CONTINUE

    def body(self, chunk):
        """Execute for every fragment of the message body"""
        self.bodysize += len(chunk)
        # the body may be delivered as bytes while the headers are text;
        # surrogateescape keeps every byte recoverable
        if isinstance(chunk, bytes):
            chunk = chunk.decode("ascii", "surrogateescape")
        self.mailbody += chunk
        # don't process letter if it is big: spool it to the error directory
        if self.bodysize > MAXLETTERSIZE:
            with self.lock:
                try:
                    if not self.errorletter:
                        Logger().printLOG("received big letter")
                        self.errorletter = open(
                            self.mail_keeper.getErrorLetterName("bigletter"),
                            "w", encoding="utf-8", errors="surrogateescape")
                    self.errorletter.write(self.mailbody)
                    # written part is dropped from memory
                    self.mailbody = ""
                except Exception:
                    dbg_except()
        return Milter.CONTINUE

    def eom(self):
        """Execute at end of message"""
        if self.errorletter:
            # oversized letter: it is already in the spool file
            self._closeErrorLetter()
            return Milter.ACCEPT
        Logger().printDEBUG("processed letter")
        with self.lock:
            try:
                letter = Letter(content=self.mailbody)
                self.mail_keeper.addLetter(letter)
                # rename by latest letter the opponent folders of every
                # mailbox of the letter, for the emails of the letter
                all_mailbox = self.mail_keeper.getMailBoxesByLetter(letter)
                emails = letter.getAllEmails()
                for mailbox in all_mailbox:
                    for op in mailbox.ops.values():
                        if op.email in emails:
                            op.renameByLatestLetter()
            except Exception:
                dbg_except()
        return Milter.ACCEPT

    def abort(self):
        """Execute for abnormal close connection"""
        self._closeErrorLetter()
        return Milter.CONTINUE

    def close(self):
        """Execute after end of connect (include error disconnect)"""
        self._closeErrorLetter()
        return Milter.CONTINUE
def getMailFromFolder(dir):
    """Yield a Letter for every file found anywhere below directory *dir*."""
    for current, _subdirs, names in os.walk(dir):
        for name in names:
            letter_path = os.path.join(current, name)
            yield Letter(letter_path)
def runMilterMode(port,mail_keeper):
    """Run as a milter listening on localhost:*port* until it stops.

    Every SortMilter created for the run shares one lock and the given
    mail keeper. Start, start failure and shutdown are logged.
    """
    os.chdir("/tmp/")
    socketname = "inet:%d@localhost" % port
    timeout = 600 # seconds
    shared_lock = Lock()

    def make_milter():
        return SortMilter(mail_keeper, shared_lock)

    Milter.factory = make_milter
    Milter.set_flags(Milter.CHGBODY +
                     Milter.CHGHDRS +
                     Milter.ADDHDRS +
                     Milter.DELRCPT +
                     Milter.ADDRCPT)
    Logger().printLOG("Sort Milter start")
    try:
        Milter.runmilter("mainfilter", socketname, timeout)
    except Milter.milter.error:
        Logger().printLOG("Failed to start Sort Milter")
    Logger().printLOG("Sort Milter shutdown")
def main(argv):
    """Sort letters according to the command line arguments *argv*.

    argv  list of arguments without the program name

    With --milter-port the program runs as a milter. Otherwise it sorts
    the letters given by --letter-file, --letters-dir and --letter-stdin,
    in this order.
    """
    parser = OptionParser()
    parser.add_option("--storage-dir",
                      action="store",
                      default="/var/calculate/mailbox",
                      dest="storage_dir",
                      metavar="DIR",
                      help="Mail storage directory [default: %default]")
    parser.add_option("--error-dir",
                      action="store",
                      default="/var/calculate/mailbox/error",
                      dest="error_dir",
                      metavar="DIR",
                      help="Directory for letters with error")
    parser.add_option("--domain",
                      action="append",
                      default= [],
                      dest="domains",
                      metavar="DOMAIN",
                      help="Owner mail domain")
    parser.add_option("--letters-dir",
                      action="append",
                      default=[],
                      dest="letters_dir",
                      metavar="DIR",
                      help="Directory which contains letter for performing")
    parser.add_option("--remove-success",
                      action="store_true",
                      default=False,
                      dest="remove_success",
                      help="Remove letters from directory if processed success")
    parser.add_option("--letter-file",
                      action="append",
                      default=[],
                      dest="letter_file",
                      metavar="FILE",
                      help="Letter file for performing")
    parser.add_option("--letter-stdin",
                      action="store_true",
                      default=False,
                      dest="letter_stdin",
                      help="Get letter by standard input stream")
    parser.add_option("--log",
                      action="store",
                      dest="log",
                      default="-",
                      help="log destination (stdout by default)")
    parser.add_option("--milter-port","-m",
                      action="store",
                      dest="milter_port",
                      metavar="PORT",
                      help="port for milter mode")
    parser.add_option("--debug",
                      action="store_true",
                      dest="debug",
                      default=False,
                      help="print debug messages")
    # parse the arguments handed to main, not whatever sys.argv holds
    options, _unused = parser.parse_args(argv)
    Logger().debug = options.debug
    if options.log == "-":
        Logger().logfile = sys.stdout
    else:
        # stays open for the whole run
        Logger().logfile = open(options.log, "a")
    # create mail keeper by storage_dir parameter
    mail_keeper = MailKeeper(storagedir=options.storage_dir,
                             domains=options.domains,
                             errordir=options.error_dir)
    # work at milter mode on port specified by milter_port
    if options.milter_port:
        try:
            port = int(options.milter_port)
        except ValueError:
            # parser.error() terminates the program
            parser.error("wrong milter port")
        runMilterMode(port, mail_keeper)
    else:
        # process all letter specified by letter-file option
        for lettername in options.letter_file:
            if pathexists(lettername):
                ll = Letter(file=lettername)
                mail_keeper.addLetter(ll)
                Logger().printDEBUG("content type:%s" %
                                    ll.mail.get_content_type())
            else:
                Logger().printERROR("file not found:%s\n" % lettername)
        # process all mail directories specified by letters-dir option
        for directory in options.letters_dir:
            for letter in getMailFromFolder(directory):
                try:
                    Logger().printDEBUG("processed file:%s" % letter.getFile())
                    mail_keeper.addLetter(letter)
                    if options.remove_success:
                        Logger().printDEBUG("removed file:%s" %
                                            letter.getFile())
                        os.remove(letter.getFile())
                except Exception:
                    dbg_except()
        # process standard input stream if specified letter-stdin option
        if options.letter_stdin:
            letter_string = sys.stdin.read()
            mail_keeper.addLetter(Letter(content=letter_string))


if __name__ == "__main__":
    main(sys.argv[1:])