[OT] Re: return level?
Matej Cepl
matej at ceplovi.cz
Tue Feb 18 17:23:03 CET 2003
David Relson wrote:
> So far, actual details of how he's running bogofilter are lacking...
My python script is attached (be aware that there are
substantial changes -- a new config file -- which are not tested
much yet). On line 488 it prints 256.
Matej
--
Matej Cepl,
Finger: 89EF 4BC6 288A BF43 1BAB 25C3 E09F EF25 D964 84AC
138 Highland Ave. #10, Somerville, Ma 02143, (617) 623-1488
The ratio of literacy to illiteracy is a constant, but nowadays
the illiterates can read.
-- Alberto Moravia
-------------- next part --------------
#!/usr/bin/env python
# This Python program scans an IMAP Inbox and runs every
# entry against SpamAssassin. For any entries that match,
# the message is copied to another folder, and the original
# marked or deleted.
# This software is written and maintained by Roger Binns
# <rogerb at rogerbinns.com> It is distributed under the <a
# href="http://www.opensource.org/licenses/artistic-license.php">Artistic
# License</a>.
# $Id: isbg.py,v 1.8 2003/02/18 04:50:08 matej Exp $
# Release identifier; interpolated into the usage() banner and printed by --version.
version="0.96-20Jan03"
import imaplib
import sys
import re
import os
import popen2
import getpass
import getopt
import string
import socket
import md5
import ConfigParser
import stat
# You can specify your IMAP password using a command line option (--imappassword).
# This, however, is a really bad idea since any user on the system can run
# ps and see the command line arguments. If you really must run non-interactively,
# set 'imappassword' in the config dictionary below (or in ~/.isbgrc) instead.
# Name of the platform's null device; the spam test command's
# output is redirected to it.
devnull="/dev/null"
if os.name=="nt":
    devnull="nul"
# Default settings.  The [isbg] section of ~/.isbgrc (when the file has
# mode 0600) is merged over these, and command line options set
# individual entries afterwards.
config = {'imapuser': getpass.getuser(),
          'imaphost':'localhost',
          'imapport': 0, # autodetect - 143 for standard connection, 993 for imaps
          'usessl': 0,
          'imappassword': None,
          'imapinbox': 'INBOX',
          'spaminbox': 'INBOX.spam',
          'thresholdsize': 120000, # messages larger than this aren't considered
          # file remembering already-seen uids; a per-account default name
          # is derived after option processing when this is None
          'pastuidsfile': None,
          'passwordfilename': None, # where the password is stored if requested
          'savepw': 0, # save the password
          # satest is the command used to test whether the message is spam
          'satest': "spamassassin --exit-code >"+devnull,
          # sasave is the one that dumps out a munged message including report
          'sasave': "spamassassin",
          # what we use to set flags on the original spam in imapbox
          'spamflagscmd': "+FLAGS.SILENT",
          # and the flags we set them to (none by default); kept without the
          # closing parenthesis, which is appended after option processing
          'spamflags': "(",
          # include the spamassassin report in the message placed in spaminbox
          'increport': 1,
          # expunge before quitting, causing all messages marked for deletion
          # to actually be deleted
          'expunge': 0,
          # print imap tracing info
          'verbose': 0,
          # print stats at end
          'stats': 1,
          # IMAP implementation detail
          # Courier IMAP ignores uid fetches where more than a certain number are listed
          # so we break them down into smaller groups of this size
          'uidfetchbatchsize': 25,
          # password saving stuff. A vague level of obfuscation
          'passwordhashlen': 256, # should be a multiple of 16
          'passwordhash': None,
          # value the result of the 'satest' command is compared against in
          # the main loop; a match is treated as "not spam" there
          'spamreturnvalue': 0} # bogofilter returns different errorlevel
#read rc file
# or we can just ignore any *rc file which is not 0600
rcfilename=os.path.expanduser("~"+os.sep+".isbgrc")
if not(os.path.exists(rcfilename)):
print "File not exists."
else:
rcmode=oct(stat.S_IMODE(os.stat(rcfilename)[stat.ST_MODE]))
if (rcmode == '0600'):
rcconfigfile = ConfigParser.ConfigParser()
rcconfigfile.read(rcfilename)
rcconfig = {}
for sel in rcconfigfile.options('isbg'):
value=rcconfigfile.get('isbg',sel)
rcconfig[sel] = value
config.update(rcconfig)
for sel in ['imapport', 'usessl', 'thresholdsize', 'savepw',
'increport', 'expunge', 'verbose', 'stats', 'uidfetchbatchsize',
'passwordhashlen', 'spamreturnvalue']:
config[sel] = int(config[sel])
# Usage message - note that not all options are documented
def usage():
    """Write the option summary to stderr and exit with status 1.

    The --ssl line is only included when the socket module has an
    "ssl" attribute.
    """
    if hasattr(socket, "ssl"):
        sslmsg="""
--ssl Make an SSL connection to the IMAP server"""
    else:
        sslmsg=""
    # values substituted into the banner, in placeholder order
    fillers=(version, config['imaphost'], sslmsg, config['imapuser'],
             config['imapinbox'], config['spaminbox'],
             config['thresholdsize'])
    sys.stderr.write("""isbg: IMAP Spam begone %s
All options are optional
--imaphost hostname IMAP server name [%s]%s
--imapuser username Who you login as [%s]
--imapinbox mbox Name of your inbox folder [%s]
--spaminbox mbox Name of your spam folder [%s]
--maxsize numbytes Messages larger than this will be ignored as they are
unlikely to be spam [%d]
--noreport Don't include the SpamAssassin report in the message
copied to your spam folder
--flag The spams will be flagged in your inbox
--delete The spams will be marked for deletion from your inbox
--expunge Cause marked for deletion messages to also be deleted
(only useful if --delete is specified)
--verbose Show IMAP stuff happening
--spamc Use spamc instead of standalone SpamAssassin binary
--savepw Store the password to be used in future runs
--nostats Don't print stats
(Your inbox will remain untouched unless you specify --flag or --delete)
See http://www.rogerbinns.com/isbg for more details\n""" % fillers)
    sys.exit(1)
def errorexit(msg):
    """Write msg and a pointer to --help to stderr, then exit with status 3."""
    report=sys.stderr.write
    report(msg)
    report("\nUse --help to see valid options and arguments\n")
    sys.exit(3)
def addspamflag(flag):
    """Append an IMAP flag name to the flag list in config['spamflags'].

    The list is stored as a string that starts with "(" and has no
    closing parenthesis yet; flags are separated by single spaces.

    flag -- the flag text to append
    """
    current=config['spamflags']
    # more than the opening "(" means at least one flag is already listed
    if len(current)>1:
        current=current+" "
    config['spamflags']=current+flag
def hexof(x):
    """Return x with every character replaced by its two-digit lowercase hex code."""
    return "".join(["%02x" % ord(ch) for ch in x])
def hexdigit(c):
    """Return the numeric value (0-15) of the hexadecimal digit c.

    Both lower-case and upper-case letters are accepted.
    Raises ValueError when c is not a hexadecimal digit.
    """
    if c>='0' and c<='9':
        return ord(c)-ord('0')
    if c>='a' and c<='f':
        return 10+ord(c)-ord('a')
    if c>='A' and c<='F':
        # measured from 'A', not 'a', so that 'A'..'F' map to 10..15
        return 10+ord(c)-ord('A')
    raise ValueError(repr(c)+" is not a valid hexadecimal digit")
def dehexof(x):
    """Decode a string of two-digit hex codes back into characters.

    Each pair of characters of x is turned into one character via
    hexdigit(); the decoded characters are returned as one string.
    """
    chars=[]
    for pos in range(0, len(x), 2):
        high=hexdigit(x[pos])
        low=hexdigit(x[pos+1])
        chars.append(chr(16*high+low))
    return "".join(chars)
## argument processing
longopts=[ "imaphost=", "imapuser=", "imapinbox=", "spaminbox=",
"maxsize=", "noreport", "flag", "delete", "expunge", "verbose",
"trackfile=", "spamc", "ssl", "savepw", "nostats",
# options not mentioned in usage
"imappassword=", "satest=", "sasave=", "spamflagscmd=", "spamflags=",
"help", "version", "imapport=", "passwordfilename="
]
try:
opts, pargs=getopt.getopt(sys.argv[1:], None, longopts)
except Exception,e:
errorexit("option processing failed - "+str(e))
if len(pargs):
errorexit("unrecognised option(s) - "+`pargs`)
for p in opts:
if p[0]=="--maxsize":
try:
config['thresholdsize']=int(p[1])
except:
errorexit("Unrecognized size - "+p[1])
if config['thresholdsize']<1:
errorexit("Size "+`config['thresholdsize']`+" is too small")
elif p[0]=="--imapport":
config['imapport']=int(p[1])
elif p[0]=="--noreport":
config['increport']=0
elif p[0]=="--flag":
addspamflag("\\Flagged")
elif p[0]=="--delete":
addspamflag("\\Deleted")
elif p[0]=="--spamc":
config['satest']="spamc -c >"+devnull
config['sasave']="spamc"
elif p[0]=="--expunge":
config['expunge']=1
elif p[0]=="--verbose":
config['verbose']=1
elif p[0]=="--ssl":
config['usessl']=1
elif p[0]=="--savepw":
config['savepw']=1
elif p[0]=="--nostats":
config['stats']=0
elif p[0]=="--help":
usage()
elif p[0]=="--version":
print version
sys.exit(0)
elif p[0]=="--trackfile":
config['pastuidsfile']=p[1]
else:
locals()[p[0][2:]]=p[1]
print config
# fixup any arguments

# the flag list is built up without its closing parenthesis
if config['spamflags'][-1]!=')':
    config['spamflags']+=')'

# port 0 means "pick the default for the kind of connection"
if config['imapport']==0:
    config['imapport']=143
    if config['usessl']:
        config['imapport']=993

# without an explicit track file, use one per host/user/port combination
if config['pastuidsfile'] is None:
    m=md5.new()
    for part in (config['imaphost'], config['imapuser'],
                 repr(config['imapport'])):
        m.update(part)
    res=hexof(m.digest())
    config['pastuidsfile']=os.path.expanduser("~"+os.sep+".isbg-track")+res
# Password stuff
def getpw(data,hash):
    """Recover a password from its stored form.

    data and hash are XORed character by character, for at most
    config['passwordhashlen'] positions; decoding stops at the first
    position where the result is zero.  Returns the decoded characters
    as a string.
    """
    limit=config['passwordhashlen']
    chars=[]
    pos=0
    while pos<limit:
        code=ord(data[pos]) ^ ord(hash[pos])
        if code==0:
            break
        chars.append(chr(code))
        pos=pos+1
    return "".join(chars)
def setpw(pw, hash):
    """Return hash with its leading characters XORed with the characters of pw.

    Raises ValueError when pw is longer than config['passwordhashlen'].
    """
    limit=config['passwordhashlen']
    if len(pw)>limit:
        raise ValueError("password of length %d is too long to store (max accepted is %d)" % (len(pw), limit))
    masked=list(hash)
    pos=0
    for ch in pw:
        masked[pos]=chr(ord(masked[pos]) ^ ord(ch))
        pos=pos+1
    return "".join(masked)
if config['passwordfilename'] is None:
m=md5.new()
m.update(config['imaphost'])
m.update(config['imapuser'])
m.update(`config['imapport']`)
config['passwordfilename']=os.path.expanduser("~"+os.sep+".isbg-"+hexof(m.digest()))
if config['passwordhash'] is None:
# We make hash that the password is xor'ed against
m=md5.new()
m.update(config['imaphost'])
m.update(m.digest())
m.update(config['imapuser'])
m.update(m.digest())
m.update(`config['imapport']`)
m.update(m.digest())
config['passwordhash']=m.digest()
while len(config['passwordhash'])<config['passwordhashlen']:
m.update(config['passwordhash'])
config['passwordhash']=config['passwordhash']+m.digest()
if config['verbose']:
print "Trackfile is", config['pastuidsfile']
print "SpamFlags are", config['spamflags']
print "Password file is", config['passwordfilename']
# Figure out the password
if config['imappassword'] is None:
if not config['savepw'] and os.path.exists(config['passwordfilename']):
try:
config['imappassword']=getpw(dehexof(open(config['passwordfilename'], "rb").read()), config['passwordhash'])
if config['verbose']: print "Successfully read password file"
except:
pass
# do we have to prompt?
if config['imappassword'] is None:
config['imappassword']=getpass.getpass("IMAP password for %s@%s: " % (config['imapuser'], config['imaphost']))
# Should we save it?
if config['savepw']:
f=open(config['passwordfilename'], "wb+")
try:
os.chmod(config['passwordfilename'], 0600)
except:
pass
f.write(hexof(setpw(config['imappassword'], config['passwordhash'])))
f.close()
# pastuids keeps track of which uids we have already seen, so
# that we don't analyze them multiple times. We store its
# contents between sessions by saving into a file as Python
# code (makes loading it here real easy since we just source
# the file)
pastuids=[]
try:
    # NOTE(review): execfile runs whatever Python code the track file
    # contains - keep that file writable by its owner only.
    execfile(config['pastuidsfile'])
except IOError:
    # no readable track file (e.g. on the first run) - start empty
    pass
except Exception:
    # a damaged track file should not go unnoticed, but is not fatal
    sys.stderr.write("Ignoring unusable track file %s: %s\n"
                     % (config['pastuidsfile'], sys.exc_info()[1]))
    pastuids=[]
if not isinstance(pastuids, list):
    # the file assigned something other than a list
    pastuids=[]

# remember what pastuids looked like so that we can compare at the end
origpastuids=pastuids[:]
# This function gets the list of uids corresponding
# to a message range
gure=re.compile(r"[0-9]+ \(UID ([0-9]+)\)")
def getuids(imap, low, high):
range=`low`+":"+`high`
res=imap.fetch(range, "UID")
assertok(res, 'fetch', range, 'UID')
res2=[]
for i in res[1]:
mo=gure.match(i)
if mo is None:
if verbose: print "getuids Eh?", i
else:
res2.append(mo.group(1))
return res2
# This function gets the size of each message in the provided
# list
gsre=re.compile(r"[0-9]+ \(UID ([0-9]+) RFC822.SIZE ([0-9]+)\)")
def getsizes(imap, msgs):
res2=[]
# Python really needs do - while
while 1:
if len(msgs)==0: break
if len(msgs)>config['uidfetchbatchsize']:
msgmore=msgs[config['uidfetchbatchsize']:]
msgs=msgs[:config['uidfetchbatchsize']]
else:
msgmore=[]
msgs=string.join(msgs, ',')
res=imap.uid("FETCH", msgs, "(UID RFC822.SIZE)")
assertok(res, "uid fetch", msgs, "(UID RFC822.SIZE)")
for i in res[1]:
mo=gsre.match(i)
if mo is None:
if config['verbose']: print "getsize Eh?", i
else:
res2.append((mo.group(2), mo.group(1)))
msgs=msgmore
return res2
# This function makes sure that each line ends in <CR><LF>
# SpamAssassin strips out the <CR> normally
# The pattern matches every <LF> that is not already preceded by <CR>,
# including one at the very start of the text.
crnlre=re.compile(r"(?<!\r)\n")
def crnlify(text):
    """Return text with every bare <LF> replaced by <CR><LF>.

    Line ends that already are <CR><LF> are left untouched.
    """
    return crnlre.sub("\r\n", text)
# This function checks that the return code is OK
# It also prints out what happened (which would end
# up /dev/null'ed in non-verbose mode)
def assertok(res,*args):
if res[0]!="OK":
sys.stderr.write("\n%s returned %s - aborting\n" % (`args`, res ))
sys.exit(2)
if config['verbose']:
print `args`, "=", res
# This class implements imap over SSL.
class IMAP4S(imaplib.IMAP4):
    """imaplib.IMAP4 variant whose traffic goes through socket.ssl."""

    def __init__(self, host='', port=993):
        imaplib.IMAP4.__init__(self, host, port)

    def open(self, host, port):
        """Connect to host:port and wrap the connection in SSL."""
        # record the endpoint here instead of relying on the base class
        # having stored it before calling open()
        self.host=host
        self.port=port
        self.baresock=socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.baresock.connect( (host, port) )
        self.ssl=socket.ssl( self.baresock )

    def read(self, size):
        """Read up to size bytes; fewer only if the stream ends first."""
        chunks=[]
        got=0
        while got<size:
            data=self.ssl.read(size-got)
            if not data:
                # an empty read means no more data will come; stop
                # instead of looping forever
                break
            chunks.append(data)
            got=got+len(data)
        return "".join(chunks)

    def readline(self):
        """Read one line, including its newline if the stream supplies one."""
        # We can only do one character of lookahead, so this is done character by character
        chars=[]
        while 1:
            last=self.ssl.read(1)
            if not last:
                # end of stream before a newline arrived
                break
            chars.append(last)
            if last=="\n":
                break
        return "".join(chars)

    def send(self, data):
        """Write all of data, retrying after partial writes."""
        while len(data):
            l=self.ssl.write(data)
            if l==len(data): break
            data=data[l:]

    def shutdown(self):
        """Drop the SSL object and close the underlying socket."""
        del self.ssl
        self.baresock.close()

    def socket(self):
        """Do not send or receive any data on the returned socket otherwise you
        will break the ssl connection. Only set socket options and that sort
        of thing"""
        return self.baresock
# Main code starts here

# plain or SSL connection, as configured
if not config['usessl']:
    imap=imaplib.IMAP4(config['imaphost'], config['imapport'])
else:
    imap=IMAP4S(config['imaphost'], config['imapport'])

# Authenticate (only simple supported); the password is not echoed in
# the trace
res=imap.login(config['imapuser'], config['imappassword'])
assertok(res, "login",config['imapuser'], 'xxxxxxxx')

# check spaminbox exists by examining it, then select the inbox the
# same way
for mailbox in (config['spaminbox'], config['imapinbox']):
    res=imap.select(mailbox, 1)
    assertok(res, 'select', mailbox, 1)

# the response to the last select carries the number of messages
low=1
high=int(res[1][0])
# get the corresponding UIDs
alluids=getuids(imap,low,high)
uids=[]
for i in alluids:
if i not in pastuids:
uids.append(i)
# for the uids we haven't seen before, get their sizes
# The code originally got both the UIDs and size at the
# same time. This however took significantly longer as
# I assume it stat()ed and perhaps even opened every message,
# even the ones we had seen before
sizeduids=getsizes(imap, uids)
uids=[]
for i in sizeduids:
if int(i[0])>config['thresholdsize']:
pastuids.append(i[1])
if verbose:
print i[1], "is", i[0], "bytes so it is being skipped"
else:
uids.append(i[1])
# Keep track of new spam uids
spamlist=[]
# Main loop that iterates over each new uid we haven't seen before
for u in uids:
# Double check
if u in pastuids: continue
if config['verbose']: print u
# Retrieve the entire message
res=imap.uid("FETCH", u, "(RFC822)")
if res[0]!="OK":
assertok(res, 'uid fetch', u, '(RFC822)')
try:
body=res[1][0][1]
except:
if config['verbose']:
print "Confused - rfc822 fetch gave "+`res`
print "The message was probably deleted while we are running"
pastuids.append(u)
# Feed it to SpamAssassin in test mode
p=os.popen(config['satest'], 'w')
p.write(body)
code=p.close()
errstr="Return code is "+str(code)+" spamreturnvalue is "
errstr+=str(config['spamreturnvalue'])+"."
print errstr
if code == config['spamreturnvalue']:
# Message is below threshold
pastuids.append(u)
else:
# Message is spam
if config['verbose']: print u, "is spam"
spamlist.append(u)
# do we want to include the spam report
if config['increport']:
# filter it through sa
out,inp=popen2.popen2(config['sasave'])
inp.write(body)
inp.close()
body=out.read()
out.close()
body=crnlify(body)
res=imap.append(config['spaminbox'], None, None, body)
assertok(res, 'append', config['spaminbox'], "{body}")
else:
# just copy it as is
res=imap.uid("COPY", u, config['spaminbox'])
assertok(res, "uid copy", u, config['spaminbox'])
# If we found any spams, now go and mark the original messages
if spamlist:
    # select the inbox again, this time without the extra argument
    # used for the earlier selects
    res=imap.select(config['imapinbox'])
    assertok(res, 'select', config['imapinbox'])
    flagcmd=config['spamflagscmd']
    flags=config['spamflags']
    for u in spamlist:
        res=imap.uid("STORE", u, flagcmd, flags)
        assertok(res, "uid store", u, flagcmd, flags)
        pastuids.append(u)
    # only useful if we marked messages Deleted
    if config['expunge']:
        imap.expunge()

# sign off
imap.logout()
del imap
# Now tidy up lists of uids
newpastuids=[]
for i in pastuids:
if i in alluids and i not in newpastuids:
newpastuids.append(i)
# only write out pastuids if it has changed
if newpastuids!=origpastuids:
f=open(config['pastuidsfile'], "w+")
try:
os.chmod(config['pastuidsfile'], 0600)
except:
pass
f.write("pastuids=")
f.write(`newpastuids`)
f.write("\n")
f.close()
if config['stats']:
print "%d spams found in %d messages" % (len(spamlist), len(uids))
More information about the Bogofilter
mailing list