[PATCH] -U option as the opposite of -u
Chris Wilkes
cwilkes-bf at ladro.com
Tue Jan 28 19:25:17 CET 2003
Hi,
Many thanks to David Relson who told me about this list and helped me
through some of the code.
I use bogofilter's -u switch to automatically register mail whenever
it comes in. Once in a while an email is mislabeled and I would like to
undo it. I would like BF to be smart enough to figure out whether it
should run the -S or the -N switch based on the message's current classification.
So I put in a patch to BF 0.10.1.2 that gives a -U switch. In a
nutshell I've added:
run_type = RUN_UPDATE_REVERSED
to the possibilities of run_type. That part of bogofilter.c has also
changed a bit with the addition of a 'todo' variable that's the same
type as run_type and gets set to what should be called in the
register_words() function.
I modified the bogofilter.xml file to update the man pages, but I
couldn't run make on it, as I couldn't figure out how to install the
getopt long-parameters package that xmlto requires on my FreeBSD box. If
anyone has a pointer, please let me know.
Here's what happens when you run with -U where $s is the email that
was marked as good and should be spam:
$ bogofilter -v < $s
X-Bogosity: No, tests=bogofilter, spamicity=0.491674, version=0.10.1.2
$ bogofilter -U < $s
$ bogofilter -v < $s
X-Bogosity: Yes, tests=bogofilter, spamicity=0.660724, version=0.10.1.2
The files I've changed:
config.c # add "todo" and the -U switch to getopt
main.c # add case of RUN_UPDATE_REVERSED to switch
common.h # made "todo" of type run_t -- could change this
bogofilter.c # updated the register_words() call
doc/bogofilter.xml # add -U to the man page
robinson.c # have to put run_type = RUN_UPDATE_REVERSED in if()
Chris
--- config.c Fri Jan 24 05:48:43 2003
+++ config.c.new Tue Jan 28 10:16:41 2003
@@ -69,6 +69,7 @@
bool stats_in_header = true;
run_t run_type = RUN_NORMAL;
+run_t todo;
const char *logtag = NULL;
@@ -362,8 +363,8 @@
{
bool registration, classification;
-/* flags '-s', '-n', '-S', or '-N', are mutually exclusive of flags '-p', '-u', '-e', and '-R'. */
- classification = (run_type == RUN_NORMAL) ||(run_type == RUN_UPDATE) || passthrough || nonspam_exits_zero || (Rtable != 0);
+/* flags '-s', '-n', '-S', or '-N', are mutually exclusive of flags '-p', '-u', '-U', '-e', and '-R'. */
+ classification = (run_type == RUN_NORMAL) ||(run_type == RUN_UPDATE) || passthrough || nonspam_exits_zero || (Rtable != 0) || (run_type == RUN_UPDATE_REVERSED);
registration = (run_type == REG_SPAM) || (run_type == REG_GOOD) || (run_type == REG_GOOD_TO_SPAM) || (run_type == REG_SPAM_TO_GOOD);
if (*outfname && !passthrough)
@@ -379,7 +380,7 @@
"Error: Invalid combination of options.\n"
"\n"
" Options '-s', '-n', '-S', and '-N' are used when registering words.\n"
- " Options '-p', '-u', '-e', and '-R' are used when classifying messages.\n"
+ " Options '-p', '-u', '-U', '-e', and '-R' are used when classifying messages.\n"
" The two sets of options may not be used together.\n"
" \n"
#ifdef GRAHAM_AND_ROBINSON
@@ -422,6 +423,7 @@
"\t-n\t- register message as non-spam.\n"
"\t-o val\t- set user defined spamicity cutoff.\n"
"\t-u\t- classify message as spam or non-spam and register accordingly.\n"
+ "\t-U\t- reverse classification (i.e. was registered as spam but is good) of message\n"
"\t-S\t- move message's words from non-spam list to spam list.\n"
"\t-N\t- move message's words from spam list to spam non-list.\n"
"\t-R\t- print an R data frame.\n"
@@ -486,7 +488,7 @@
fpin = stdin;
- while ((option = getopt(argc, argv, "23d:eFhl::o:snSNvVpuc:CgrRx:fqtI:O:y:k:" G R F)) != EOF)
+ while ((option = getopt(argc, argv, "23d:eFhl::o:snSNvVpuUc:CgrRx:fqtI:O:y:k:" G R F)) != EOF)
{
switch(option)
{
@@ -560,6 +562,10 @@
case 'u':
run_type = RUN_UPDATE;
+ break;
+
+ case 'U':
+ run_type = RUN_UPDATE_REVERSED;
break;
case 'k':
--- main.c Sun Jan 19 07:02:00 2003
+++ main.c.new Tue Jan 28 10:16:41 2003
@@ -98,6 +98,7 @@
switch(run_type) {
case RUN_NORMAL:
case RUN_UPDATE:
+ case RUN_UPDATE_REVERSED:
{
double spamicity;
rc_t status = bogofilter(&spamicity);
@@ -188,6 +189,9 @@
syslog(LOG_INFO, "%s\n", msg_bogofilter);
break;
case RUN_UPDATE:
+ syslog(LOG_INFO, "%s, %s\n", msg_bogofilter, msg_register);
+ break;
+ case RUN_UPDATE_REVERSED: /* what is the msg_register? */
syslog(LOG_INFO, "%s, %s\n", msg_bogofilter, msg_register);
break;
default:
--- common.h Tue Jan 21 19:04:19 2003
+++ common.h.new Tue Jan 28 10:16:41 2003
@@ -57,10 +57,12 @@
typedef enum run_e {
RUN_NORMAL='r',
RUN_UPDATE='u',
+ RUN_UPDATE_REVERSED='U',
REG_SPAM='s', REG_SPAM_TO_GOOD='N',
- REG_GOOD='n', REG_GOOD_TO_SPAM='S'
+ REG_GOOD='n', REG_GOOD_TO_SPAM='S',
} run_t;
extern run_t run_type;
+extern run_t todo;
typedef struct {
double mant;
--- bogofilter.c Sun Jan 19 07:01:50 2003
+++ bogofilter.c.new Tue Jan 28 10:16:41 2003
@@ -58,6 +58,7 @@
wordhash_t *wordhash;
long wordcount, msgcount = 0;
bool cont;
+ /* todo is like run_type */
set_list_active_status(true);
@@ -76,13 +77,22 @@
if (xss != NULL)
*xss = spamicity;
- if (run_type == RUN_UPDATE) /* Note: don't register if RC_UNSURE */
- {
- if (status == RC_SPAM)
- register_words(REG_SPAM, wordhash, msgcount, wordcount);
- if (status == RC_HAM)
- register_words(REG_GOOD, wordhash, msgcount, wordcount);
- }
+ /* updated to include the _REVERSED method */
+ if (run_type == RUN_UPDATE) /* Note: don't register if RC_UNSURE */
+ {
+ if (status == RC_SPAM) todo=REG_SPAM;
+ if (status == RC_HAM) todo=REG_GOOD;
+ }
+ if (run_type == RUN_UPDATE_REVERSED) /* opposite of above */
+ {
+ /* if it was SPAM it should be re-classified as GOOD */
+ if (status == RC_SPAM) todo=REG_SPAM_TO_GOOD;
+ /* if it was GOOD it should be re-classified as SPAM */
+ if (status == RC_HAM) todo=REG_GOOD_TO_SPAM;
+ }
+ /* now that we're through the above choices, actually do something */
+ if (todo)
+ register_words(todo, wordhash, msgcount, wordcount);
wordhash_free(wordhash);
--- doc/bogofilter.xml Wed Jan 22 12:22:20 2003
+++ doc/bogofilter.xml.new Tue Jan 28 10:16:41 2003
@@ -31,6 +31,7 @@
<arg choice='opt'>-3</arg>
<arg choice='opt'>-R</arg>
<arg choice='opt'>-u</arg>
+ <arg choice='opt'>-U</arg>
<arg choice='opt'>-v</arg>
<arg choice='opt'>-V</arg>
<arg choice='opt'>-x <replaceable>flags</replaceable></arg>
@@ -137,6 +138,14 @@
on the spamlist and a non-spam message on the goodlist. If using the
Robinson-Fisher method and the classification is "unsure", the message will
not be registered.</para>
+
+<para>The <option>-U</option> option tells
+<application>bogofilter</application> to register the message's text
+AS THE OPPOSITE of what it currently is classified as. This is so that if
+a message was classified as spam and it is really non-spam you can just
+send it through bogofilter again with the -U option to unregister the
+words in the spamlist and register them in the goodlist. It's essentially
+a -S or -N after knowing the spamicity of the mail.</para>
<para>The <option>-2</option> option tells
<application>bogofilter</application> to binary classify the message as either
--- robinson.c Tue Jan 21 20:41:10 2003
+++ robinson.c.new Tue Jan 28 10:16:41 2003
@@ -293,7 +293,7 @@
** If we're registering tokens, we needn't get .MSG_COUNT
*/
- if (run_type == RUN_NORMAL || run_type == RUN_UPDATE) {
+ if (run_type == RUN_NORMAL || run_type == RUN_UPDATE || run_type == RUN_UPDATE_REVERSED) {
scalefactor = compute_scale();
if (fabs(robs) < EPS)
robs = ROBS;
More information about the bogofilter-dev
mailing list