[PATCH] bulkmode registration

David Relson relson at osagesoftware.com
Wed May 7 01:37:24 CEST 2003


Greetings,

For all of you who have been clamoring for registration of maildirs, 
attached is a patch relative to the cvs repository.

With the attached patch, bogofilter passes "make check" as well as a 
cursory test (27 messages, registered via "bogofilter -s -b -v < list -d 
dir").  The code is believed to work, though it hasn't been stress tested.

Volunteers needed to test it more thoroughly!

David

P.S.  If there's a need, I can make a .tgz available with the current source code.
-------------- next part --------------
Index: bogoconfig.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/bogoconfig.c,v
retrieving revision 1.51
diff -u -r1.51 bogoconfig.c
--- bogoconfig.c	2 May 2003 23:03:04 -0000	1.51
+++ bogoconfig.c	6 May 2003 23:26:54 -0000
@@ -321,9 +321,9 @@
 */
 	);
     (void)fprintf(stderr,
-		  "\t  -M      - set mailbox mode. Classify multiple messages in an mbox formatted file.\n"
-		  "\t  -b      - set streaming bulk mode. Classify multiple messages whose filenames are read from STDIN.\n"
-		  "\t  -B name1 name2 ... - set bulk mode. Classify multiple messages named as files on the command line.\n"
+		  "\t  -M      - set mailbox mode.  Classify multiple messages in an mbox formatted file.\n"
+		  "\t  -b      - set streaming bulk mode. Process multiple messages whose filenames are read from STDIN.\n"
+		  "\t  -B name1 name2 ... - set bulk mode. Process multiple messages named as files on the command line.\n"
 		  "\t  -F      - force printing of spamicity numbers.\n"
 		  "\t  -R      - print an R data frame.\n"
 	);
Index: bogofilter.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/bogofilter.c,v
retrieving revision 1.10
diff -u -r1.10 bogofilter.c
--- bogofilter.c	21 Apr 2003 17:03:17 -0000	1.10
+++ bogofilter.c	6 May 2003 23:26:54 -0000
@@ -46,7 +46,7 @@
     method->print_stats(fp);
 }
 
-rc_t bogofilter(double *xss) /*@globals errno@*/
+rc_t bogofilter()
 /* evaluate text for spamicity */
 {
     rc_t	status;
@@ -77,9 +77,6 @@
     spamicity = method->compute_spamicity(wordhash, NULL);
 
     status = method->status();
-
-    if (xss != NULL)
-        *xss = spamicity;
 
     if (run_type & RUN_UPDATE)		/* Note: don't register if RC_UNSURE */
     {
Index: bogofilter.h
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/bogofilter.h,v
retrieving revision 1.4
diff -u -r1.4 bogofilter.h
--- bogofilter.h	19 Apr 2003 01:09:02 -0000	1.4
+++ bogofilter.h	6 May 2003 23:26:54 -0000
@@ -8,10 +8,10 @@
 
 #define DEVIATION(n)	fabs((n) - EVEN_ODDS)	/* deviation from average */
 
-typedef enum rc_e {RC_SPAM=0, RC_HAM=1, RC_UNSURE=2, RC_MORE}  rc_t;
+typedef enum rc_e {RC_SPAM=0, RC_HAM=1, RC_UNSURE=2, RC_OK, RC_MORE}  rc_t;
 
 extern void initialize_constants(void);
-extern rc_t bogofilter(/*@out@*/ double *xss);
+extern rc_t bogofilter(void);
 extern void print_stats(FILE *fp);
 
 #endif	/* BOGOFILTER_H */
Index: main.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/main.c,v
retrieving revision 1.40
diff -u -r1.40 main.c
--- main.c	28 Apr 2003 04:01:59 -0000	1.40
+++ main.c	6 May 2003 23:26:54 -0000
@@ -53,6 +53,8 @@
 
 extern int Rtable;
 
+FILE  *fpo;
+
 char msg_register[256];
 char msg_bogofilter[256];
 size_t msg_register_size = sizeof(msg_register);
@@ -74,7 +76,7 @@
     exit(exitcode);
 }
 
-static int classify(int argc, char **argv, FILE *out);
+static rc_t classify(void);
 static void initialize(FILE *fp);
 
 static void initialize(FILE *fp)
@@ -86,7 +88,9 @@
     lexer_v3_init(fp);
 }
 
-static int classify(int argc, char **argv, FILE *out)
+typedef rc_t (*arg_foreach_t)(void);
+
+static int arg_foreach(arg_foreach_t hook, int argc, char **argv)
 {
     int  exitcode = 0;
     bool error = false;
@@ -133,31 +137,24 @@
 		continue;
 	    }
 	    initialize(fpin);
-	    fprintf(out, "%s ", filename ); 
+	    fprintf(dbgout, "%s ", filename ); 
 	}
 
-	passthrough_setup();
-	do {
-	    init_msg_counts();
-	    token_init();
-	    init_charset_table(charset_default, true);
-
-	    status = bogofilter(NULL);
-	    write_message(out, status);
-
-	    rstats_cleanup();
-	} while (status == RC_MORE);
-
-	passthrough_cleanup();
-
-	if (bulk_mode == B_NORMAL && status != RC_MORE) {
-	    exitcode = (status == RC_SPAM) ? 0 : 1;
-	    if (nonspam_exits_zero && passthrough && exitcode == 1)
-		exitcode = 0;
-	    done = true;
-	}
-	else {
-	    exitcode = !error ? 0 : 1;
+	status = hook();
+
+	exitcode = !error ? 0 : 1;
+
+	if (bulk_mode == B_NORMAL) {
+	    if ((run_type & (REG_SPAM | REG_GOOD | UNREG_SPAM | UNREG_GOOD)) != 0)
+		done = true;
+	    else {
+		if (status != RC_MORE) {
+		    exitcode = (status == RC_SPAM) ? 0 : 1;
+		    if (nonspam_exits_zero && passthrough && exitcode == 1)
+			exitcode = 0;
+		    done = true;
+		}
+	    }
 	}
     }
     return exitcode;
@@ -166,23 +163,21 @@
 int main(int argc, char **argv) /*@globals errno,stderr,stdout@*/
 {
     int   exitcode;
-    FILE  *out;
 
     process_args_and_config_file(argc, argv, true);
 
     /* open all wordlists */
     open_wordlists((run_type == RUN_NORMAL) ? DB_READ : DB_WRITE);
 
-    out = output_setup();
+    fpo = output_setup();
 
     initialize(NULL);
 
     if (run_type & (RUN_NORMAL | RUN_UPDATE)) {
-	exitcode = classify(argc, argv, out);
+	exitcode = arg_foreach(classify, argc, argv);
     }
     else {
-	register_messages(run_type);
-	exitcode = 0;
+	exitcode = arg_foreach(register_messages, argc, argv);
     }
 
     close_wordlists(false);
@@ -251,6 +246,27 @@
 }
 
 typedef int (*readfunc_t)(char **, void *);
+
+static rc_t classify()
+{
+    rc_t status = RC_MORE;
+
+    passthrough_setup();
+    do {
+	init_msg_counts();
+	token_init();
+	init_charset_table(charset_default, true);
+	
+	status = bogofilter();
+	write_message(fpo, status);
+	
+	rstats_cleanup();
+    } while (status == RC_MORE);
+    
+    passthrough_cleanup();
+
+    return status;
+}
 
 static void write_message(FILE *fp, rc_t status)
 {
Index: register.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/register.c,v
retrieving revision 1.11
diff -u -r1.11 register.c
--- register.c	19 Apr 2003 01:09:02 -0000	1.11
+++ register.c	6 May 2003 23:26:54 -0000
@@ -53,7 +53,7 @@
   format_log_update(msg_register, msg_register_size, u, r, wordcount, msgcount);
 
   if (verbose)
-    (void)fprintf(stderr, "# %d word%s, %d message%s\n", 
+    (void)fprintf(dbgout, "# %d word%s, %d message%s\n", 
 		  wordcount, PLURAL(wordcount), msgcount, PLURAL(msgcount));
 
   set_list_active_status(false);
@@ -128,7 +128,7 @@
  * cap-and-accumulation phase. we save more than half of the execution
  * time for big mbox inputs, when teaching bogofilter.
  */
-void register_messages(run_t _run_type)
+rc_t register_messages()
 {
   wordhash_t *words = wordhash_init();
   long	msgcount = 0;
@@ -148,6 +148,8 @@
   } while (token_type != NONE);
 
   wordhash_sort(words);
-  register_words(_run_type, words, msgcount);
+  register_words(run_type, words, msgcount);
   wordhash_free(words);
+
+  return RC_OK;
 }
Index: register.h
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/register.h,v
retrieving revision 1.2
diff -u -r1.2 register.h
--- register.h	16 Apr 2003 17:30:17 -0000	1.2
+++ register.h	6 May 2003 23:26:54 -0000
@@ -6,7 +6,7 @@
 
 #include <wordhash.h>
 
-extern void register_messages(run_t _run_type);
+extern rc_t register_messages(void);
 extern void register_words(run_t _run_type, wordhash_t *h, int msgcount);
 
 #endif	/* REGISTER_H */
Index: tests/bogofilter/t.bulkmode
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/tests/bogofilter/t.bulkmode,v
retrieving revision 1.12
diff -u -r1.12 t.bulkmode
--- tests/bogofilter/t.bulkmode	28 Apr 2003 04:02:04 -0000	1.12
+++ tests/bogofilter/t.bulkmode	6 May 2003 23:26:54 -0000
@@ -93,20 +93,20 @@
 # test scoring of files listed on stdin
 
 NAME="bulk-stdin"
-ls $pattern | $BOGOFILTER -c $CFG $OPT -b | \
+ls $pattern | $BOGOFILTER -c $CFG $OPT -b -D | \
    sed s@.*inputs/@./inputs/@ >${TMPDIR}/$NAME.out
 
 # test scoring of files listed on linend
 
 NAME="bulk-linend"
-$BOGOFILTER -c $CFG $OPT -B `ls $pattern` | \
+$BOGOFILTER -c $CFG $OPT -B -D `ls $pattern` | \
    sed s@.*inputs/@./inputs/@ >${TMPDIR}/$NAME.out >${TMPDIR}/$NAME.out
 
 # test scoring each file twice (using linend)
 
 NAME="bulk-double-1"
 for f in $pattern ; do 
-    map_rc $BOGOFILTER -c $CFG $OPT -B $f $f | \
+    map_rc $BOGOFILTER -c $CFG $OPT -B -D $f $f | \
	sed s@.*inputs/@./inputs/@ >> ${TMPDIR}/$NAME.tmp
 done
 sort -u < ${TMPDIR}/$NAME.tmp > ${TMPDIR}/$NAME.out
@@ -117,7 +117,7 @@
 for f in $pattern ; do 
     t="${TMPDIR}/`basename $f`"
     grep -v "^From "< $f > $t
-    map_rc $BOGOFILTER -c $CFG $OPT -B $t $t | \
+    map_rc $BOGOFILTER -c $CFG $OPT -B -D $t $t | \
	sed s@.*/@./inputs/@ >> ${TMPDIR}/$NAME.tmp
 done
 sort -u < ${TMPDIR}/$NAME.tmp > ${TMPDIR}/$NAME.out



More information about the bogofilter-dev mailing list