Please review batch mode patch from Mike Tillberg, SF #648494

Matthias Andree matthias.andree at gmx.de
Sat Dec 7 04:52:12 CET 2002


Hi,

attached is Mike Tillberg's patch, rediffed as a unified diff and
adjusted to current CVS. Please comment. Original submission is at
http://sourceforge.net/tracker/index.php?func=detail&aid=648494&group_id=62265&atid=499999

His original description from the tracker:

| This patch allows bogofilter to process multiple
| messages in -p mode. I haven't tested it in -u mode,
| but since it goes through all the DB access routines
| for each message, it should work. It's a quick hack,
| and I haven't thought through any implications, so use at your
| own risk. The patch makes two changes:
| 
| bogofilter() takes a boolean pointer to return the
| continuation bool from collect_words, and main wraps
| everything after initialization in a do while loop.
| 
| lexer.l provides a reset function that clears out the
| textblock list it maintains to output the original
| message. This prevents the messages from piling up and
| being output multiple times.

-- 
Matthias Andree
-------------- next part --------------
Index: bogofilter.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/bogofilter.c,v
retrieving revision 1.93
diff -u -r1.93 bogofilter.c
--- bogofilter.c	6 Dec 2002 00:07:33 -0000	1.93
+++ bogofilter.c	7 Dec 2002 03:49:29 -0000
@@ -50,14 +50,14 @@
     method->print_stats(fp, spamicity);
 }
 
-rc_t bogofilter(double *xss) /*@globals errno@*/
+rc_t bogofilter(double *xss, bool *cont) /*@globals errno@*/
 /* evaluate text for spamicity */
 {
     rc_t	status;
     double 	spamicity;
     wordhash_t  *wordhash;
     long	wordcount, msgcount = 0;
-    bool	cont;
+    /*bool	cont;*/
 
     good_list.active = spam_list.active = true;
 
@@ -70,9 +70,9 @@
 
     /* tokenize input text and save words in a wordhash. */
     do {
-	collect_words(&wordhash, &wordcount, &cont);
+	collect_words(&wordhash, &wordcount, cont);
 	++msgcount;
-    } while(cont);
+    } while(*cont);
 
     spamicity = method->compute_spamicity(wordhash, NULL);
 
Index: bogofilter.h
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/bogofilter.h,v
retrieving revision 1.33
diff -u -r1.33 bogofilter.h
--- bogofilter.h	30 Nov 2002 22:38:38 -0000	1.33
+++ bogofilter.h	7 Dec 2002 03:49:29 -0000
@@ -11,7 +11,7 @@
 typedef enum rc_e {RC_SPAM=0, RC_HAM=1, RC_UNSURE=2}  rc_t;
 
 extern void initialize_constants(void);
-extern rc_t bogofilter(/*@out@*/ double *xss);
+extern rc_t bogofilter(/*@out@*/ double *xss, bool *cont);
 extern void print_stats(FILE *fp, double spamicity);
 
 #endif	/* HAVE_BOGOFILTER_H */
Index: collect.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/collect.c,v
retrieving revision 1.3
diff -u -r1.3 collect.c
--- collect.c	29 Nov 2002 15:24:02 -0000	1.3
+++ collect.c	7 Dec 2002 03:49:29 -0000
@@ -46,6 +46,7 @@
 
     wordprop_t *w;
     wordhash_t *h = wordhash_init();
+    reset_lexer();
 
     for (;;){
 	token_t token_type = get_token();
Index: lexer.h
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/lexer.h,v
retrieving revision 1.16
diff -u -r1.16 lexer.h
--- lexer.h	4 Dec 2002 17:09:47 -0000	1.16
+++ lexer.h	7 Dec 2002 03:49:29 -0000
@@ -32,5 +32,6 @@
 extern int lexer_fgets(char *buf, int max_size, FILE *s);
 extern token_t get_token(void);
 extern token_t yylex(void);
+void reset_lexer(void);
 
 #endif	/* HAVE_LEXER_H */
Index: lexer.l
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/lexer.l,v
retrieving revision 1.41
diff -u -r1.41 lexer.l
--- lexer.l	6 Dec 2002 23:57:33 -0000	1.41
+++ lexer.l	7 Dec 2002 03:49:29 -0000
@@ -387,6 +387,13 @@
     return(class);
 }
 
+void reset_lexer(void)	/* Clear the lexer's saved textblock list so the next message starts from an empty one. */
+{
+  bzero(&textblocks, sizeof(struct textblock));	/* zero the list head in place; NOTE(review): anything previously linked from it is not freed here -- confirm this does not leak */
+  textend = &textblocks;	/* point textend back at the head, presumably marking the list as empty -- verify against the code that appends blocks */
+}
+
+
 /*
  * The following sets edit modes for GNU EMACS
  * Local Variables:
Index: main.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/main.c,v
retrieving revision 1.92
diff -u -r1.92 main.c
--- main.c	5 Dec 2002 14:16:35 -0000	1.92
+++ main.c	7 Dec 2002 03:49:29 -0000
@@ -113,6 +113,7 @@
 int main(int argc, char **argv) /*@globals errno,stderr,stdout@*/
 {
     int   exitcode;
+    bool  cont;
 
     if ((set_dir_from_env(directory, "HOME", BOGODIR, sizeof(directory)) < 0)
 	|| (set_dir_from_env(directory, "BOGOFILTER_DIR", NULL, sizeof(directory)) < 0)) {
@@ -132,12 +133,15 @@
 
     process_config_files();
 
+    cont = 0;
+
+    do {
     switch(run_type) {
 	case RUN_NORMAL:
 	case RUN_UPDATE:
 	    {
 		double spamicity;
-		rc_t   status = bogofilter(&spamicity);
+		rc_t   status = bogofilter(&spamicity, &cont);
 
 		if (passthrough)
 		{
@@ -205,8 +209,6 @@
 	    break;
     }
 
-    close_lists();
-
 #ifdef HAVE_SYSLOG_H
     if (logflag)
     {
@@ -228,6 +230,9 @@
 	closelog();
     }
 #endif
+    } while (cont);
+
+    close_lists();
 
     exit(exitcode);
 }



More information about the bogofilter-dev mailing list