bulk_mode patch - version 2

David Relson relson at osagesoftware.com
Mon Apr 14 17:18:26 CEST 2003


Michael,

Here's version 2 of the bulkmode patch.  I've added a number of 
initialization/reset functions and calls to them.

I've run it on a variety of test messages and see no problems.

This patch replaces the previous patch.  You can revert the old patch and 
apply the new one, or you can start with a clean source tree and apply the 
new patch.  The patch is relative to version 0.11.2, though I think it also 
applies to 0.11.1.8 or anything newer.

Please keep the list posted on your use of bulkmode.

Cheers!

David
-------------- next part --------------
Index: bogoconfig.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/bogoconfig.c,v
retrieving revision 1.42
diff -u -r1.42 bogoconfig.c
--- bogoconfig.c	13 Apr 2003 16:32:53 -0000	1.42
+++ bogoconfig.c	14 Apr 2003 15:11:20 -0000
@@ -330,6 +330,9 @@
 		  "\t-q\t- quiet - don't print warning messages.\n"
 		  "\t-l\t- write messages to syslog.\n");
     (void)fprintf(stderr,
+		  "\t-b\t- set streaming bulk mode. Classify multiple messages whose filenames are read from STDIN.\n"
+		  "\t-B name1 name2 ...\t- set bulk mode. Classify multiple messages named as files on the command line.\n");
+    (void)fprintf(stderr,
 		  "\t-L tag\t- specify the tag value for log messages.\n"
 		  "\t-F\t- force printing of spamicity numbers.\n"
 		  "\t-x list\t- set debug flags.\n"
@@ -404,7 +407,7 @@
 #if HAVE_DECL_OPTRESET
     optreset = 1;
 #endif
-    while ((option = getopt(argc, argv, ":23d:eFhlL:m:o:snSNvVpuc:CgrRfqtI:O:y:x:DT" G R F)) != -1)
+    while ((option = getopt(argc, argv, ":23d:eFhlL:m:o:snSNvVpuc:CgrRfqtI:O:y:x:BbDT" G R F)) != -1)
     {
 #if 0
 	if (getenv("BOGOFILTER_DEBUG_OPTIONS")) {
@@ -566,6 +569,15 @@
 	    today = string_to_date((char *)optarg);
 	    break;
 
+	case 'B':
+	    bulk_mode = B_CMDLINE;
+	    break;
+
+	case 'b':
+	    bulk_mode = B_STDIN;
+	    fpin = NULL;	/* Ensure that input file isn't stdin */
+	    break;
+
 	case 'D':
 	    dbgout = stdout;
 	    break;
@@ -592,10 +604,13 @@
     if (exitcode) 
 	exit (exitcode);
 
-    if (optind < argc) {
+    if (bulk_mode == B_NORMAL && optind < argc) {
 	fprintf(stderr, "Extra arguments given, first: %s. Aborting.\n", argv[optind]);
 	exit(2);
     }
+
+    if (bulk_mode == B_CMDLINE)
+ 	bulk_mode = optind;	/* save index of first filename */
 
     return;
 }
Index: common.h
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/common.h,v
retrieving revision 1.6
diff -u -r1.6 common.h
--- common.h	7 Apr 2003 11:18:09 -0000	1.6
+++ common.h	14 Apr 2003 15:11:20 -0000
@@ -32,8 +32,6 @@
 #define PATH_LEN 1024
 #endif
 
-#include "globals.h"
-
 /* Default build includes Graham, Robinson, and Robinson-Fisher methods */
 
 #if	defined(ENABLE_ROBINSON_METHOD) || defined(ENABLE_ROBINSON_FISHER)
@@ -81,6 +79,14 @@
     PR_ENV_BOGO,	/* 5 */
     PR_COMMAND		/* 6 */
 } priority_t;
+
+typedef enum bulk_e {	/* how input messages are located; selected by '-b' / '-B' */
+    B_NORMAL,		/* 0: no bulk option given */
+    B_STDIN,		/* 1: filenames read from stdin; 1 can never be optind after '-B' */
+    B_CMDLINE		/* 2: filenames on command line; later overwritten with optind (>= 2) */
+} bulk_t;
+
+#include "globals.h"
 
 extern int build_path(char* dest, size_t size, const char* dir, const char* file);
 
Index: globals.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/globals.c,v
retrieving revision 1.10
diff -u -r1.10 globals.c
--- globals.c	9 Apr 2003 21:54:34 -0000	1.10
+++ globals.c	14 Apr 2003 15:11:20 -0000
@@ -29,6 +29,7 @@
 bool	quiet;				/* '-q' */
 bool	terse;				/* '-t' */
 int	verbose;			/* '-v' */
+int	bulk_mode = B_NORMAL;		/* '-b, -B' */
 
 FILE	*fpin = NULL;			/* '-I' */
 int	Rtable = 0;			/* '-R' */
Index: globals.h
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/globals.h,v
retrieving revision 1.11
diff -u -r1.11 globals.h
--- globals.h	9 Apr 2003 21:54:34 -0000	1.11
+++ globals.h	14 Apr 2003 15:11:20 -0000
@@ -24,6 +24,7 @@
 extern	bool	terse;			/* '-t' */
 extern	bool	quiet;			/* '-q' */
 extern	bool	passthrough;		/* '-p' */
+extern	int	bulk_mode;		/* '-b, -B' */
 extern	int	verbose;		/* '-v' */
 extern	FILE	*fpin;			/* '-I' */
 
Index: lexer.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/lexer.c,v
retrieving revision 1.20
diff -u -r1.20 lexer.c
--- lexer.c	28 Mar 2003 15:16:00 -0000	1.20
+++ lexer.c	14 Apr 2003 15:11:20 -0000
@@ -258,6 +258,12 @@
     return cnt;
 }
 
+void yyinit(void)	/* reset the per-message state kept in lexer.c */
+{
+    yylineno = 0;	/* restart line counting */
+    msg_header = true;	/* presumably: scanning starts in the message header again -- confirm */
+}
+
 int yyinput(byte *buf, size_t max_size)
 /* input getter for the scanner */
 {
Index: lexer.h
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/lexer.h,v
retrieving revision 1.7
diff -u -r1.7 lexer.h
--- lexer.h	3 Apr 2003 21:28:22 -0000	1.7
+++ lexer.h	14 Apr 2003 15:11:20 -0000
@@ -41,11 +41,13 @@
 extern token_t	lexer_v3_lex(void);
 extern int	lexer_v3_leng;
 extern char   * lexer_v3_text;
+extern void	lexer_v3_init(FILE *fp);
 
 /* in lexer.c */
-extern int yyinput(byte *buf, size_t size);
-extern int yyredo(word_t *text, char del);
+extern void	yyinit(void);
+extern int	yyinput(byte *buf, size_t size);
+extern int	yyredo(word_t *text, char del);
 
-extern int buff_fill(buff_t *buff, size_t used, size_t need);
+extern int	buff_fill(buff_t *buff, size_t used, size_t need);
 
 #endif	/* LEXER_H */
Index: lexer_v3.l
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/lexer_v3.l,v
retrieving revision 1.3
diff -u -r1.3 lexer_v3.l
--- lexer_v3.l	1 Mar 2003 02:30:25 -0000	1.3
+++ lexer_v3.l	14 Apr 2003 15:11:20 -0000
@@ -67,6 +67,7 @@
 
 %}
 
+%option debug nodebug
 %option align nounput noyywrap noreject 8bit caseless
 %option prefix="lexer_v3_"
 
@@ -147,6 +148,12 @@
 \n						{ got_newline(); }
 
 %%
+
+void lexer_v3_init(FILE *fp)	/* reset the v3 scanner and point it at fp */
+{
+    BEGIN(INITIAL);	/* return to the INITIAL start condition */
+    yyrestart(fp);	/* flex: switch the scanner's input to fp */
+}
 
 /*
  * The following sets edit modes for GNU EMACS
Index: main.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/main.c,v
retrieving revision 1.26
diff -u -r1.26 main.c
--- main.c	9 Apr 2003 22:45:10 -0000	1.26
+++ main.c	14 Apr 2003 15:11:20 -0000
@@ -69,6 +69,83 @@
     exit(exitcode);
 }
 
+int classify(int argc, char **argv, FILE *out);
+void initialize(FILE *fp);
+
+void initialize(FILE *fp)	/* reset per-message state; fp == NULL skips the lexer restart */
+{
+    init_charset_table(charset_default, true);
+    mime_reset();
+    token_init();
+    if (fp)
+	lexer_v3_init(fp);	/* use the caller's stream rather than the global fpin */
+}
+
+int classify(int argc, char **argv, FILE *out)	/* score one message, or every file named via -b/-B; returns the exit code */
+{
+    int   exitcode = 0;
+    bool done = false;
+    bool error = false;
+    double spamicity;
+    rc_t   status;
+    char *filename = NULL;	/* only set and used in the bulk modes */
+    char buff[PATH_LEN+1];
+
+    while (!done && !error) {
+	switch (bulk_mode) {
+	case B_NORMAL:
+	    break;
+	case B_STDIN:	/* streaming (stdin) mode */
+	{
+	    size_t len;
+	    filename = buff;
+	    if (fgets(buff, sizeof(buff), stdin) == NULL) {
+		done = true;	/* end of the filename list is normal termination, not an error */
+		continue;
+	    }
+	    len = strlen(filename);
+	    if (len > 0 && filename[len-1] == '\n')
+		filename[len-1] = '\0';
+	    break;
+	}
+	default:		/* command line mode: bulk_mode holds the argv index of the next file */
+	    if (bulk_mode < argc) {
+		filename = argv[bulk_mode++];
+	    }
+	    else {
+		done = true;
+		continue;
+	    }
+	    break;
+	}
+	if (bulk_mode != B_NORMAL) {
+	    if (fpin)
+		fclose(fpin);
+	    fpin = fopen( filename, "r" );
+	    if (fpin == NULL) {
+		error = true;	/* ends the loop; turned into exit code 1 at the return */
+		fprintf(stderr, "Can't read file '%s'\n", filename);
+		continue;
+	    }
+	    fprintf(out, "%s ", filename ); 
+	}
+
+	initialize(fpin);
+	status = bogofilter(&spamicity);
+	write_message(out, status);
+	if (bulk_mode == B_NORMAL) {
+	    exitcode = (status == RC_SPAM) ? 0 : 1;
+	    if (nonspam_exits_zero && passthrough && exitcode == 1)
+		exitcode = 0;
+	    done = true;
+	}
+	else {
+	    exitcode = 0;	/* bulk mode: success unless 'error' gets set */
+	}
+    }
+    return error ? 1 : exitcode;	/* a failed open must not be reported as success */
+}
+
 int main(int argc, char **argv) /*@globals errno,stderr,stdout@*/
 {
     int   exitcode = 0;
@@ -76,9 +153,6 @@
 
     process_args_and_config_file(argc, argv, true);
 
-    /* initialize */
-    init_charset_table(charset_default, true);
-
     /* open all wordlists */
     open_wordlists((run_type == RUN_NORMAL) ? DB_READ : DB_WRITE);
 
@@ -122,22 +196,12 @@
 	}
     }
 
-    mime_reset();
+    initialize(NULL);
 
     if (run_type & (RUN_NORMAL | RUN_UPDATE))
-    {
-	double spamicity;
-	rc_t   status = bogofilter(&spamicity);
-
-	write_message(out, status);
-
-	exitcode = (status == RC_SPAM) ? 0 : 1;
-	if (nonspam_exits_zero && passthrough && exitcode == 1)
-	    exitcode = 0;
-    }
-    else {
+	exitcode = classify(argc, argv, out);
+    else
 	register_messages(run_type);
-    }
 
     if (passthrough) {
 	switch(passmode) {
Index: token.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/token.c,v
retrieving revision 1.9
diff -u -r1.9 token.c
--- token.c	28 Mar 2003 15:16:02 -0000	1.9
+++ token.c	14 Apr 2003 15:11:20 -0000
@@ -193,10 +193,16 @@
     return(class);
 }
 
-void got_from(void)
+void token_init(void)
 {
+    yyinit();
     mime_reset(); 
     reset_html_level();
+}
+
+void got_from(void)
+{
+    token_init();
 }
 
 void got_newline()
Index: token.h
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/token.h,v
retrieving revision 1.5
diff -u -r1.5 token.h
--- token.h	28 Mar 2003 15:16:03 -0000	1.5
+++ token.h	14 Apr 2003 15:11:20 -0000
@@ -21,6 +21,7 @@
 extern void got_emptyline(void);
 extern void set_tag(const char *tag);
 
+extern void token_init(void);
 extern void token_cleanup(void);
 
 /* used by lexer_text_html.l */



More information about the bogofilter-dev mailing list