bulk_mode patch - version 2
David Relson
relson at osagesoftware.com
Mon Apr 14 17:18:26 CEST 2003
Michael,
Here's version 2 of the bulkmode patch. I've added a number of
initialization/reset functions and calls to them.
I've run it on a variety of test messages and see no problems.
This patch replaces the previous patch. You can revert the old patch and
apply the new one, or you can start with a clean source tree and apply the
new patch. The patch is relative to version 0.11.2, though I think it's
applicable to 0.11.1.8 or anything newer.
Please keep the list posted on your use of bulkmode.
Cheers!
David
-------------- next part --------------
Index: bogoconfig.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/bogoconfig.c,v
retrieving revision 1.42
diff -u -r1.42 bogoconfig.c
--- bogoconfig.c 13 Apr 2003 16:32:53 -0000 1.42
+++ bogoconfig.c 14 Apr 2003 15:11:20 -0000
@@ -330,6 +330,9 @@
"\t-q\t- quiet - don't print warning messages.\n"
"\t-l\t- write messages to syslog.\n");
(void)fprintf(stderr,
+ "\t-b\t- set streaming bulk mode. Classify multiple messages whose filenames are read from STDIN.\n"
+ "\t-B name1 name2 ...\t- set bulk mode. Classify multiple messages named as files on the command line.\n");
+ (void)fprintf(stderr,
"\t-L tag\t- specify the tag value for log messages.\n"
"\t-F\t- force printing of spamicity numbers.\n"
"\t-x list\t- set debug flags.\n"
@@ -404,7 +407,7 @@
#if HAVE_DECL_OPTRESET
optreset = 1;
#endif
- while ((option = getopt(argc, argv, ":23d:eFhlL:m:o:snSNvVpuc:CgrRfqtI:O:y:x:DT" G R F)) != -1)
+ while ((option = getopt(argc, argv, ":23d:eFhlL:m:o:snSNvVpuc:CgrRfqtI:O:y:x:BbDT" G R F)) != -1)
{
#if 0
if (getenv("BOGOFILTER_DEBUG_OPTIONS")) {
@@ -566,6 +569,15 @@
today = string_to_date((char *)optarg);
break;
+ case 'B':
+ bulk_mode = B_CMDLINE;
+ break;
+
+ case 'b':
+ bulk_mode = B_STDIN;
+ fpin = NULL; /* Ensure that input file isn't stdin */
+ break;
+
case 'D':
dbgout = stdout;
break;
@@ -592,10 +604,13 @@
if (exitcode)
exit (exitcode);
- if (optind < argc) {
+ if (bulk_mode == B_NORMAL && optind < argc) {
fprintf(stderr, "Extra arguments given, first: %s. Aborting.\n", argv[optind]);
exit(2);
}
+
+ if (bulk_mode == B_CMDLINE)
+ bulk_mode = optind; /* save index of first filename */
return;
}
Index: common.h
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/common.h,v
retrieving revision 1.6
diff -u -r1.6 common.h
--- common.h 7 Apr 2003 11:18:09 -0000 1.6
+++ common.h 14 Apr 2003 15:11:20 -0000
@@ -32,8 +32,6 @@
#define PATH_LEN 1024
#endif
-#include "globals.h"
-
/* Default build includes Graham, Robinson, and Robinson-Fisher methods */
#if defined(ENABLE_ROBINSON_METHOD) || defined(ENABLE_ROBINSON_FISHER)
@@ -81,6 +79,14 @@
PR_ENV_BOGO, /* 5 */
PR_COMMAND /* 6 */
} priority_t;
+
+typedef enum bulk_e {
+ B_NORMAL,
+ B_CMDLINE,
+ B_STDIN
+} bulk_t;
+
+#include "globals.h"
extern int build_path(char* dest, size_t size, const char* dir, const char* file);
Index: globals.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/globals.c,v
retrieving revision 1.10
diff -u -r1.10 globals.c
--- globals.c 9 Apr 2003 21:54:34 -0000 1.10
+++ globals.c 14 Apr 2003 15:11:20 -0000
@@ -29,6 +29,7 @@
bool quiet; /* '-q' */
bool terse; /* '-t' */
int verbose; /* '-v' */
+int bulk_mode = B_NORMAL; /* '-b, -B' */
FILE *fpin = NULL; /* '-I' */
int Rtable = 0; /* '-R' */
Index: globals.h
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/globals.h,v
retrieving revision 1.11
diff -u -r1.11 globals.h
--- globals.h 9 Apr 2003 21:54:34 -0000 1.11
+++ globals.h 14 Apr 2003 15:11:20 -0000
@@ -24,6 +24,7 @@
extern bool terse; /* '-t' */
extern bool quiet; /* '-q' */
extern bool passthrough; /* '-p' */
+extern int bulk_mode; /* '-B' */
extern int verbose; /* '-v' */
extern FILE *fpin; /* '-I' */
Index: lexer.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/lexer.c,v
retrieving revision 1.20
diff -u -r1.20 lexer.c
--- lexer.c 28 Mar 2003 15:16:00 -0000 1.20
+++ lexer.c 14 Apr 2003 15:11:20 -0000
@@ -258,6 +258,12 @@
return cnt;
}
+void yyinit(void)
+{
+ yylineno = 0;
+ msg_header = true;
+}
+
int yyinput(byte *buf, size_t max_size)
/* input getter for the scanner */
{
Index: lexer.h
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/lexer.h,v
retrieving revision 1.7
diff -u -r1.7 lexer.h
--- lexer.h 3 Apr 2003 21:28:22 -0000 1.7
+++ lexer.h 14 Apr 2003 15:11:20 -0000
@@ -41,11 +41,13 @@
extern token_t lexer_v3_lex(void);
extern int lexer_v3_leng;
extern char * lexer_v3_text;
+extern void lexer_v3_init(FILE *fp);
/* in lexer.c */
-extern int yyinput(byte *buf, size_t size);
-extern int yyredo(word_t *text, char del);
+extern void yyinit(void);
+extern int yyinput(byte *buf, size_t size);
+extern int yyredo(word_t *text, char del);
-extern int buff_fill(buff_t *buff, size_t used, size_t need);
+extern int buff_fill(buff_t *buff, size_t used, size_t need);
#endif /* LEXER_H */
Index: lexer_v3.l
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/lexer_v3.l,v
retrieving revision 1.3
diff -u -r1.3 lexer_v3.l
--- lexer_v3.l 1 Mar 2003 02:30:25 -0000 1.3
+++ lexer_v3.l 14 Apr 2003 15:11:20 -0000
@@ -67,6 +67,7 @@
%}
+%option debug nodebug
%option align nounput noyywrap noreject 8bit caseless
%option prefix="lexer_v3_"
@@ -147,6 +148,12 @@
\n { got_newline(); }
%%
+
+void lexer_v3_init(FILE *fp)
+{
+ BEGIN(INITIAL);
+ yyrestart(fp);
+}
/*
* The following sets edit modes for GNU EMACS
Index: main.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/main.c,v
retrieving revision 1.26
diff -u -r1.26 main.c
--- main.c 9 Apr 2003 22:45:10 -0000 1.26
+++ main.c 14 Apr 2003 15:11:20 -0000
@@ -69,6 +69,83 @@
exit(exitcode);
}
+int classify(int argc, char **argv, FILE *out);
+void initialize(FILE *fp);
+
+void initialize(FILE *fp)
+{
+ init_charset_table(charset_default, true);
+ mime_reset();
+ token_init();
+ if (fp)
+ lexer_v3_init(fpin);
+}
+
+int classify(int argc, char **argv, FILE *out)
+{
+ int exitcode = 0;
+ bool done = false;
+ bool error = false;
+ double spamicity;
+ rc_t status;
+ char *filename;
+ char buff[PATH_LEN+1];
+
+ while (!done && !error) {
+ switch (bulk_mode) {
+ case B_NORMAL:
+ break;
+ case B_STDIN: /* streaming (stdin) mode */
+ {
+ size_t len;
+ filename = buff;
+ if (fgets(buff, sizeof(buff), stdin) == 0) {
+ error = true;
+ continue;
+ }
+ len = strlen(filename);
+ if (len > 0 && filename[len-1] == '\n')
+ filename[len-1] = '\0';
+ break;
+ }
+ default: /* command line mode */
+ if (bulk_mode < argc && !error) {
+ filename = argv[bulk_mode++];
+ }
+ else {
+ done = true;
+ continue;
+ }
+ break;
+ }
+ if (bulk_mode != B_NORMAL) {
+ if (fpin)
+ fclose(fpin);
+ fpin = fopen( filename, "r" );
+ if (fpin == NULL) {
+ error = true;
+ fprintf(stderr, "Can't read file '%s'\n", filename);
+ continue;
+ }
+ fprintf(out, "%s ", filename );
+ }
+
+ initialize(fpin);
+ status = bogofilter(&spamicity);
+ write_message(out, status);
+ if (bulk_mode == B_NORMAL) {
+ exitcode = (status == RC_SPAM) ? 0 : 1;
+ if (nonspam_exits_zero && passthrough && exitcode == 1)
+ exitcode = 0;
+ done = true;
+ }
+ else {
+ exitcode = !error ? 0 : 1;
+ }
+ }
+ return exitcode;
+}
+
int main(int argc, char **argv) /*@globals errno,stderr,stdout@*/
{
int exitcode = 0;
@@ -76,9 +153,6 @@
process_args_and_config_file(argc, argv, true);
- /* initialize */
- init_charset_table(charset_default, true);
-
/* open all wordlists */
open_wordlists((run_type == RUN_NORMAL) ? DB_READ : DB_WRITE);
@@ -122,22 +196,12 @@
}
}
- mime_reset();
+ initialize(NULL);
if (run_type & (RUN_NORMAL | RUN_UPDATE))
- {
- double spamicity;
- rc_t status = bogofilter(&spamicity);
-
- write_message(out, status);
-
- exitcode = (status == RC_SPAM) ? 0 : 1;
- if (nonspam_exits_zero && passthrough && exitcode == 1)
- exitcode = 0;
- }
- else {
+ exitcode = classify(argc, argv, out);
+ else
register_messages(run_type);
- }
if (passthrough) {
switch(passmode) {
Index: token.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/token.c,v
retrieving revision 1.9
diff -u -r1.9 token.c
--- token.c 28 Mar 2003 15:16:02 -0000 1.9
+++ token.c 14 Apr 2003 15:11:20 -0000
@@ -193,10 +193,16 @@
return(class);
}
-void got_from(void)
+void token_init(void)
{
+ yyinit();
mime_reset();
reset_html_level();
+}
+
+void got_from(void)
+{
+ token_init();
}
void got_newline()
Index: token.h
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/token.h,v
retrieving revision 1.5
diff -u -r1.5 token.h
--- token.h 28 Mar 2003 15:16:03 -0000 1.5
+++ token.h 14 Apr 2003 15:11:20 -0000
@@ -21,6 +21,7 @@
extern void got_emptyline(void);
extern void set_tag(const char *tag);
+extern void token_init(void);
extern void token_cleanup(void);
/* used by lexer_text_html.l */
More information about the bogofilter-dev
mailing list