[PATCH] bulkmode registration
David Relson
relson at osagesoftware.com
Wed May 7 01:37:24 CEST 2003
Greetings,
For all of you who have been clamoring for registration of maildirs,
attached is a patch relative to the cvs repository.
With the attached patch, bogofilter passes "make check" as well as a
cursory test (27 messages, registered via "bogofilter -s -b -v < list -d
dir"). The code is believed to work, though it hasn't been stress tested.
Volunteers needed to test it more thoroughly!
David
P.S. If there's a need, I can make a .tgz available with the current source code.
-------------- next part --------------
Index: bogoconfig.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/bogoconfig.c,v
retrieving revision 1.51
diff -u -r1.51 bogoconfig.c
--- bogoconfig.c 2 May 2003 23:03:04 -0000 1.51
+++ bogoconfig.c 6 May 2003 23:26:54 -0000
@@ -321,9 +321,9 @@
*/
);
(void)fprintf(stderr,
- "\t -M - set mailbox mode. Classify multiple messages in an mbox formatted file.\n"
- "\t -b - set streaming bulk mode. Classify multiple messages whose filenames are read from STDIN.\n"
- "\t -B name1 name2 ... - set bulk mode. Classify multiple messages named as files on the command line.\n"
+ "\t -M - set mailbox mode. Classify multiple messages in an mbox formatted file.\n"
+ "\t -b - set streaming bulk mode. Process multiple messages whose filenames are read from STDIN.\n"
+ "\t -B name1 name2 ... - set bulk mode. Process multiple messages named as files on the command line.\n"
"\t -F - force printing of spamicity numbers.\n"
"\t -R - print an R data frame.\n"
);
Index: bogofilter.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/bogofilter.c,v
retrieving revision 1.10
diff -u -r1.10 bogofilter.c
--- bogofilter.c 21 Apr 2003 17:03:17 -0000 1.10
+++ bogofilter.c 6 May 2003 23:26:54 -0000
@@ -46,7 +46,7 @@
method->print_stats(fp);
}
-rc_t bogofilter(double *xss) /*@globals errno@*/
+rc_t bogofilter()
/* evaluate text for spamicity */
{
rc_t status;
@@ -77,9 +77,6 @@
spamicity = method->compute_spamicity(wordhash, NULL);
status = method->status();
-
- if (xss != NULL)
- *xss = spamicity;
if (run_type & RUN_UPDATE) /* Note: don't register if RC_UNSURE */
{
Index: bogofilter.h
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/bogofilter.h,v
retrieving revision 1.4
diff -u -r1.4 bogofilter.h
--- bogofilter.h 19 Apr 2003 01:09:02 -0000 1.4
+++ bogofilter.h 6 May 2003 23:26:54 -0000
@@ -8,10 +8,10 @@
#define DEVIATION(n) fabs((n) - EVEN_ODDS) /* deviation from average */
-typedef enum rc_e {RC_SPAM=0, RC_HAM=1, RC_UNSURE=2, RC_MORE} rc_t;
+typedef enum rc_e {RC_SPAM=0, RC_HAM=1, RC_UNSURE=2, RC_OK, RC_MORE} rc_t;
extern void initialize_constants(void);
-extern rc_t bogofilter(/*@out@*/ double *xss);
+extern rc_t bogofilter(void);
extern void print_stats(FILE *fp);
#endif /* BOGOFILTER_H */
Index: main.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/main.c,v
retrieving revision 1.40
diff -u -r1.40 main.c
--- main.c 28 Apr 2003 04:01:59 -0000 1.40
+++ main.c 6 May 2003 23:26:54 -0000
@@ -53,6 +53,8 @@
extern int Rtable;
+FILE *fpo;
+
char msg_register[256];
char msg_bogofilter[256];
size_t msg_register_size = sizeof(msg_register);
@@ -74,7 +76,7 @@
exit(exitcode);
}
-static int classify(int argc, char **argv, FILE *out);
+static rc_t classify(void);
static void initialize(FILE *fp);
static void initialize(FILE *fp)
@@ -86,7 +88,9 @@
lexer_v3_init(fp);
}
-static int classify(int argc, char **argv, FILE *out)
+typedef rc_t (*arg_foreach_t)(void);
+
+static int arg_foreach(arg_foreach_t hook, int argc, char **argv)
{
int exitcode = 0;
bool error = false;
@@ -133,31 +137,24 @@
continue;
}
initialize(fpin);
- fprintf(out, "%s ", filename );
+ fprintf(dbgout, "%s ", filename );
}
- passthrough_setup();
- do {
- init_msg_counts();
- token_init();
- init_charset_table(charset_default, true);
-
- status = bogofilter(NULL);
- write_message(out, status);
-
- rstats_cleanup();
- } while (status == RC_MORE);
-
- passthrough_cleanup();
-
- if (bulk_mode == B_NORMAL && status != RC_MORE) {
- exitcode = (status == RC_SPAM) ? 0 : 1;
- if (nonspam_exits_zero && passthrough && exitcode == 1)
- exitcode = 0;
- done = true;
- }
- else {
- exitcode = !error ? 0 : 1;
+ status = hook();
+
+ exitcode = !error ? 0 : 1;
+
+ if (bulk_mode == B_NORMAL) {
+ if ((run_type & (REG_SPAM | REG_GOOD | UNREG_SPAM | UNREG_GOOD)) != 0)
+ done = true;
+ else {
+ if (status != RC_MORE) {
+ exitcode = (status == RC_SPAM) ? 0 : 1;
+ if (nonspam_exits_zero && passthrough && exitcode == 1)
+ exitcode = 0;
+ done = true;
+ }
+ }
}
}
return exitcode;
@@ -166,23 +163,21 @@
int main(int argc, char **argv) /*@globals errno,stderr,stdout@*/
{
int exitcode;
- FILE *out;
process_args_and_config_file(argc, argv, true);
/* open all wordlists */
open_wordlists((run_type == RUN_NORMAL) ? DB_READ : DB_WRITE);
- out = output_setup();
+ fpo = output_setup();
initialize(NULL);
if (run_type & (RUN_NORMAL | RUN_UPDATE)) {
- exitcode = classify(argc, argv, out);
+ exitcode = arg_foreach(classify, argc, argv);
}
else {
- register_messages(run_type);
- exitcode = 0;
+ exitcode = arg_foreach(register_messages, argc, argv);
}
close_wordlists(false);
@@ -251,6 +246,27 @@
}
typedef int (*readfunc_t)(char **, void *);
+
+static rc_t classify()
+{
+ rc_t status = RC_MORE;
+
+ passthrough_setup();
+ do {
+ init_msg_counts();
+ token_init();
+ init_charset_table(charset_default, true);
+
+ status = bogofilter();
+ write_message(fpo, status);
+
+ rstats_cleanup();
+ } while (status == RC_MORE);
+
+ passthrough_cleanup();
+
+ return status;
+}
static void write_message(FILE *fp, rc_t status)
{
Index: register.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/register.c,v
retrieving revision 1.11
diff -u -r1.11 register.c
--- register.c 19 Apr 2003 01:09:02 -0000 1.11
+++ register.c 6 May 2003 23:26:54 -0000
@@ -53,7 +53,7 @@
format_log_update(msg_register, msg_register_size, u, r, wordcount, msgcount);
if (verbose)
- (void)fprintf(stderr, "# %d word%s, %d message%s\n",
+ (void)fprintf(dbgout, "# %d word%s, %d message%s\n",
wordcount, PLURAL(wordcount), msgcount, PLURAL(msgcount));
set_list_active_status(false);
@@ -128,7 +128,7 @@
* cap-and-accumulation phase. we save more than half of the execution
* time for big mbox inputs, when teaching bogofilter.
*/
-void register_messages(run_t _run_type)
+rc_t register_messages()
{
wordhash_t *words = wordhash_init();
long msgcount = 0;
@@ -148,6 +148,8 @@
} while (token_type != NONE);
wordhash_sort(words);
- register_words(_run_type, words, msgcount);
+ register_words(run_type, words, msgcount);
wordhash_free(words);
+
+ return RC_OK;
}
Index: register.h
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/register.h,v
retrieving revision 1.2
diff -u -r1.2 register.h
--- register.h 16 Apr 2003 17:30:17 -0000 1.2
+++ register.h 6 May 2003 23:26:54 -0000
@@ -6,7 +6,7 @@
#include <wordhash.h>
-extern void register_messages(run_t _run_type);
+extern rc_t register_messages(void);
extern void register_words(run_t _run_type, wordhash_t *h, int msgcount);
#endif /* REGISTER_H */
Index: tests/bogofilter/t.bulkmode
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/src/tests/bogofilter/t.bulkmode,v
retrieving revision 1.12
diff -u -r1.12 t.bulkmode
--- tests/bogofilter/t.bulkmode 28 Apr 2003 04:02:04 -0000 1.12
+++ tests/bogofilter/t.bulkmode 6 May 2003 23:26:54 -0000
@@ -93,20 +93,20 @@
# test scoring of files listed on stdin
NAME="bulk-stdin"
-ls $pattern | $BOGOFILTER -c $CFG $OPT -b | \
+ls $pattern | $BOGOFILTER -c $CFG $OPT -b -D | \
sed s@.*inputs/@./inputs/@ >${TMPDIR}/$NAME.out
# test scoring of files listed on linend
NAME="bulk-linend"
-$BOGOFILTER -c $CFG $OPT -B `ls $pattern` | \
+$BOGOFILTER -c $CFG $OPT -B -D `ls $pattern` | \
sed s@.*inputs/@./inputs/@ >${TMPDIR}/$NAME.out >${TMPDIR}/$NAME.out
# test scoring each file twice (using linend)
NAME="bulk-double-1"
for f in $pattern ; do
- map_rc $BOGOFILTER -c $CFG $OPT -B $f $f | \
+ map_rc $BOGOFILTER -c $CFG $OPT -B -D $f $f | \
sed s@.*inputs/@./inputs/@ >> ${TMPDIR}/$NAME.tmp
done
sort -u < ${TMPDIR}/$NAME.tmp > ${TMPDIR}/$NAME.out
@@ -117,7 +117,7 @@
for f in $pattern ; do
t="${TMPDIR}/`basename $f`"
grep -v "^From "< $f > $t
- map_rc $BOGOFILTER -c $CFG $OPT -B $t $t | \
+ map_rc $BOGOFILTER -c $CFG $OPT -B -D $t $t | \
sed s@.*/@./inputs/@ >> ${TMPDIR}/$NAME.tmp
done
sort -u < ${TMPDIR}/$NAME.tmp > ${TMPDIR}/$NAME.out
More information about the bogofilter-dev
mailing list