Index: bogoconfig.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/bogoconfig.c,v retrieving revision 1.106 diff -u -r1.106 bogoconfig.c --- bogoconfig.c 8 Sep 2003 12:16:02 -0000 1.106 +++ bogoconfig.c 14 Sep 2003 23:51:46 -0000 @@ -107,6 +107,8 @@ extern double robx, robs; extern wl_t wl_default; +extern void lexer_set_debug(int v); + /*---------------------------------------------------------------------------*/ /* Notes: @@ -164,9 +166,16 @@ void process_args_and_config_file(int argc, char **argv, bool warn_on_error) { + const char *bogotest = getenv("BOGOTEST"); + process_args_1(argc, argv); process_config_files(warn_on_error); process_args_2(argc, argv); + + if (bogotest) + test = atoi(bogotest); + + lexer_set_debug(test); /* 1 - INITEST, 2 - lexer states */ if (!twostate && !threestate) { twostate = ham_cutoff < EPS; Index: bogolexer.c =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/bogolexer.c,v retrieving revision 1.44 diff -u -r1.44 bogolexer.c --- bogolexer.c 8 Sep 2003 12:16:02 -0000 1.44 +++ bogolexer.c 14 Sep 2003 23:51:46 -0000 @@ -239,15 +239,24 @@ int count=0; +extern void lexer_set_debug(int v); + int main(int argc, char **argv) { token_t t; - mbox_mode = true; /* to allow multiple messages */ + const char *bogotest = getenv("BOGOTEST"); process_args_1(argc, argv); process_config_files(false); process_args_2(argc, argv); + + if (bogotest) + test = atoi(bogotest); + + lexer_set_debug(test); /* 1 - INITEST, 2 - lexer states */ + + mbox_mode = true; /* to allow multiple messages */ textblock_init(); Index: lexer_v3.l =================================================================== RCS file: /cvsroot/bogofilter/bogofilter/src/lexer_v3.l,v retrieving revision 1.84 diff -u -r1.84 lexer_v3.l --- lexer_v3.l 14 Sep 2003 21:17:52 -0000 1.84 +++ lexer_v3.l 14 Sep 2003 23:51:47 -0000 @@ -94,6 +94,10 @@ static void 
reorder_html(void); static void skip_to(char chr); +static void use_to(char chr); + +void yy_set_state_initial(void); +void lexer_set_debug(int v); /* Function Definitions */ @@ -107,7 +111,7 @@ %} %option warn -%option debug nodebug +%option nodebug debug %option align caseless 8bit %option never-interactive %option noreject noyywrap @@ -194,6 +198,7 @@ BREAKHTML "<"({HBREAK}([ \n\t][^>]*|""))">" %s TEXT HTML BOGO_LEX +%s INITEST %s HTOKEN HDISCARD SCOMMENT LCOMMENT HSCRIPT %% @@ -207,6 +212,7 @@ ^\"{BOGOLEX_TOKEN}\"{NUM_NUM} { return BOGO_LEX_LINE; } \n { lineno += 1; } +{ENCODED_TOKEN} | {ENCODED_TOKEN} { word_t *w = yy_text(); size_t size = decode_text(w); while (size-- > 0) @@ -214,22 +220,39 @@ } ^(To|From|Return-Path|Subject): { set_tag(yytext); } +^(To|From|Return-Path|Subject): { set_tag(yytext); } +^Received: { set_tag(yytext); return TOKEN; } + ^Content-(Transfer-Encoding|Type|Disposition):{MTYPE} { mime_content(yy_text()); skip_to(':'); return TOKEN; } +^Content-(Transfer-Encoding|Type|Disposition):{MTYPE} { mime_content(yy_text()); return TOKEN; } + ^MIME-Version:.* { mime_version(yy_text()); skip_to(':'); return TOKEN; } +^MIME-Version:.* { mime_version(yy_text()); return HEADKEY; } ^(Delivery-)?Date:.* /* ignore */ +^(Delivery-)?Date:.* { return HEADKEY; } ^(Resent-)?Message-ID:.* /* ignore */ +^(Resent-)?Message-ID:.* { return HEADKEY; } +^(In-Reply-To|References):.* | ^(In-Reply-To|References):.* { return HEADKEY; } +boundary=[ ]*\"?{MIME_BOUNDARY}\"? | boundary=[ ]*\"?{MIME_BOUNDARY}\"? { mime_boundary_set(yy_text()); } + charset=\"?{CHARSET}\"? { got_charset(yytext); skip_to('='); return TOKEN; } +(file)?name=\"? | (file)?name=\"? 
/* ignore */ (ESMTP|SMTP)+[ \t\n]+id\ {ID} /* ignore */ +(ESMTP|SMTP)+[ \t\n]+id\ {ID} { if (header_line_markup) { use_to(' '); return TOKEN; } } +[:blank:]*id\ {ID} | [:blank:]*id\ {ID} /* ignore */ +\n[ \t] | \n[ \t] { lineno += 1; } + +\n\n | \n\n { if (get_content_type() == MIME_TEXT_HTML) BEGIN HTML; else @@ -240,10 +263,10 @@ } <> { return NONE; } +<> { add_hint("no_body:"); return NONE; } ^--{MIME_BOUNDARY}(--)?$ { if (got_mime_boundary(yy_text())) { - BEGIN INITIAL; - msg_header = true; + yy_set_state_initial(); return BOUNDARY; } else { yyless(2); @@ -287,8 +310,7 @@ void lexer_v3_init(FILE *fp) { lineno = 0; - BEGIN INITIAL; - msg_header = true; + yy_set_state_initial(); yyrestart(fp); } @@ -298,6 +320,15 @@ yyless(len); } +static void use_to(char chr) +{ + char * p = memchr(yytext, chr, yyleng); + if (p) { + *p = 0; + yyleng = (p - yytext); + } +} + static void reorder_html(void) { char *chr = memchr(yytext, '<', yyleng); /* find start of html tag */ @@ -315,11 +346,11 @@ } char yy_get_state(void); -void yy_set_state_initial(void); char yy_get_state() { switch (YYSTATE) { + case INITEST: case INITIAL: return 'i'; case TEXT: return 't'; case HTML: @@ -330,12 +361,30 @@ } } -void yy_set_state_initial() +void yy_set_state_initial(void) { - BEGIN INITIAL; - if (DEBUG_LEXER(1)) fprintf(dbgout, "%s:%d %s\n", __FILE__, __LINE__, "BEGIN INITIAL"); + if (! (test & 1)) /* 1 - INITEST, 2 - debug (display lexer states) */ + BEGIN INITIAL; + else + BEGIN INITEST; + + msg_header = true; + + if (DEBUG_LEXER(1)) + fprintf(dbgout, "%s:%d BEGIN %s\n", + __FILE__, __LINE__, + !(test & 1) ? "INITIAL" : "INITEST" ); /* same bit-0 test as the BEGIN choice above, so the log names the state actually entered */ } +void lexer_set_debug(int v) +{ +#ifndef FLEX_DEBUG + (void) v; +#else + yy_flex_debug = v; +#endif +} + /* * The following sets edit modes for GNU EMACS * Local Variables: