diff -u -r1.232 bogoconfig.c --- bogoconfig.c 15 Oct 2005 20:52:51 -0000 1.232 +++ bogoconfig.c 30 Oct 2005 00:46:24 -0000 @@ -694,6 +694,7 @@ case O_HEADER_FORMAT: header_format = get_string(name, val); break; case O_LOG_HEADER_FORMAT: log_header_format = get_string(name, val); break; case O_LOG_UPDATE_FORMAT: log_update_format = get_string(name, val); break; + case O_MAX_TOKEN_LEN: max_token_len=atoi(val); break; case O_REPLACE_NONASCII_CHARACTERS: replace_nonascii_characters = get_bool(name, val); break; case O_SPAMICITY_FORMATS: set_spamicity_formats(val); break; case O_SPAMICITY_TAGS: set_spamicity_tags(val); break; diff -u -r1.90 bogolexer.c --- bogolexer.c 17 Jun 2005 03:31:06 -0000 1.90 +++ bogolexer.c 30 Oct 2005 00:46:24 -0000 @@ -250,6 +250,10 @@ block_on_subnets = get_bool(name, val); break; + case O_MAX_TOKEN_LEN: + max_token_len = atoi(val); + break; + default: /* config file options: ** ok - if from config file diff -u -r1.47 format.c --- format.c 4 Sep 2005 14:49:43 -0000 1.47 +++ format.c 30 Oct 2005 00:46:24 -0000 @@ -326,7 +326,7 @@ *buff++ = '%'; break; case 'A': /* A - Message Address */ - buff += format_string(buff, msg_addr.leng != 0 ? (const char *)msg_addr.text : "UNKNOWN", 0, prec, flags, end); + buff += format_string(buff, ((*msg_addr->text != '\0') ? ((const char *)msg_addr->text) : "UNKNOWN"), 0, prec, flags, end); break; case 'c': /* c - classification, e.g. Yes/No, Spam/Ham/Unsure, or YN, SHU, +-? */ { @@ -348,10 +348,10 @@ break; } case 'I': /* M - Message ID */ - buff += format_string(buff, msg_id.leng != 0 ? (const char *)msg_id.text : "UNKNOWN", 0, prec, flags, end); + buff += format_string(buff, ((*msg_id->text != '\0') ? ((const char *)msg_id->text) : "UNKNOWN"), 0, prec, flags, end); break; case 'Q': /* Q - Queue ID */ - buff += format_string(buff, queue_id.leng != 0 ? (const char *)queue_id.text : "UNKNOWN", 0, prec, flags, end); + buff += format_string(buff, ((*queue_id->text != '\0') ? ((const char *)queue_id->text) : "UNKNOWN"), 0, prec, flags, end); break; case 'p': /* p - spamicity as a probability */ { diff -u -r1.78 globals.c --- globals.c 17 Jun 2005 03:31:06 -0000 1.78 +++ globals.c 30 Oct 2005 00:46:24 -0000 @@ -38,6 +38,7 @@ int verbose; /* '-v' */ /* config file options */ +uint max_token_len = MAXTOKENLEN; double min_dev; double ham_cutoff = HAM_CUTOFF; double spam_cutoff; diff -u -r1.81 globals.h --- globals.h 7 Jun 2005 23:06:20 -0000 1.81 +++ globals.h 30 Oct 2005 00:46:24 -0000 @@ -27,6 +27,7 @@ extern bool fisher; /* '-f' */ extern FILE *fpin; /* '-I' */ extern bool logflag; /* '-l' */ +extern uint max_token_len; extern bool mbox_mode; /* '-M' */ extern char outfname[PATH_LEN]; /* '-O' */ extern bool passthrough; /* '-p' */ diff -u -r1.143 lexer.c --- lexer.c 23 Oct 2005 15:15:55 -0000 1.143 +++ lexer.c 30 Oct 2005 00:46:24 -0000 @@ -330,7 +330,7 @@ if (count >= MAXTOKENLEN * 2 && long_token(buff.t.text, (uint) count)) { uint start = buff.t.leng - count; - uint length = count - MAXTOKENLEN; + uint length = count - max_token_len; buff_shift(&buff, start, length); count = buff.t.leng; } diff -u -r1.19 longoptions.h --- longoptions.h 15 Oct 2005 21:02:44 -0000 1.19 +++ longoptions.h 30 Oct 2005 00:46:24 -0000 @@ -46,6 +46,7 @@ O_HEADER_FORMAT, O_LOG_HEADER_FORMAT, O_LOG_UPDATE_FORMAT, + O_MAX_TOKEN_LEN, O_MIN_DEV, O_REPLACE_NONASCII_CHARACTERS, O_ROBS, @@ -79,6 +80,7 @@ { "no-config-file", N, 0, 'C' }, \ { "help", N, 0, 'h' }, \ { "input-file", N, 0, 'I' }, \ + { "max-token-len", R, 0, O_MAX_TOKEN_LEN }, \ UNICODE_OPTION \ { "version", N, 0, 'V' }, \ { "verbosity", N, 0, 'v' }, diff -u -r1.51 rstats.c --- rstats.c 15 Mar 2005 12:25:59 -0000 1.51 +++ rstats.c 30 Oct 2005 00:46:24 -0000 @@ -232,16 +232,19 @@ /* print header */ if (!Rtable) (void)fprintf(fpo, "%*s %6s %-6s %-6s %-6s %s\n", - MAXTOKENLEN+2,"","n", "pgood", "pbad", "fw", "U"); + max_token_len+2,"","n", "pgood", "pbad", "fw", "U"); else (void)fprintf(fpo, "%*s %6s %-6s %-6s %-6s %-6s %-6s %s\n", - MAXTOKENLEN+2,"","n", "pgood", "pbad", "fw","invfwlog", "fwlog", "U"); + max_token_len+2,"","n", "pgood", "pbad", "fw","invfwlog", "fwlog", "U"); /* Print 1 line per token */ for (r= 0; rtoken->leng); + +// int len = max(0, max_token_len-(int)cur->token->leng); + int len = (cur->token->leng >= max_token_len) ? 0 : (max_token_len - cur->token->leng); + double fw = calc_prob(cur->good, cur->bad, cur->msgs_good, cur->msgs_bad); char flag = (fabs(fw-EVEN_ODDS) - min_dev >= EPS) ? '+' : '-'; diff -u -r1.56 score.c --- score.c 4 Sep 2005 14:49:43 -0000 1.56 +++ score.c 30 Oct 2005 00:46:24 -0000 @@ -412,14 +412,14 @@ { if (!Rtable) { (void)fprintf(fpo, "%-*s %6lu %9.6f %9.6f %9.6f\n", - MAXTOKENLEN+2, "N_P_Q_S_s_x_md", (unsigned long)score.robn, + max_token_len+2, "N_P_Q_S_s_x_md", (unsigned long)score.robn, score.p_pr, score.q_pr, score.spamicity); (void)fprintf(fpo, "%-*s %9.6f %9.6f %9.6f\n", - MAXTOKENLEN+2+6, " ", robs, robx, min_dev); + max_token_len+2+6, " ", robs, robx, min_dev); } else (void)fprintf(fpo, "%-*s %6lu %9.2e %9.2e %9.2e %9.2e %9.2e %5.3f\n", - MAXTOKENLEN+2, "N_P_Q_S_s_x_md", (unsigned long)score.robn, + max_token_len+2, "N_P_Q_S_s_x_md", (unsigned long)score.robn, score.p_pr, score.q_pr, score.spamicity, robs, robx, min_dev); } diff -u -r1.123 token.c --- token.c 5 Sep 2005 19:06:32 -0000 1.123 +++ token.c 30 Oct 2005 00:46:25 -0000 @@ -32,20 +32,16 @@ /* Local Variables */ -byte msg_addr_text[MAXTOKENLEN + D]; -byte msg_id_text [MAXTOKENLEN * 3 + D]; -byte queue_id_text[MAXTOKENLEN + D]; -byte ipsave_text[MAXTOKENLEN + D]; - -word_t msg_addr = { 0, msg_addr_text}; /* First IP Address in Received: statement */ -word_t msg_id = { 0, msg_id_text}; /* Message ID */ -word_t queue_id = { 0, queue_id_text}; /* Message's first queue ID */ +word_t *msg_addr; /* First IP Address in Received: statement */ +word_t *msg_id; /* Message ID */ +word_t *queue_id; /* Message's first queue ID */ static token_t save_class = NONE; -static word_t ipsave = { 0, ipsave_text}; +static word_t *ipsave; -byte yylval_text[MAXTOKENLEN + MAX_PREFIX_LEN + MSG_COUNT_PADDING + D]; -static word_t yylval = { 0, yylval_text }; +static byte *yylval_text; +static size_t yylval_text_size; +static word_t yylval; static word_t *w_to = NULL; /* To: */ static word_t *w_from = NULL; /* From: */ @@ -112,13 +108,13 @@ /* If saved IPADDR, truncate last octet */ if ( block_on_subnets && save_class == IPADDR ) { - byte *t = xmemrchr(ipsave.text, '.', ipsave.leng); + byte *t = xmemrchr(ipsave->text, '.', ipsave->leng); if (t == NULL) save_class = NONE; else { - ipsave.leng = (uint) (t - ipsave.text); - token_set( &yylval, ipsave.text, ipsave.leng); + ipsave->leng = (uint) (t - ipsave->text); + token_set( &yylval, ipsave->text, ipsave->leng); cls = save_class; done = true; } @@ -143,7 +139,7 @@ switch (cls) { case EOH: /* end of header - bogus if not empty */ - if (leng > MAXTOKENLEN) + if (leng > max_token_len) continue; if (msg_state->mime_type == MIME_MESSAGE) @@ -182,7 +178,7 @@ leng = (uint) (ot - st); } text[leng] = '\0'; /* ensure nul termination */ - build_prefixed_token(&yylval, sizeof(yylval_text), token_prefix, text, leng); + build_prefixed_token(&yylval, yylval_text_size, token_prefix, text, leng); } break; @@ -193,7 +189,7 @@ else { const char *delim = strchr((const char *)text, ':'); leng = (uint) (delim - (const char *)text); - if (leng > MAXTOKENLEN) + if (leng > max_token_len) continue; token_set( &yylval, text, leng); } @@ -202,9 +198,9 @@ /*@fallthrough@*/ case TOKEN: /* ignore anything when not reading text MIME types */ - if (leng > MAXTOKENLEN) + if (leng > max_token_len) continue; - build_prefixed_token(&yylval, sizeof(yylval_text), token_prefix, text, leng); + build_prefixed_token(&yylval, yylval_text_size, token_prefix, text, leng); if (token_prefix == NULL) { switch (msg_state->mime_type) { case MIME_TEXT: @@ -224,7 +220,7 @@ case MESSAGE_ID: /* special token; saved for formatted output, but not returned to bogofilter */ /** \bug: the parser MUST be aligned with lexer_v3.l! */ - if (leng < sizeof(msg_id_text)) + if (leng < max_token_len) { while (!isspace(text[0])) { text += 1; @@ -234,16 +230,15 @@ text += 1; leng -= 1; } - token_set( &yylval, text, leng); - token_copy( &msg_id, &yylval ); + token_set( msg_id, text, leng); } - continue; + continue; case QUEUE_ID: /* special token; saved for formatted output, but not returned to bogofilter */ /** \bug: the parser MUST be aligned with lexer_v3.l! */ - if (queue_id.leng == 0 && - leng < sizeof(queue_id_text) ) + if (*queue_id->text == '\0' && + leng < max_token_len ) { while (isspace(text[0])) { text += 1; @@ -257,8 +252,8 @@ text += 1; leng -= 1; } - token_set( &yylval, text, leng); - token_copy( &queue_id, &yylval ); + memcpy( queue_id->text, text, min(queue_id->leng, leng)+D ); + Z(queue_id->text[queue_id->leng]); } continue; @@ -271,10 +266,12 @@ /* if top level, no address, not localhost, .... */ if (token_prefix == w_recv && msg_state->parent == NULL && - msg_addr.leng == 0 && - strcmp((char *)text, "127.0.0.1") != 0) { + *msg_addr->text == '\0' && + strcmp((char *)text, "127.0.0.1") != 0) + { /* Not guaranteed to be the originating address of the message. */ - token_copy( &msg_addr, &yylval ); + memcpy( msg_addr->text, yylval.text, min(msg_addr->leng, yylval.leng)+D ); + Z(msg_addr->text[yylval.leng]); } } @@ -304,15 +301,15 @@ q1 & 0xff, q2 & 0xff, q3 & 0xff, q4 & 0xff); leng = strlen((const char *)text); - build_prefixed_token(&ipsave, sizeof(ipsave_text), prefix, text, leng); - token_copy( &yylval, &ipsave ); + build_prefixed_token(ipsave, max_token_len, prefix, text, leng); + token_copy( &yylval, ipsave ); word_free(prefix); save_class = IPADDR; *token = &yylval; return (cls); } - build_prefixed_token(&yylval, sizeof(yylval_text), token_prefix, text, leng); + build_prefixed_token(&yylval, yylval_text_size, token_prefix, text, leng); break; case NONE: /* nothing to do */ @@ -342,7 +339,7 @@ } /* eat all long words */ - if (yylval.leng <= MAXTOKENLEN) + if (yylval.leng <= max_token_len) done = true; } @@ -374,11 +371,32 @@ void token_init(void) { + static bool fTokenInit = false; + yyinit(); - token_clear(); + if ( fTokenInit) { + token_clear(); + } + else { + fTokenInit = true; + yylval_text_size = max_token_len + MAX_PREFIX_LEN + MSG_COUNT_PADDING + D; + + yylval_text = (byte *) malloc( yylval_text_size ); + yylval.leng = 0; + yylval.text = yylval_text; + + /* First IP Address in Received: statement */ + msg_addr = word_new( NULL, max_token_len ); + + /* Message ID */ + msg_id = word_new( NULL, max_token_len * 3 ); + + /* Message's first queue ID */ + queue_id = word_new( NULL, max_token_len ); + + ipsave = word_new( NULL, max_token_len ); - if (w_to == NULL) { /* word_new() used to avoid compiler complaints */ w_to = word_news("to:"); /* To: */ w_from = word_news("from:"); /* From: */ @@ -453,9 +471,8 @@ void set_msg_id(byte *text, uint leng) { - if (leng >= sizeof(msg_id_text)) /* Limit length */ - leng = sizeof(msg_id_text) - 1; - token_set( &msg_id, text, leng ); + (void) leng; /* suppress compiler warning */ + token_set( msg_id, text, msg_id->leng ); } #define WFREE(n) word_free(n); n = NULL @@ -477,7 +494,10 @@ void token_clear() { - msg_addr.leng = 0; - msg_id.leng = 0; - queue_id.leng = 0; + if (msg_addr != NULL) + { + *msg_addr->text = '\0'; + *msg_id->text = '\0'; + *queue_id->text = '\0'; + } } diff -u -r1.20 token.h --- token.h 13 Mar 2005 04:43:02 -0000 1.20 +++ token.h 30 Oct 2005 00:46:25 -0000 @@ -12,9 +12,9 @@ #include "lexer.h" -extern word_t msg_addr; /* First IP Address in Received: statement */ -extern word_t msg_id; /* Message ID */ -extern word_t queue_id; /* Message's first Queue ID */ +extern word_t *msg_addr; /* First IP Address in Received: statement */ +extern word_t *msg_id; /* Message ID */ +extern word_t *queue_id; /* Message's first Queue ID */ extern token_t get_token(word_t **token);