libgmime installation and build requirements

Matthias Andree matthias.andree at gmx.de
Wed Dec 11 16:59:48 CET 2002


> OTOH, I already have fgetsl reading folded headers (up to the maximum
> buffer size), which is necessary to figure out boundaries easily with flex.

Euhm, that's get_token. It breaks the system tests currently, and I
haven't reviewed what exactly breaks, so I cannot tell what's up.

Here's what it currently looks like; it's incomplete, won't work,
breaks the current stuff, and doesn't yet have base64/quoted-printable
decoders.

? mime.diff
? contrib/.deps
Index: lexer.h
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/lexer.h,v
retrieving revision 1.19
diff -u -r1.19 lexer.h
--- lexer.h	11 Dec 2002 13:56:36 -0000	1.19
+++ lexer.h	11 Dec 2002 15:58:14 -0000
@@ -14,9 +14,12 @@
     BOUNDARY,	/* MIME multipart boundary line */
     IPADDR,	/* ip address */
     CHARSET,	/* charset="..." */
+    EMPTY,	/* empty line */
+    MIME_VERSION,
+    MIME_ENCODING,
+    MIME_TYPE,
     TRANSFER,	/* content-transfer-encoding: */
     UUENCODE,	/* uuencoded line */
-    EMPTY	/* empty line */
 } token_t;
 
 extern char *yylval;
Index: lexer.l
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/lexer.l,v
retrieving revision 1.48
diff -u -r1.48 lexer.l
--- lexer.l	11 Dec 2002 13:41:50 -0000	1.48
+++ lexer.l	11 Dec 2002 15:58:14 -0000
@@ -20,6 +20,7 @@
 #include "lexer.h"
 #include "fgetsl.h"
 #include "xmalloc.h"
+#include "xstrdup.h"
 
 /*
  * Our lexical analysis is different from Paul Graham's rules: 
@@ -57,19 +58,129 @@
 
 #define YY_DECL token_t yylex(void)
 
+enum mimeencoding { MIME_7BIT, MIME_8BIT, MIME_BINARY, MIME_QP, MIME_BASE64 }; /* type of msg_state.mime_encoding */
+
+enum mimetype { MIME_MULTIPART, MIME_MESSAGE, MIME_TEXT, MIME_OTHER }; /* type of msg_state.mime_type; assigned in mime_type() */
+
 char *yylval;
+
+#define MIME_STACK_MAX 100	/* number of entries in msg_stack[] */
+
+static int stackp = 0;	/* index of the msg_stack[] entry currently in use */
+
 static int past_header;
 static int yyinput(char *buf, int max_size);
 
+static token_t save_class = NONE;	/* NOTE(review): not referenced in the visible part of this patch */
+static char save_text[256];	/* NOTE(review): not referenced in the visible part of this patch */
+
+struct msg_state {
+    char *charset;	/* resetmsgstate() default: xstrdup("US-ASCII") */
+    char *boundary; /* only valid if mime_type is MIME_MULTIPART or
+		       MIME_MESSAGE */
+    int mime_header;	/* set to 1 by mime_version(), cleared by the empty-line rule */
+    int mime_mail;	/* set to 1 by mime_version() */
+    enum mimeencoding mime_encoding;	/* resetmsgstate() default: MIME_7BIT */
+    enum mimetype mime_type;	/* resetmsgstate() default: MIME_TEXT; assigned in mime_type() */
+} msg_stack[MIME_STACK_MAX];	/* indexed by stackp */
+
+static void resetmsgstate(struct msg_state *ms, int new)
+{   /* Put *ms back to defaults; new != 0: *ms owns no strings to free yet. */
+    int recycle = !new;
+    if (recycle && ms->boundary) xfree(ms->boundary);
+    if (recycle && ms->charset) xfree(ms->charset);
+    ms->boundary = NULL;
+    ms->charset = xstrdup("US-ASCII");
+    ms->mime_type = MIME_TEXT;
+    ms->mime_encoding = MIME_7BIT;
+    ms->mime_mail = ms->mime_header = 0;
+}
+
+/* Advance past leading whitespace in [t, e); NULL if nothing else is left. */
+static char *skipws(char *t, char *e) {
+    for (; t < e; t++)
+	if (!isspace((unsigned char)*t))
+	    return t;	/* first non-blank character */
+    return NULL;
+}
+
+/* Skip optional whitespace, one optional ';', then whitespace; NULL at end. */
+static char *skipsemi(char *t, char *e) {
+    char *p = skipws(t, e);
+    if (p == NULL) return NULL;
+    return skipws(*p == ';' ? p + 1 : p, e);
+}
+
+/* get next MIME word in [t, e) (quoted string or non-blank run), NULL when
+ * none found. caller must free returned string with xfree() */
+static char *getmimew(char *t, char *e) {
+    int quote = 0;
+    char *ts;
+    char *n;
+    t = skipws(t, e);
+    if (!t) return NULL;
+    if (*t == '"') {
+	quote++;
+	t++;
+    }
+    ts = t;
+    /* parenthesised: "t < e &&" must guard both arms of the ?: */
+    while(t < e && (quote ? *t != '"' : !isspace((unsigned char)*t)))
+	t++;
+    n = xmalloc(t - ts + 1);
+    strlcpy(n, ts, t - ts + 1); /* copy from the word start, not its end */
+    return n;
+}
+
+/* stub: always returns NULL; parameter lookup is not implemented yet */
+static char *getparam(char *t, char *e, const char *param) {
+    (void)t; (void)e; (void)param; /* silence unused-parameter warnings */
+    return NULL; /* NOT YET IMPLEMENTED */
+}
+
+static void mime_version(void) {	/* action for a matched MIME-Version: line */
+    struct msg_state *cur = &msg_stack[stackp];
+    cur->mime_mail = cur->mime_header = 1;
+}
+
+static void mime_encoding(void) {	/* stub: action for Content-Transfer-Encoding: lines, does nothing yet */
+}
+
+static void mime_type(void) { /* classify the media type of the matched Content-Type: line */
+    char *w = getmimew(yytext + sizeof("Content-Type:") - 1, yytext + yyleng); /* skip header name */
+    enum mimetype t = MIME_OTHER;
+    if (!w) return;
+    if (!strncasecmp(w, "text/", 5)) { t = MIME_TEXT; } /* strncasecmp() == 0 means match */
+    else if (!strncasecmp(w, "multipart/", 10)) { t = MIME_MULTIPART; }
+    else if (!strncasecmp(w, "message/", 8)) { t = MIME_MESSAGE; }
+    xfree(w); /* getmimew() hands ownership to the caller */
+    msg_stack[stackp].mime_type = t;
+    switch(t) {
+	case MIME_TEXT:
+	    /* XXX: read charset */
+	    break;
+	case MIME_OTHER: return;
+	case MIME_MULTIPART:
+	case MIME_MESSAGE:
+	    /* XXX: read boundary */
+	    break;
+    }
+    /* XXX: incomplete */
+}
+static void mime_boundary(void) {	/* stub: action for a matched boundary line, does nothing yet */
+}
+
 %}
 
-%option align nounput noyywrap noreject 8bit
+%option align nounput noyywrap noreject 8bit debug caseless
 
 BASE64		^([A-Za-z0-9/+]+={1,2}|[A-Za-z0-9/+]{32,})$
 UUENCODE	^M[^ ]{60}X?$
 UINT8		([01]?[0-9]?[0-9]|2([0-4][0-9]|5[0-5]))
 IPADDR		{UINT8}\.{UINT8}\.{UINT8}\.{UINT8}
-MIME_BOUNDARY	^--[^[:blank:][:cntrl:]]+
+BCHARSNOSPC	[0-9a-zA-Z'()+_,-./:=?]
+BCHARS		[ 0-9a-zA-Z'()+_,-./:=?]
+MIME_BOUNDARY	{BCHARS}*{BCHARSNOSPC}
 
 %%
 
@@ -228,7 +339,10 @@
 \<\!--	;
 -->	;
 
-^From\ 						{past_header = 0; return(FROM);}
+^MIME-Version:.*				{ mime_version(); }
+^Content-Transfer-Encoding:.*			{ mime_encoding(); }
+^Content-Type:.*				{ mime_type(); }
+^From\ 						{ past_header = 0; /* new message: headers again */ stackp = 0; resetmsgstate(&msg_stack[stackp], 0); return(FROM); }
 ^Date:.*|Delivery-Date:.*			;
 ^Message-ID:.*					;
 {BASE64}					;
@@ -239,12 +353,12 @@
 boundary=.*					;
 name=\"?					;
 filename=\"?					;
-{MIME_BOUNDARY}(--)?$				{return (BOUNDARY);}
+^--{MIME_BOUNDARY}(--)?$			{ mime_boundary(); return (BOUNDARY); }
 
 {IPADDR}					{return(IPADDR);}
 [^[:blank:][:cntrl:][:digit:][:punct:]][^][:blank:]<>;=():&%$#@!+|/\\{}^\"?\*,[:cntrl:][]+[^[:blank:][:punct:][:cntrl:]]	{return(TOKEN);} 
 .						;
-^\n						{past_header = 1; /* eat token */}
+^\n						{ past_header = 1; msg_stack[stackp].mime_header = 0; /* eat token */}
 \n						;
 
 %%
@@ -252,7 +366,7 @@
 static int yyinput(char *buf, int max_size)
 /* input getter for the scanner */
 {
-    int i, returned;
+    int i, c, returned;
 
     static size_t hdrlen = 0;
     if (hdrlen==0)
@@ -271,6 +385,24 @@
 	} while (returned != -1 && isspace((unsigned char)*buf));
     }
 
+
+    do {
+	if (EOF != (c = fgetc(yyin))) {
+	    if (c == ' ' || c == '\t') {
+		int add;
+		/* continuation line */
+		ungetc(c,yyin);
+		if (buf[returned - 1] == '\n') returned --;
+		add = fgetsl(buf + returned, max_size - returned, yyin);
+		if (add == EOF) break;
+		returned += add;
+	    } else {
+		ungetc(c,yyin);
+		c = EOF; /* end marker */
+	    }
+	}
+    } while(c != EOF);
+
     if (returned == -1) {
 	if (ferror(yyin)) {
 	    PRINT_ERROR("input in flex scanner failed\n");
@@ -278,6 +410,12 @@
 	} else {
 	    return YY_NULL;
 	}
+    }
+
+    if (1) { /* debug */
+	fprintf(stderr, "%d: ", returned);
+	fwrite(buf, 1, returned, stderr);
+	fprintf(stderr, "\n");
     }
 
     if (passthrough)




More information about the bogofilter-dev mailing list