libgmime installation and build requirements
Matthias Andree
matthias.andree at gmx.de
Wed Dec 11 16:59:48 CET 2002
> OTOH, I already have fgetsl read folded headers (up to the maximum
> buffer size) which is necessary to figure boundaries easily with flex.
Er, that's get_token. It currently breaks the system tests, and I
haven't yet reviewed what exactly breaks, so I cannot tell what's wrong.
Here's what it currently looks like. It's incomplete, won't work,
breaks the current stuff, and doesn't yet have base64/quoted-printable
decoders.
? mime.diff
? contrib/.deps
Index: lexer.h
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/lexer.h,v
retrieving revision 1.19
diff -u -r1.19 lexer.h
--- lexer.h 11 Dec 2002 13:56:36 -0000 1.19
+++ lexer.h 11 Dec 2002 15:58:14 -0000
@@ -14,9 +14,12 @@
BOUNDARY, /* MIME multipart boundary line */
IPADDR, /* ip address */
CHARSET, /* charset="..." */
+ EMPTY, /* empty line */
+ MIME_VERSION,
+ MIME_ENCODING,
+ MIME_TYPE,
TRANSFER, /* content-transfer-encoding: */
UUENCODE, /* uuencoded line */
- EMPTY /* empty line */
} token_t;
extern char *yylval;
Index: lexer.l
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/lexer.l,v
retrieving revision 1.48
diff -u -r1.48 lexer.l
--- lexer.l 11 Dec 2002 13:41:50 -0000 1.48
+++ lexer.l 11 Dec 2002 15:58:14 -0000
@@ -20,6 +20,7 @@
#include "lexer.h"
#include "fgetsl.h"
#include "xmalloc.h"
+#include "xstrdup.h"
/*
* Our lexical analysis is different from Paul Graham's rules:
@@ -57,19 +58,129 @@
#define YY_DECL token_t yylex(void)
+enum mimeencoding { MIME_7BIT, MIME_8BIT, MIME_BINARY, MIME_QP, MIME_BASE64 };
+
+enum mimetype { MIME_MULTIPART, MIME_MESSAGE, MIME_TEXT, MIME_OTHER };
+
char *yylval;
+
+#define MIME_STACK_MAX 100
+
+static int stackp = 0;
+
static int past_header;
static int yyinput(char *buf, int max_size);
+static token_t save_class = NONE;
+static char save_text[256];
+
+struct msg_state {
+ char *charset;
+ char *boundary; /* only valid if mime_type is MIME_MULTIPART or
+ MIME_MESSAGE */
+ int mime_header;
+ int mime_mail;
+ enum mimeencoding mime_encoding;
+ enum mimetype mime_type;
+} msg_stack[MIME_STACK_MAX];
+
+static void resetmsgstate(struct msg_state *ms, int new)
+{
+ ms->mime_mail = 0;
+ ms->mime_header = 0;
+ ms->mime_type = MIME_TEXT;
+ ms->mime_encoding = MIME_7BIT;
+ if (!new && ms->boundary) xfree(ms->boundary);
+ ms->boundary = NULL;
+ if (!new && ms->charset) xfree(ms->charset);
+ ms->charset = xstrdup("US-ASCII");
+}
+
+/* skips whitespace, returns NULL when ran into end of string */
+static char *skipws(char *t, char *e) {
+ while(t < e && isspace((unsigned char)*t))
+ t++;
+ if (t < e) return t;
+ return NULL;
+}
+
+/* skips [ws]";"[ws] */
+static char *skipsemi(char *t, char *e) {
+ if (!(t = skipws(t, e))) return NULL;
+ if (*t == ';') t++;
+ return skipws(t, e);
+}
+
+/* get next MIME word, NULL when none found.
+ * caller must free returned string with xfree() */
+static char *getmimew(char *t, char *e) {
+ int quote = 0;
+ char *ts;
+ char *n;
+ t = skipws(t, e);
+ if (!t) return NULL;
+ if (*t == '"') {
+ quote++;
+ t++;
+ }
+ ts = t;
+ while(t < e && quote ? *t != '"' : !isspace((unsigned char)*t)) {
+ t++;
+ }
+ n = xmalloc(t - ts + 1);
+ strlcpy(n, t, t - ts + 1);
+ return n;
+}
+
+static char *getparam(char *t, char *e, const char *param) {
+ char *w, *u;
+
+ return NULL; /* NOT YET IMPLEMENTED */
+}
+
+static void mime_version(void) {
+ msg_stack[stackp].mime_mail = 1;
+ msg_stack[stackp].mime_header = 1;
+}
+
+static void mime_encoding(void) {
+}
+
+static void mime_type(void) {
+ char *w = getmimew(yytext, yytext + yyleng);
+ enum mimetype t = MIME_OTHER;
+ if (!w) return;
+ if (strncasecmp(w, "text/", 5)) { t = MIME_TEXT; }
+ else if (strncasecmp(w, "multipart/", 10)) { t = MIME_MULTIPART; }
+ else if (strncasecmp(w, "message/", 8)) { t = MIME_MESSAGE; }
+ msg_stack[stackp].mime_type = t;
+ switch(t) {
+ case MIME_TEXT:
+ /* XXX: read charset */
+ break;
+ case MIME_OTHER:
+ return;
+ case MIME_MULTIPART:
+ case MIME_MESSAGE:
+ /* XXX: read boundary */
+ break;
+ }
+ /* XXX: incomplete */
+}
+static void mime_boundary(void) {
+}
+
%}
-%option align nounput noyywrap noreject 8bit
+%option align nounput noyywrap noreject 8bit debug caseless
BASE64 ^([A-Za-z0-9/+]+={1,2}|[A-Za-z0-9/+]{32,})$
UUENCODE ^M[^ ]{60}X?$
UINT8 ([01]?[0-9]?[0-9]|2([0-4][0-9]|5[0-5]))
IPADDR {UINT8}\.{UINT8}\.{UINT8}\.{UINT8}
-MIME_BOUNDARY ^--[^[:blank:][:cntrl:]]+
+BCHARSNOSPC [0-9a-zA-Z'()+_,-./:=?]
+BCHARS [ 0-9a-zA-Z'()+_,-./:=?]
+MIME_BOUNDARY {BCHARS}*{BCHARSNOSPC}
%%
@@ -228,7 +339,10 @@
\<\!-- ;
--> ;
-^From\ {past_header = 0; return(FROM);}
+^MIME-Version:.* { mime_version(); }
+^Content-Transfer-Encoding:.* { mime_encoding(); }
+^Content-Type:.* { mime_type(); }
+^From\ { stackp = 0; resetmsgstate(&msg_stack[stackp], 0); return(FROM); }
^Date:.*|Delivery-Date:.* ;
^Message-ID:.* ;
{BASE64} ;
@@ -239,12 +353,12 @@
boundary=.* ;
name=\"? ;
filename=\"? ;
-{MIME_BOUNDARY}(--)?$ {return (BOUNDARY);}
+^--{MIME_BOUNDARY}(--)?$ { mime_boundary(); return (BOUNDARY); }
{IPADDR} {return(IPADDR);}
[^[:blank:][:cntrl:][:digit:][:punct:]][^][:blank:]<>;=():&%$#@!+|/\\{}^\"?\*,[:cntrl:][]+[^[:blank:][:punct:][:cntrl:]] {return(TOKEN);}
. ;
-^\n {past_header = 1; /* eat token */}
+^\n { past_header = 1; msg_stack[stackp].mime_header = 0; /* eat token */}
\n ;
%%
@@ -252,7 +366,7 @@
static int yyinput(char *buf, int max_size)
/* input getter for the scanner */
{
- int i, returned;
+ int i, c, returned;
static size_t hdrlen = 0;
if (hdrlen==0)
@@ -271,6 +385,24 @@
} while (returned != -1 && isspace((unsigned char)*buf));
}
+
+ do {
+ if (EOF != (c = fgetc(yyin))) {
+ if (c == ' ' || c == '\t') {
+ int add;
+ /* continuation line */
+ ungetc(c,yyin);
+ if (buf[returned - 1] == '\n') returned --;
+ add = fgetsl(buf + returned, max_size - returned, yyin);
+ if (add == EOF) break;
+ returned += add;
+ } else {
+ ungetc(c,yyin);
+ c = EOF; /* end marker */
+ }
+ }
+ } while(c != EOF);
+
if (returned == -1) {
if (ferror(yyin)) {
PRINT_ERROR("input in flex scanner failed\n");
@@ -278,6 +410,12 @@
} else {
return YY_NULL;
}
+ }
+
+ if (1) { /* debug */
+ fprintf(stderr, "%d: ", returned);
+ fwrite(buf, 1, returned, stderr);
+ fprintf(stderr, "\n");
}
if (passthrough)
More information about the bogofilter-dev
mailing list