[cvs] bogofilter/src charset.c, 1.13, 1.14 charset.h, 1.1, 1.2 collect.c, 1.36, 1.37 lexer.c, 1.103, 1.104 mime.c, 1.32, 1.33 mime.h, 1.19, 1.20

Mon Jan 3 15:16:55 CET 2005

relson at users.sourceforge.net writes:

>  static void map_windows_1251(void)
>  {
> -#ifdef	WINDOWS_1251_to_CYRILLIC
> +#ifdef	CP866
>      /* Map:  windows-1251 -> KOI8-R (Cyrillic) */
>      /* Contributed by: Yar Tikhiy (yarq at users.sourceforge.net) */
>      static char xlate_1251[] = {
> -	0xA8, 0xB3,	
> -	0xB8, 0xA3,	
> +	0xA8, 0xB3,
> +	0xB8, 0xA3,
>  	0xE0, 0xC1,  0xE1, 0xC2,  0xE2, 0xD7,  0xE3, 0xC7,  0xE4, 0xC4,  0xE5, 0xC5,  0xE6, 0xD6,  0xE7, 0xDA,
>  	0xE8, 0xC9,  0xE9, 0xCA,  0xEA, 0xCB,  0xEB, 0xCC,  0xEC, 0xCD,  0xED, 0xCE,  0xEE, 0xCF,  0xEF, 0xD0,
>  	0xF0, 0xD2,  0xF1, 0xD3,  0xF2, 0xD4,  0xF3, 0xD5,  0xF4, 0xC6,  0xF5, 0xC8,  0xF6, 0xC3,  0xF7, 0xDE,
> @@ -285,6 +290,98 @@
>  #endif
>  }

What is this function doing?

Why are we converting directly from one codepage to another?

> +int  htmlUNICODE_decode(byte *buf, int len)

And what does this function do?

> -/* Check for lines wholly composed of printable characters as they can cause a scanner abort 
> +/* Check for lines wholly composed of printable characters as they can cause a scanner abort
>     "input buffer overflow, can't enlarge buffer because scanner uses REJECT"
>  */
>  static bool not_long_token(byte *buf, uint count)
>  {
>      uint i;
>      for (i=0; i < count; i += 1) {
> -	byte c = buf[i];
> -	if ((iscntrl(c) || isspace(c) || ispunct(c)) && (c != '_'))
> -	    return true;
> +       byte c = buf[i];
> +       if ((iscntrl(c) || isspace(c) || ispunct(c)) && (c != '_'))
> +	   return true;
>      }
>      return false;
>  }

Please no formatting changes intermixed with code changes.

> @@ -436,10 +435,10 @@
>  static void mime_encoding(word_t * text)
>  {
>      size_t i;
> -    size_t l;
> +const size_t l =  sizeof("Content-Transfer-Encoding:") - 1;
>      char *w;
>  
> -    l = strlen("Content-Transfer-Encoding:");
> +//    l = strlen("Content-Transfer-Encoding:");

This hunk doesn't look right to me. strlen (string length) is not the
same as sizeof (required storage). Please revert.

> @@ -464,11 +463,10 @@
>  
>  static void mime_type(word_t * text)
>  {
> -    size_t l;
> +    const size_t l = sizeof("Content-Type:");
>      char *w;
>      struct type_s *typ;
>  
> -    l = strlen("Content-Type:");
>      w = (char *) getword(text->text + l, text->text + text->leng);
>  
>      if (!w)

Same here.

> +void mime_type2(word_t * text)

What does this do? Why a mile-long #if 0? The whole mime.* change is
undocumented and I don't see why we might need it, what it changes or does.

> +{
> +    char *w = text->text;
> +    struct type_s *typ;
> +
> +    if (!w)
> +	return;
> +
> +    msg_state->mime_type = MIME_TYPE_UNKNOWN;
> +    for (typ = mime_type_table;
> +	 typ < mime_type_table + COUNTOF(mime_type_table); typ += 1) {
> +	if (strncasecmp(w, typ->name, typ->len) == 0) {
> +	    msg_state->mime_type = typ->type;
> +	    if (DEBUG_MIME(1) || DEBUG_LEXER(1))
> +		fprintf(dbgout, "*** mime_type: %s\n", text->text);
> +	    break;
> +	}
> +    }
> +    if (DEBUG_MIME(0) && msg_state->mime_type == MIME_TYPE_UNKNOWN)
> +	fprintf(stderr, "Unknown mime type - '%s'\n", w);
> +#if	0
> +    switch (msg_state->mime_type) {
> +    case MIME_TEXT:
> +    case MIME_TEXT_HTML:
> +    case MIME_TEXT_PLAIN:
> +	/* XXX: read charset */
> +	return;
> +    case MIME_TYPE_UNKNOWN:
> +	return;
> +    case MIME_MULTIPART:
> +	return;
> +    case MIME_MESSAGE:
> +	/* XXX: read boundary */
> +	return;
> +    case MIME_APPLICATION:
> +	/* XXX: read boundary */
> +	return;
> +    case MIME_IMAGE:
> +	/* XXX: read boundary */
> +	return;
> +    }
> +#endif
> +    return;
> +}
> +
>  void mime_boundary_set(word_t * text)
>  {
>      byte *boundary = text->text;

-- 
Matthias Andree