Better database??

michael at optusnet.com.au michael at optusnet.com.au
Tue Mar 2 04:42:32 CET 2004


I've been playing with word pairs and suchlike to improve the accuracy
of bogofilter and was getting very frustrated with db as a datastore.

In particular, db is slow and bulky. My tiny little database of only
15 million tokens is almost a gigabyte in size! DB grows without
bound and deals badly with hapaxes and long tokens.

So I did something about it: A new data store!

Use ./configure --with-database=hash ...

The hash datastore is a _lossy_ datastore. It doesn't guarantee
that it won't drop some data.

It works by taking a 64 bit signature of the token you're trying to store,
then hashing into a bucket to store the signature and the token counts.

It's possible that two tokens may hash to the same signature (improbable
though).

There's a maximum of 80 buckets that any given signature can be stored
in (the 'range'). If none of those buckets are empty, then a hapax in
that range will be discarded in favour of the new token. If there are
no hapaxes then the new token will be _discarded_.

What's good about it:
   * It's very CPU efficient.
   * It's very space efficient. :) (16 bytes per token always).
   * It never grows in size.
   * It automatically cleans up hapaxes if need be.
   * It uses no external libraries! (wave bye-bye to the db disaster area).
   * It's very small and easy to understand.
   * It's corruption resistant.

What's bad about it:
   * There's a (small) chance it will lose data.
   * It never grows in size.
   * The machine _must_ support a working sane mmap.
   * It's not endian independent. (You can't copy a database from a
        big endian machine to a little endian machine or vice versa).
   * It doesn't store the date the token was last used (not needed).
   * It doesn't actually store tokens, only token signatures. So
        bogoutil -d looks a little odd. 
   * It doesn't yet handle multiple writers, although that's very easy to
        add.
   * There's currently no way to customize the number of buckets without
        editing the source.
        You get two million buckets, and by jupiter you'll like them!

Comments welcome. (This is step one in my evil plan to add multi-word
phrase recognition).

[ Little rant: The datastore abstraction layer works in exactly the
wrong direction. The API makes everything look like db instead of presenting
the bogofilter data to the datastore in a standard way. argh!!! ]

Without further ado: The diff.

Michael.

diff --exclude='*.spec' --exclude=configure --exclude=mkinstalldirs --exclude='*~' --exclude='*cache*' --exclude='*.in' --exclude='*.m4' --exclude=Makefile --new-file -ur bogofilter-0.17.2/configure.ac bogofilter-dev/configure.ac
--- bogofilter-0.17.2/configure.ac	2004-02-23 04:08:32.000000000 +1100
+++ bogofilter-dev/configure.ac	2004-03-02 12:04:39.000000000 +1100
@@ -270,7 +270,7 @@
 WITH_DB_ENGINE=db
 AC_ARG_WITH(database,
 	    AC_HELP_STRING([--with-database=ENGINE],
-	    [Choose database engine {db|tdb|qdbm} [[ENGINE=db]]]),
+	    [Choose database engine {db|tdb|qdbm|hash} [[ENGINE=db]]]),
 	    [ WITH_DB_ENGINE=$withval ]
 )
 
@@ -307,6 +307,11 @@
 	])],,AC_MSG_ERROR(Cannot link to qdbm library.))
 	LIBS="$saveLIBS"
     ;;
+    xhash)
+	AC_DEFINE(ENABLE_HASH_DATASTORE,1, [Enable hash datastore])
+	AC_DEFINE_UNQUOTED(DB_TYPE, "hash")
+	AC_DEFINE_UNQUOTED(DB_EXT, ".chash")
+    ;;
     xdb)
 	dnl check if we need LIBDB at all (it is harmful on BSD since
 	dnl they rename the library including soname)
@@ -384,7 +389,7 @@
 	LIBS="$saveLIBS"
     ;;
     *)
-	AC_MSG_ERROR([Invalid --with-database argument. Supported engines are db, tdb, qdbm.])
+	AC_MSG_ERROR([Invalid --with-database argument. Supported engines are db, tdb, qdbm, hash.])
     ;;
 esac
 
@@ -398,6 +403,7 @@
 
 AM_CONDITIONAL(ENABLE_TDB_DATASTORE, test "x$WITH_DB_ENGINE" = "xtdb")
 AM_CONDITIONAL(ENABLE_QDBM_DATASTORE, test "x$WITH_DB_ENGINE" = "xqdbm")
+AM_CONDITIONAL(ENABLE_HASH_DATASTORE, test "x$WITH_DB_ENGINE" = "xhash")
 
 dnl Use TRIO to replace missing snprintf/vsnprintf.
 needtrio=0
diff --exclude='*.spec' --exclude=configure --exclude=mkinstalldirs --exclude='*~' --exclude='*cache*' --exclude='*.in' --exclude='*.m4' --exclude=Makefile --new-file -ur bogofilter-0.17.2/src/bogohist.c bogofilter-dev/src/bogohist.c
--- bogofilter-0.17.2/src/bogohist.c	2004-02-21 04:05:41.000000000 +1100
+++ bogofilter-dev/src/bogohist.c	2004-03-02 13:01:38.000000000 +1100
@@ -140,7 +140,7 @@
     if (dsh == NULL)
 	return EX_ERROR;
 
-    ds_get_msgcounts(dsh, &val);
+    ds_get_msgcounts(dsh, &msgs_good, &msgs_bad);
     msgs_bad  = val.spamcount;
     msgs_good = val.goodcount;
 
diff --exclude='*.spec' --exclude=configure --exclude=mkinstalldirs --exclude='*~' --exclude='*cache*' --exclude='*.in' --exclude='*.m4' --exclude=Makefile --new-file -ur bogofilter-0.17.2/src/bogoutil.c bogofilter-dev/src/bogoutil.c
--- bogofilter-0.17.2/src/bogoutil.c	2004-02-21 04:05:41.000000000 +1100
+++ bogofilter-dev/src/bogoutil.c	2004-03-02 13:01:36.000000000 +1100
@@ -270,7 +270,7 @@
     if (show_probability)
     {
 	dsv_t val;
-	ds_get_msgcounts(dsh, &val);
+	ds_get_msgcounts(dsh, &msgs_good, &msgs_bad);
 	msgs_good = val.goodcount;
 	msgs_bad  = val.spamcount;
 	robs = ROBS;
diff --exclude='*.spec' --exclude=configure --exclude=mkinstalldirs --exclude='*~' --exclude='*cache*' --exclude='*.in' --exclude='*.m4' --exclude=Makefile --new-file -ur bogofilter-0.17.2/src/datastore.c bogofilter-dev/src/datastore.c
--- bogofilter-0.17.2/src/datastore.c	2004-02-21 04:05:41.000000000 +1100
+++ bogofilter-dev/src/datastore.c	2004-03-02 13:02:32.000000000 +1100
@@ -329,23 +329,34 @@
 /*
   Get the number of messages associated with database.
 */
-bool ds_get_msgcounts(void *vhandle, dsv_t *val)
+bool ds_get_msgcounts(void *vhandle, int * good, int * bad)
 {
     int rc;
+    dsv_t val = { 0 };	/* counts read back as 0 if ds_read() fills nothing in */
     dsh_t *dsh = vhandle;
-    rc = ds_read(dsh, msg_count_tok, val);
+    /* Fetch the message-count record, then hand both counters to the caller. */
+    rc = ds_read(dsh, msg_count_tok, &val);
+    *good = val.goodcount;
+    *bad = val.spamcount;
+    
     return rc == 0;
 }
 
 /*
  Set the number of messages associated with database.
 */
-void ds_set_msgcounts(void *vhandle, dsv_t *val)
+void ds_set_msgcounts(void *vhandle, int good, int bad)
 {
     dsh_t *dsh = vhandle;
-    if (timestamp_tokens && val->date != 0)
-	val->date = today;
-    ds_write(dsh, msg_count_tok, val);
+    dsv_t val;
+
+    /* val is a fresh local: assign its date outright rather than testing it. */
+    val.date = timestamp_tokens ? today : 0;
+
+    val.goodcount = good;
+    val.spamcount = bad;
+    
+    ds_write(dsh, msg_count_tok, &val);
     return;
 }
 
diff --exclude='*.spec' --exclude=configure --exclude=mkinstalldirs --exclude='*~' --exclude='*cache*' --exclude='*.in' --exclude='*.m4' --exclude=Makefile --new-file -ur bogofilter-0.17.2/src/datastore.h bogofilter-dev/src/datastore.h
--- bogofilter-0.17.2/src/datastore.h	2004-02-11 11:21:00.000000000 +1100
+++ bogofilter-dev/src/datastore.h	2004-03-02 13:02:56.000000000 +1100
@@ -126,10 +126,10 @@
 extern void ds_updvalues(void *vhandle, const dbv_t *token, const dbv_t *updval);
 
 /** Get the database message count */
-extern bool ds_get_msgcounts(void *vhandle, dsv_t *val);
+extern bool ds_get_msgcounts(void *vhandle, int * good, int * bad);
 
 /** set the database message count */
-extern void ds_set_msgcounts(void *vhandle, dsv_t *val);
+extern void ds_set_msgcounts(void *vhandle, int good, int bad);
 
 /* Get the current process id */
 extern unsigned long ds_handle_pid(void *vhandle);
@@ -229,9 +229,9 @@
 
 /** Get the database message count */
 extern __inline 
-void ds_get_msgcounts(void*, dbv_t *)
+void ds_get_msgcounts(void*, int * good, int * bad)
 {
-    return db_get_msgcounts(void*, dbv_t *)
+    return db_get_msgcounts(void*, int  * , int * )
 }
 
 /** set the database message count */
diff --exclude='*.spec' --exclude=configure --exclude=mkinstalldirs --exclude='*~' --exclude='*cache*' --exclude='*.in' --exclude='*.m4' --exclude=Makefile --new-file -ur bogofilter-0.17.2/src/datastore_hash.c bogofilter-dev/src/datastore_hash.c
--- bogofilter-0.17.2/src/datastore_hash.c	1970-01-01 10:00:00.000000000 +1000
+++ bogofilter-dev/src/datastore_hash.c	2004-03-02 14:31:23.000000000 +1100
@@ -0,0 +1,478 @@
+/* $Id: datastore_hash.c,v 1.1 2004/02/11 00:21:00 m-a Exp $ */
+
+/*****************************************************************************
+
+NAME:
+datastore_hash.c -- implements the datastore, using a crc64 hash table.
+
+AUTHORS:
+Michael O'Reilly	2004
+
+******************************************************************************/
+
+#include "common.h"
+
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include "datastore.h"
+#include "datastore_db.h"
+#include "error.h"
+#include "paths.h"
+#include "xmalloc.h"
+#include "xstrdup.h"
+
+/* initial bucket array element count (for a new database) */
+static const int DB_BUCKETS = 2000000; /* 32 meg default database size (2,000,000 buckets of 16 bytes each). */
+static const int DB_RANGE = 80; /* search up to 80 buckets when there's a collision. */
+
+typedef struct {
+    uint32_t       h1;	/* one 32-bit half of a token signature */
+    uint32_t       h2;	/* the other half */
+} hash_t;
+
+static hash_t CrcXor[256];	/* per-byte XOR table, filled in by crcinit() */
+static int    DidCrcInit = 0;	/* set to 1 once crcinit() has run */
+
+
+typedef struct {
+    uint32_t h1;	/* signature half from hhash(); h1 == 0 && h2 == 0 marks an empty bucket */
+    uint32_t h2;	/* other signature half */
+
+    uint32_t v[2]; /* Must equal IX_SIZE, but must not change either */
+} bucket_t;
+
+typedef struct {
+    uint32_t	version;	/* version number of structure. */
+    uint32_t	buckets;	/* Number of buckets. */
+    uint32_t	locked;		/* If the map is locked for writing, this holds the PID of the locking process. */
+    uint32_t	range;		/* How many buckets to search when there's a hash collision. */
+    uint32_t    size;		/* How many bytes long this map is, header included. */
+
+    uint32_t	reserved[1019]; /* pad to 4096 bytes. */
+
+    bucket_t 	b[];		/* array of 'buckets' elements (C99 flexible array member). */
+} map_t;
+
+typedef struct {
+    char *path;		/* copy of the path passed to dbh_init() */
+    char *name;		/* data file name that dbh_init() builds from path and name */
+    char *logname;	/* name with ".log" appended */
+    bool locked;	/* set to false by dbh_init(); not touched elsewhere in the code shown */
+    int fd;		/* descriptor of the opened data file */
+    int mode;		/* open mode requested in db_open() */
+    map_t * map;	/* the mmap()ed data file */
+    FILE * log;		/* append-mode token log; db_open() opens it only for DB_WRITE */
+} dbh_t;
+
+
+/* Function definitions */
+
+const char *db_version_str(void)
+{
+    static char banner[80];	/* formatted on the first call, reused afterwards */
+    if (banner[0] == '\0')
+	snprintf(banner, sizeof banner, "Hash DB(version %s)", "1.0");
+    return banner;
+}
+
+
+static dbh_t *dbh_init(const char *path, const char *name)
+{
+    /* Allocate a zeroed handle and fill in its path, data file name and log file name. */
+    const size_t name_sz = strlen(path) + strlen(name) + 2;
+    const size_t log_sz = name_sz + 5;
+    dbh_t *dbh = xmalloc(sizeof(dbh_t));
+
+    memset(dbh, 0, sizeof(dbh_t));	/* valgrind */
+    dbh->locked = false;
+
+    dbh->path = xstrdup(path);
+
+    dbh->name = xmalloc(name_sz);
+    build_path(dbh->name, name_sz, path, name);
+
+    dbh->logname = xmalloc(log_sz);
+    snprintf(dbh->logname, log_sz, "%s.log", dbh->name);
+
+    return dbh;
+}
+
+
+static void dbh_free(/*@only@*/ dbh_t *handle)
+{
+    if (handle == NULL)	/* nothing to release */
+	return;
+    xfree(handle->logname);	/* allocated by dbh_init() together with name and path */
+    xfree(handle->name);
+    xfree(handle->path);
+    xfree(handle);
+}
+
+
+/*
+  Initialize database.
+  Returns: pointer to database handle on success, NULL otherwise.
+*/
+void *db_open(const char *db_file, const char *name, dbmode_t open_mode)
+{
+    dbh_t *handle;
+    dsh_t * dsh;
+
+    map_t * map = NULL;
+    map_t read_map;		/* header as found in, or to be written to, the file */
+    ssize_t got;		/* bytes of header actually read */
+    int fd = -1;
+
+    int flags;
+    uint32_t i;
+
+    handle = dbh_init(db_file, name);
+    handle->mode = open_mode;
+
+    if (open_mode == DB_WRITE) {
+	fd = open(handle->name, O_CREAT | O_RDWR, 0644);
+	flags = PROT_READ | PROT_WRITE;
+    } else {
+	fd = open(handle->name, O_RDONLY );
+	flags = PROT_READ;
+    }
+
+    if (fd < 0) /* open failed. */
+	goto open_err;
+
+    got = read(fd, (void*) &read_map, sizeof(read_map));
+    if (got < 0)
+	goto open_err;
+    if ((size_t) got < sizeof(read_map))	/* new or truncated file: no usable header */
+	memset(&read_map, 0, sizeof(read_map));
+    if (!read_map.version) { /* Initial open of file! */
+	int size = sizeof(bucket_t) * DB_BUCKETS + sizeof(map_t);
+	if (open_mode != DB_WRITE) {	/* Not a valid file, but opening for read only!? */
+		errno = EINVAL;	/* no libc call failed; give strerror() something apt */
+		goto open_err;
+	}
+	if (ftruncate(fd, size) != 0)
+		goto open_err;
+	read_map.version = 1;
+	read_map.buckets = DB_BUCKETS;
+	read_map.range = DB_RANGE;
+	read_map.locked = 0;
+	read_map.size = size;
+    }
+    map = (map_t*) mmap( NULL, read_map.size, flags, MAP_SHARED, fd, 0);
+    if (map == MAP_FAILED)	/* mmap() signals failure with MAP_FAILED, not NULL */
+	goto open_err;
+
+    if (!map->version || (size_t) got < sizeof(read_map)) {	/* Initialize this map. */
+	memcpy(map, &read_map, sizeof(read_map));
+
+			/* Touch every page in the map to keep the on-disk image unfragmented. */
+	for (i = 0; i < map->buckets ; ++i)
+		map->b[i].h1 = map->b[i].h2 = 0;
+
+			/* Wait for it to all write out to disk. */
+	msync(map, map->size, MS_SYNC);
+    }
+
+    if (DEBUG_DATABASE(1))
+	fprintf(dbgout, "(hash) db_open( %s, %d )\n", handle->name, open_mode);
+
+    handle->fd = fd;
+    handle->map = map;
+    if (handle->logname && open_mode == DB_WRITE)
+      handle->log = fopen( handle->logname, "a");
+
+    dsh = dsh_init(handle, false);	/* Convert to dsh. */
+
+    return dsh;
+
+ open_err:
+    print_error(__FILE__, __LINE__, "(hash) db_open(%s, %d) failed: %s",
+		handle->name, flags, strerror(errno));
+    if (fd >= 0)
+	close(fd);
+    dbh_free(handle);
+
+    return NULL;
+}
+
+
+/* ----------------------------------------------- */
+/* Hash methods. 			           */
+/* ----------------------------------------------- */
+
+#define HINIT1  0xFAC432B1
+#define HINIT2  0x0CD5E44A
+
+#define POLY1   0x00600340UL
+#define POLY2   0x00F0D50BUL
+
+static void
+crcinit(void)
+{
+    /* Precompute, for every possible byte value, the pair that hhash() XORs in. */
+    int byte;
+    for (byte = 0; byte < 256; ++byte) {
+        hash_t acc = { 0, 0 };
+        int bits = byte;
+        int step = 0;
+        while (step < 8) {
+            if (bits & 0x80) {
+                acc.h1 ^= POLY1;
+                acc.h2 ^= POLY2;
+            }
+            /* Shift the pair left one bit, carrying h1's top bit into h2's low bit. */
+            acc.h2 = (acc.h2 << 1) | ((acc.h1 & 0x80000000) ? 1 : 0);
+            acc.h1 <<= 1;
+            bits <<= 1;
+            ++step;
+        }
+        CrcXor[byte] = acc;
+    }
+    DidCrcInit = 1;
+}
+
+static hash_t
+hhash(const char *text, int len)
+{
+    /*
+     * HASH_CRC  - CRC64
+     * Folds len bytes of text into a signature held as two 32-bit halves.
+     */
+    hash_t sig;
+
+    if (!DidCrcInit)
+        crcinit();		/* fill CrcXor[] before first use */
+
+    sig.h1 = HINIT1;
+    sig.h2 = HINIT2;
+
+    for (; len != 0; --len, ++text) {
+        const int top = (sig.h1 >> 24) & 255;	/* byte about to be shifted out of h1 */
+        const uint32_t carry = (uint32_t)sig.h2 >> 24;	/* byte moving from h2 into h1 */
+
+        sig.h1 = (sig.h1 << 8) ^ (int)carry ^ CrcXor[top].h1;
+        sig.h2 = (sig.h2 << 8) ^ (uint8_t)*text ^ CrcXor[top].h2;
+    }
+
+    /* Force bit 31 of each half clear; a half that had it set gets bit 0 flipped. */
+    sig.h1 = (sig.h1 & 0x7FFFFFFF) ^ (sig.h1 >> 31);
+    sig.h2 = (sig.h2 & 0x7FFFFFFF) ^ (sig.h2 >> 31);
+
+    return sig;
+}
+
+
+
+/* ----------------------------------------------- */
+/* database methods. */
+/* ----------------------------------------------- */
+
+static bucket_t * find_bucket(map_t *m, char * data, int len, int create)
+{
+    /* Look up the bucket holding data's signature, probing at most m->range
+     * consecutive buckets (wrapping at the end of the table).  With create
+     * set, a missing signature claims the first empty bucket met, or failing
+     * that evicts a bucket in the probe window whose counts sum to <= 1.
+     * Returns NULL when not found (create == 0) or when nothing can be reused. */
+    hash_t h;
+    uint32_t start, off;
+    bucket_t *b;
+
+    if (m->buckets == 0)	/* corrupt header: avoid dividing by zero below */
+	return NULL;
+    h = hhash(data, len);
+    start = (h.h1 ^ h.h2) % m->buckets; /* Find starting point. */
+
+    for (off = 0; off < m->range; ++off) {
+	b = &m->b[((uint64_t) start + off) % m->buckets];	/* never index past b[] */
+	if (b->h1 == h.h1 && b->h2 == h.h2)
+	    return b;
+	if (!b->h1 && !b->h2) {	/* Empty bucket ends the probe chain. */
+	    if (!create)
+		return NULL;
+	    b->h1 = h.h1;
+	    b->h2 = h.h2;
+	    b->v[0] = b->v[1] = 0;
+	    return b;
+	}
+    }
+
+    if (!create)
+	return NULL;
+
+/* Oh bugger. All the buckets are used. Search for an appropriate bucket to reuse. */
+    for (off = 0; off < m->range; ++off) {
+	b = &m->b[((uint64_t) start + off) % m->buckets];
+	if (b->v[0] + b->v[1] <= 1) { /* Excellent! A hapax! */
+	    b->h1 = h.h1;
+	    b->h2 = h.h2;
+	    b->v[0] = b->v[1] = 0;
+	    return b;
+	}
+    }
+    return NULL;	/* No empty buckets, no hapaxes. fail. */
+}
+
+
+int db_delete(dsh_t *dsh, const dbv_t *token)
+{
+    /* Zero the counts in token's bucket; the stored signature is left in place. */
+    dbh_t *dbh = dsh->dbh;
+    bucket_t *slot = find_bucket(dbh->map, token->data, token->leng, 0);
+
+    if (slot == NULL)
+	return -1; /* Failed to find token. */
+
+    slot->v[1] = 0;	/* make bucket available for re-use. */
+    slot->v[0] = 0;
+    return 0;		/* 0 if ok */
+}
+
+
+/*
+ * This is a terrible hack.
+ *
+ * Because the data is stored compressed inside the mmap
+ * we need to convert the data back and forth.. :(
+ */
+int db_get_dbvalue(dsh_t *dsh, const dbv_t *token, /*@out@*/ dbv_t *val)
+{
+    dbh_t *handle = dsh->dbh;
+    bucket_t * b;
+    uint32_t * p;
+    /* Look the token up without creating a bucket for it. */
+    b = find_bucket(handle->map, token->data, token->leng, 0);
+
+    if (!b)
+	return DS_NOTFOUND;
+    /* The caller's buffer must hold exactly three 32-bit words. */
+    if (val->leng != 3 * sizeof(*p) ) {
+	print_error(__FILE__, __LINE__,
+		    "(hash) db_get_dbvalue( '%.*s' ), size error %lu: %lu (MUST be 3 longs)",
+		    CLAMP_INT_MAX(token->leng),
+		    (char *)token->data, (unsigned long)val->leng,
+		    (unsigned long)token->leng);
+	exit(EX_ERROR);
+    }
+
+    p = (uint32_t*) val->data;
+    /* Two stored counts, then a third word that this store always reports as 0. */
+    p[0] = b->v[0];
+    p[1] = b->v[1];
+    p[2] = 0;
+    return 0;
+}
+
+int db_set_dbvalue(dsh_t *dsh, const dbv_t *token, dbv_t *val)
+{
+    dbh_t *handle = dsh->dbh;
+    bucket_t * b;
+    uint32_t * p;
+    /* Store val's first two 32-bit words as the counts for token. */
+    b = find_bucket(handle->map, token->data, token->leng, 1); /* create is true */
+
+    if (!b)	/* no bucket could be found, claimed or recycled */
+	return DS_NOTFOUND;
+    /* handle->log is NULL when fopen() failed or the store is not open for writing. */
+    if (handle->log != NULL && !b->v[0] && !b->v[1]) { /* New bucket. */
+      fprintf(handle->log, "%08x%08x ", b->h1, b->h2);
+      fwrite(token->data, token->leng, 1, handle->log);
+      fwrite("\n", 1, 1, handle->log);
+    }
+
+    p = (uint32_t*) val->data;
+ 
+    b->v[0] = p[0];
+    b->v[1] = p[1];
+    return 0;
+}
+
+
+/*
+   Close files and clean up.
+*/
+void db_close(void *vhandle, bool nosync)
+{
+    dbh_t *handle = vhandle;
+
+    if (handle == NULL) return;
+
+    if (DEBUG_DATABASE(1))
+	fprintf(dbgout, "(hash) db_close( %s, %s )\n", handle->name, nosync ? "nosync" : "sync");
+
+    if (!nosync)
+	msync(handle->map, handle->map->size, MS_SYNC);
+    munmap(handle->map, handle->map->size );
+
+    if (handle->log != NULL)	/* token log opened by db_open() in write mode */
+	fclose(handle->log);
+    close(handle->fd);
+    dbh_free(handle);
+}
+
+
+/*
+   Flush any data in memory to disk
+*/
+void db_flush(dsh_t *dsh)
+{
+    /* Synchronously write the whole shared mapping back to its file. */
+    map_t *mapping = ((dbh_t *) dsh->dbh)->map;
+    msync(mapping, mapping->size, MS_SYNC);
+}
+
+
+int db_foreach(dsh_t *dsh, db_foreach_t hook, void *userdata)
+{
+    /* Call hook once per bucket with a non-zero count, passing the signature
+     * as 16 hex digits and the counts as three uint32 words (third always 0).
+     * A non-zero hook result stops the walk; this function always returns 0. */
+    unsigned int ret = 0, s;
+    map_t * m;
+    dbh_t *handle = dsh->dbh;
+
+    dbv_t dbv_key, dbv_data;
+    char key[17];		/* 64 bit value in hex, plus NUL. */
+    uint32_t data[3];		/* External data values. */
+
+    m = handle->map;
+    for (s = 0; s < m->buckets; ++s) {
+	if (!m->b[s].v[0] && !m->b[s].v[1]) /* Empty bucket? */
+	    continue;
+
+	dbv_key.leng = 16;
+        dbv_key.data = (void*)key;
+
+	dbv_data.leng = sizeof(data);	/* three words, the size db_get_dbvalue() insists on */
+        dbv_data.data = (void*)data;
+
+	snprintf(key, sizeof(key), "%08x%08x", m->b[s].h1, m->b[s].h2 );
+	
+	data[0] = m->b[s].v[0];
+	data[1] = m->b[s].v[1];
+	data[2] = 0;
+
+	ret = hook(&dbv_key, &dbv_data, userdata);
+
+	if (ret != 0)
+	    break;
+    }
+
+    return 0;
+}
+
+
+const char *db_str_err(int e) {	/* stub: this backend has no error strings yet */
+    fprintf(stdout, "db_str_err %d\n", e);	/* report the code on stdout */
+    return NULL;	/* no message text is returned */
+}
+
+/* dummy infrastructure, to be expanded by environment
+ * or transactional initialization/shutdown */
+static bool init = false;	/* true between db_init() and db_cleanup() */
+int db_init(void) { init = true; return 0; }	/* always returns 0 */
+void db_cleanup(void) { init = false; }
diff --exclude='*.spec' --exclude=configure --exclude=mkinstalldirs --exclude='*~' --exclude='*cache*' --exclude='*.in' --exclude='*.m4' --exclude=Makefile --new-file -ur bogofilter-0.17.2/src/Makefile.am bogofilter-dev/src/Makefile.am
--- bogofilter-0.17.2/src/Makefile.am	2004-02-23 03:57:00.000000000 +1100
+++ bogofilter-dev/src/Makefile.am	2004-03-02 12:07:46.000000000 +1100
@@ -140,9 +140,13 @@
 if ENABLE_QDBM_DATASTORE
 datastore_SOURCE = datastore_qdbm.c
 else
+if ENABLE_HASH_DATASTORE
+datastore_SOURCE = datastore_hash.c
+else
 datastore_SOURCE = datastore_db.c
 endif
 endif
+endif
 
 datastore_OBJECT = $(datastore_SOURCE:.c=.o)
 
diff --exclude='*.spec' --exclude=configure --exclude=mkinstalldirs --exclude='*~' --exclude='*cache*' --exclude='*.in' --exclude='*.m4' --exclude=Makefile --new-file -ur bogofilter-0.17.2/src/register.c bogofilter-dev/src/register.c
--- bogofilter-0.17.2/src/register.c	2004-01-31 16:05:17.000000000 +1100
+++ bogofilter-dev/src/register.c	2004-03-02 13:04:35.000000000 +1100
@@ -80,14 +80,16 @@
 
   for (list = word_lists; list != NULL; list = list->next)
   {
+      int spam_count, good_count;
+    
 /*
       if (!list->active)
 	  continue;
 */
 
-      ds_get_msgcounts(list->dsh, &val);
-      list->msgcount[IX_SPAM] = val.spamcount;
-      list->msgcount[IX_GOOD] = val.goodcount;
+      ds_get_msgcounts(list->dsh, &good_count, &spam_count);
+      list->msgcount[IX_SPAM] = spam_count;
+      list->msgcount[IX_GOOD] = good_count;
 
       if (incr != IX_UNDF)
 	  list->msgcount[incr] += msgcount;
@@ -99,10 +101,10 @@
 	      list->msgcount[decr] = 0;
       }
 
-      val.spamcount = list->msgcount[IX_SPAM];
-      val.goodcount = list->msgcount[IX_GOOD];
+      spam_count = list->msgcount[IX_SPAM];
+      good_count = list->msgcount[IX_GOOD];
 
-      ds_set_msgcounts(list->dsh, &val);
+      ds_set_msgcounts(list->dsh, good_count, spam_count);
 
       ds_flush(list->dsh);
 
diff --exclude='*.spec' --exclude=configure --exclude=mkinstalldirs --exclude='*~' --exclude='*cache*' --exclude='*.in' --exclude='*.m4' --exclude=Makefile --new-file -ur bogofilter-0.17.2/src/robx.c bogofilter-dev/src/robx.c
--- bogofilter-0.17.2/src/robx.c	2004-01-31 15:13:31.000000000 +1100
+++ bogofilter-dev/src/robx.c	2004-03-02 13:01:29.000000000 +1100
@@ -75,16 +75,13 @@
     uint32_t good_cnt, spam_cnt;
     struct robhook_data rh;
 
-    ok = ds_get_msgcounts(dsh, &val);
+    ok = ds_get_msgcounts(dsh, &good_cnt, &spam_cnt);
 
     if (!ok) {
 	fprintf(stderr, "Can't find message counts.\n");
 	exit(EX_ERROR);
     }
 
-    spam_cnt = val.spamcount;
-    good_cnt = val.goodcount;
-
     rh.scalefactor = (double)spam_cnt/(double)good_cnt;
     rh.dsh = dsh;
     rh.sum = 0.0;
diff --exclude='*.spec' --exclude=configure --exclude=mkinstalldirs --exclude='*~' --exclude='*cache*' --exclude='*.in' --exclude='*.m4' --exclude=Makefile --new-file -ur bogofilter-0.17.2/src/version.c bogofilter-dev/src/version.c
--- bogofilter-0.17.2/src/version.c	2004-02-23 04:57:32.000000000 +1100
+++ bogofilter-dev/src/version.c	1970-01-01 10:00:00.000000000 +1000
@@ -1,2 +0,0 @@
-#include "globals.h"
-const char * const version = "0.17.2";
diff --exclude='*.spec' --exclude=configure --exclude=mkinstalldirs --exclude='*~' --exclude='*cache*' --exclude='*.in' --exclude='*.m4' --exclude=Makefile --new-file -ur bogofilter-0.17.2/src/wordlists.c bogofilter-dev/src/wordlists.c
--- bogofilter-0.17.2/src/wordlists.c	2004-02-21 04:00:00.000000000 +1100
+++ bogofilter-dev/src/wordlists.c	2004-03-02 13:05:05.000000000 +1100
@@ -89,10 +89,11 @@
 			exit(EX_ERROR);
 		} /* switch */
 	    } else { /* ds_open */
-		dsv_t val;
-		ds_get_msgcounts(list->dsh, &val);
-		list->msgcount[IX_GOOD] = val.goodcount;
-		list->msgcount[IX_SPAM] = val.spamcount;
+	        int good_count, spam_count;
+	      
+		ds_get_msgcounts(list->dsh, &good_count, &spam_count);
+		list->msgcount[IX_GOOD] = good_count;
+		list->msgcount[IX_SPAM] = spam_count;
 	    } /* ds_open */
 	} /* for */
     } while(retry);




More information about the Bogofilter mailing list