Better database??
michael at optusnet.com.au
michael at optusnet.com.au
Tue Mar 2 04:42:32 CET 2004
I've been playing with word pairs and such like to improve the accuracy
of bogofilter and was getting very frustrated with db as a datastore.
In particular, db is slow and bulky. My tiny little database of only
15 million tokens is almost a gigabyte in size! DB grows without
bound and deals badly with hapaxes and long tokens.
So I did something about it: A new data store!
Use ./configure --with-database=hash ...
The hash datastore is a _lossy_ datastore. It doesn't guarantee
that it won't drop some data.
It works by taking a 64 bit signature of the token you're trying to store,
then hashing into a bucket to store the signature and the token counts.
It's possible (though improbable) that two different tokens hash to the
same signature.
There's a maximum of 80 buckets that any given signature can be stored
in (the 'range'). If none of those buckets are empty, then a hapax in
that range will be discarded in favour of the new token. If there are
no hapaxes then the new token will be _discarded_.
What's good about it:
* It's very cpu efficient.
* It's very space efficient. :) (16 bytes per token always).
* It never grows in size.
* It automatically cleans up hapaxes if need be.
* It uses no external libraries! (wave bye-bye to the db disaster area).
* It's very small and easy to understand.
* It's corruption resistant.
What's bad about it:
* There's a (small) chance it will lose data.
* It never grows in size.
* The machine _must_ support a working sane mmap.
* It's not endian independent. (You can't copy a database from a
big endian machine to a little endian machine or vice versa).
* It doesn't store the date the token was last used (not needed).
* It doesn't actually store tokens, only token signatures. So
bogoutil -d looks a little odd.
* It doesn't yet handle multiple writers, although that's very easy to
add.
* There's currently no way to customize the number of buckets without
editing the source.
You get two million buckets, and by jupiter you'll like them!
Comments welcome. (This is step one in my evil plan to add multi-word
phrase recognition).
[ Little rant: The datastore abstraction layer works in exactly the
wrong direction. The API makes everything look like db instead of presenting
the bogofilter data to the datastore in a standard way. argh!!! ]
Without further ado: The diff.
Michael.
diff --exclude='*.spec' --exclude=configure --exclude=mkinstalldirs --exclude='*~' --exclude='*cache*' --exclude='*.in' --exclude='*.m4' --exclude=Makefile --new-file -ur bogofilter-0.17.2/configure.ac bogofilter-dev/configure.ac
--- bogofilter-0.17.2/configure.ac 2004-02-23 04:08:32.000000000 +1100
+++ bogofilter-dev/configure.ac 2004-03-02 12:04:39.000000000 +1100
@@ -270,7 +270,7 @@
WITH_DB_ENGINE=db
AC_ARG_WITH(database,
AC_HELP_STRING([--with-database=ENGINE],
- [Choose database engine {db|tdb|qdbm} [[ENGINE=db]]]),
+ [Choose database engine {db|tdb|qdbm|hash} [[ENGINE=db]]]),
[ WITH_DB_ENGINE=$withval ]
)
@@ -307,6 +307,11 @@
])],,AC_MSG_ERROR(Cannot link to qdbm library.))
LIBS="$saveLIBS"
;;
+ xhash)
+ AC_DEFINE(ENABLE_HASH_DATASTORE,1, [Enable hash datastore])
+ AC_DEFINE_UNQUOTED(DB_TYPE, "hash")
+ AC_DEFINE_UNQUOTED(DB_EXT, ".chash")
+ ;;
xdb)
dnl check if we need LIBDB at all (it is harmful on BSD since
dnl they rename the library including soname)
@@ -384,7 +389,7 @@
LIBS="$saveLIBS"
;;
*)
- AC_MSG_ERROR([Invalid --with-database argument. Supported engines are db, tdb, qdbm.])
+ AC_MSG_ERROR([Invalid --with-database argument. Supported engines are db, tdb, qdbm, hash.])
;;
esac
@@ -398,6 +403,7 @@
AM_CONDITIONAL(ENABLE_TDB_DATASTORE, test "x$WITH_DB_ENGINE" = "xtdb")
AM_CONDITIONAL(ENABLE_QDBM_DATASTORE, test "x$WITH_DB_ENGINE" = "xqdbm")
+AM_CONDITIONAL(ENABLE_HASH_DATASTORE, test "x$WITH_DB_ENGINE" = "xhash")
dnl Use TRIO to replace missing snprintf/vsnprintf.
needtrio=0
diff --exclude='*.spec' --exclude=configure --exclude=mkinstalldirs --exclude='*~' --exclude='*cache*' --exclude='*.in' --exclude='*.m4' --exclude=Makefile --new-file -ur bogofilter-0.17.2/src/bogohist.c bogofilter-dev/src/bogohist.c
--- bogofilter-0.17.2/src/bogohist.c 2004-02-21 04:05:41.000000000 +1100
+++ bogofilter-dev/src/bogohist.c 2004-03-02 13:01:38.000000000 +1100
@@ -140,7 +140,7 @@
if (dsh == NULL)
return EX_ERROR;
- ds_get_msgcounts(dsh, &val);
+ ds_get_msgcounts(dsh, &msgs_good, &msgs_bad);
msgs_bad = val.spamcount;
msgs_good = val.goodcount;
diff --exclude='*.spec' --exclude=configure --exclude=mkinstalldirs --exclude='*~' --exclude='*cache*' --exclude='*.in' --exclude='*.m4' --exclude=Makefile --new-file -ur bogofilter-0.17.2/src/bogoutil.c bogofilter-dev/src/bogoutil.c
--- bogofilter-0.17.2/src/bogoutil.c 2004-02-21 04:05:41.000000000 +1100
+++ bogofilter-dev/src/bogoutil.c 2004-03-02 13:01:36.000000000 +1100
@@ -270,7 +270,7 @@
if (show_probability)
{
dsv_t val;
- ds_get_msgcounts(dsh, &val);
+ ds_get_msgcounts(dsh, &msgs_good, &msgs_bad);
msgs_good = val.goodcount;
msgs_bad = val.spamcount;
robs = ROBS;
diff --exclude='*.spec' --exclude=configure --exclude=mkinstalldirs --exclude='*~' --exclude='*cache*' --exclude='*.in' --exclude='*.m4' --exclude=Makefile --new-file -ur bogofilter-0.17.2/src/datastore.c bogofilter-dev/src/datastore.c
--- bogofilter-0.17.2/src/datastore.c 2004-02-21 04:05:41.000000000 +1100
+++ bogofilter-dev/src/datastore.c 2004-03-02 13:02:32.000000000 +1100
@@ -329,23 +329,34 @@
/*
Get the number of messages associated with database.
*/
-bool ds_get_msgcounts(void *vhandle, dsv_t *val)
+bool ds_get_msgcounts(void *vhandle, int * good, int * bad)
{
int rc;
+ dsv_t val;
dsh_t *dsh = vhandle;
- rc = ds_read(dsh, msg_count_tok, val);
+
+ rc = ds_read(dsh, msg_count_tok, &val);
+ /* val is an uninitialized local: only hand its fields back when the read succeeded, else report 0/0. */
+ *good = (rc == 0) ? (int)val.goodcount : 0;
+ *bad = (rc == 0) ? (int)val.spamcount : 0;
return rc == 0;
}
/*
Set the number of messages associated with database.
*/
-void ds_set_msgcounts(void *vhandle, dsv_t *val)
+void ds_set_msgcounts(void *vhandle, int good, int bad)
{
dsh_t *dsh = vhandle;
- if (timestamp_tokens && val->date != 0)
- val->date = today;
- ds_write(dsh, msg_count_tok, val);
+ dsv_t val;
+
+ /* val is a fresh local, so there is no earlier date to test: assign it outright instead of reading garbage. */
+ val.date = timestamp_tokens ? today : 0;
+
+ val.goodcount = good;
+ val.spamcount = bad;
+
+ ds_write(dsh, msg_count_tok, &val);
return;
}
diff --exclude='*.spec' --exclude=configure --exclude=mkinstalldirs --exclude='*~' --exclude='*cache*' --exclude='*.in' --exclude='*.m4' --exclude=Makefile --new-file -ur bogofilter-0.17.2/src/datastore.h bogofilter-dev/src/datastore.h
--- bogofilter-0.17.2/src/datastore.h 2004-02-11 11:21:00.000000000 +1100
+++ bogofilter-dev/src/datastore.h 2004-03-02 13:02:56.000000000 +1100
@@ -126,10 +126,10 @@
extern void ds_updvalues(void *vhandle, const dbv_t *token, const dbv_t *updval);
/** Get the database message count */
-extern bool ds_get_msgcounts(void *vhandle, dsv_t *val);
+extern bool ds_get_msgcounts(void *vhandle, int * good, int * bad);
/** set the database message count */
-extern void ds_set_msgcounts(void *vhandle, dsv_t *val);
+extern void ds_set_msgcounts(void *vhandle, int good, int bad);
/* Get the current process id */
extern unsigned long ds_handle_pid(void *vhandle);
@@ -229,9 +229,9 @@
/** Get the database message count */
extern __inline
-void ds_get_msgcounts(void*, dbv_t *)
+void ds_get_msgcounts(void*, int * good, int * bad)
{
- return db_get_msgcounts(void*, dbv_t *)
+ return db_get_msgcounts(void*, int * , int * )
}
/** set the database message count */
diff --exclude='*.spec' --exclude=configure --exclude=mkinstalldirs --exclude='*~' --exclude='*cache*' --exclude='*.in' --exclude='*.m4' --exclude=Makefile --new-file -ur bogofilter-0.17.2/src/datastore_hash.c bogofilter-dev/src/datastore_hash.c
--- bogofilter-0.17.2/src/datastore_hash.c 1970-01-01 10:00:00.000000000 +1000
+++ bogofilter-dev/src/datastore_hash.c 2004-03-02 14:31:23.000000000 +1100
@@ -0,0 +1,478 @@
+/* $Id: datastore_hash.c,v 1.1 2004/02/11 00:21:00 m-a Exp $ */
+
+/*****************************************************************************
+
+NAME:
+datastore_hash.c -- implements the datastore, using a crc64 hash table.
+
+AUTHORS:
+Michael O'Reilly 2004
+
+******************************************************************************/
+
+#include "common.h"
+
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include "datastore.h"
+#include "datastore_db.h"
+#include "error.h"
+#include "paths.h"
+#include "xmalloc.h"
+#include "xstrdup.h"
+
+/* initial bucket array element count (for new data base) */
+static const int DB_BUCKETS = 2000000;  /* 2,000,000 buckets * 16 bytes: 32 meg default database size. */
+static const int DB_RANGE = 80;         /* search up to 80 buckets when there's a collision. */
+
+/* 64 bit token signature, kept as two 32 bit halves. */
+typedef struct {
+    uint32_t h1;
+    uint32_t h2;
+} hash_t;
+
+/* Per-byte XOR table used by hhash(); filled in by crcinit(). */
+static hash_t CrcXor[256];
+static int DidCrcInit = 0;  /* set to 1 once crcinit() has run */
+
+
+/* One 16 byte slot of the table: a token signature plus its two counters. */
+typedef struct {
+    uint32_t h1;
+    uint32_t h2;
+
+    uint32_t v[2];  /* Must equal IX_SIZE, but must not change either */
+} bucket_t;
+
+/* Layout of the mmap'ed file: a 4096 byte header followed by the buckets. */
+typedef struct {
+    uint32_t version;  /* version number of structure. */
+    uint32_t buckets;  /* Number of buckets. */
+    uint32_t locked;   /* If the map is locked for writing, this holds the PID of the locking process. */
+    uint32_t range;    /* How many buckets to search when there's a hash collision. */
+    uint32_t size;     /* How many bytes long this map is. */
+
+    uint32_t reserved[1019];  /* pad to 4096 bytes (5 + 1019 words). */
+
+    bucket_t b[];  /* array of 'buckets' elements (C99 flexible array member). */
+} map_t;
+
+/* In-memory state for one open hash database. */
+typedef struct {
+    char *path;     /* copy of the path argument given to dbh_init() */
+    char *name;     /* database file name, produced by build_path() */
+    char *logname;  /* name with ".log" appended */
+    bool locked;    /* cleared by dbh_init() */
+    int fd;         /* descriptor of the database file */
+    int mode;       /* open mode passed to db_open() */
+    map_t *map;     /* the mmap'ed database */
+    FILE *log;      /* append-mode token log; only opened for DB_WRITE */
+} dbh_t;
+
+
+/* Function definitions */
+
+/* Return the datastore's version banner; it is formatted once into a static buffer. */
+const char *db_version_str(void)
+{
+    static char banner[80];
+
+    if (banner[0] == '\0')
+        snprintf(banner, sizeof(banner), "Hash DB(version %s)", "1.0" );
+
+    return banner;
+}
+
+
+/*
+ * Allocate a zeroed handle and fill in its three strings: a copy of
+ * path, the database file name built from path and name, and that file
+ * name with ".log" appended.
+ */
+static dbh_t *dbh_init(const char *path, const char *name)
+{
+    const size_t name_size = strlen(path) + strlen(name) + 2;
+    const size_t log_size = name_size + 5;
+    dbh_t *h = xmalloc(sizeof(dbh_t));
+
+    memset(h, 0, sizeof(dbh_t)); /* valgrind */
+    h->locked = false;
+
+    h->path = xstrdup(path);
+
+    h->name = xmalloc(name_size);
+    build_path(h->name, name_size, path, name);
+
+    h->logname = xmalloc(log_size);
+    snprintf(h->logname, log_size, "%s.log", h->name);
+
+    return h;
+}
+
+
+/*
+ * Release a handle obtained from dbh_init(): close the token log if it
+ * is still open, then free the handle and every string it owns.
+ * A NULL handle is ignored.
+ */
+static void dbh_free(/*@only@*/ dbh_t *handle)
+{
+    if (handle == NULL)
+        return;
+
+    if (handle->log != NULL)
+        fclose(handle->log);
+
+    xfree(handle->logname); /* allocated by dbh_init(); was never freed */
+    xfree(handle->name);
+    xfree(handle->path);
+    xfree(handle);
+}
+
+
+/*
+  Initialize database.
+
+  Opens the file named by db_file/name (creating it when open_mode is
+  DB_WRITE), maps it into memory and, for a file without a valid header,
+  sizes it for DB_BUCKETS buckets and writes a fresh header. In DB_WRITE
+  mode the "<file>.log" token log is opened for appending as well; a
+  failure to open the log is tolerated and leaves handle->log NULL.
+
+  Returns: pointer to database handle on success, NULL otherwise.
+*/
+void *db_open(const char *db_file, const char *name, dbmode_t open_mode)
+{
+    dbh_t *handle;
+    dsh_t *dsh;
+
+    map_t *map;
+    map_t read_map;
+
+    ssize_t got;
+    int saved_errno;
+    int fresh;
+    int fd;
+    int prot;
+    uint32_t i;
+
+    handle = dbh_init(db_file, name);
+    handle->mode = open_mode;
+
+    if (open_mode == DB_WRITE) {
+        fd = open(handle->name, O_CREAT | O_RDWR, 0644);
+        prot = PROT_READ | PROT_WRITE;
+    } else {
+        fd = open(handle->name, O_RDONLY );
+        prot = PROT_READ;
+    }
+
+    if (fd < 0) /* open failed. */
+        goto open_err;
+
+    /* A file that O_CREAT just made is empty, so read() can return fewer
+     * bytes than a header. Zero the buffer first so the version test
+     * below never looks at uninitialized memory. */
+    memset(&read_map, 0, sizeof(read_map));
+    got = read(fd, (void*) &read_map, sizeof(read_map));
+    if (got < 0)
+        goto open_err;
+
+    /* No complete header, or version still 0: not an initialized database. */
+    fresh = ((size_t)got < sizeof(read_map)) || !read_map.version;
+
+    if (fresh) { /* Initial open of file! */
+        size_t size = sizeof(bucket_t) * DB_BUCKETS + sizeof(map_t);
+
+        if (open_mode != DB_WRITE) { /* Not a valid file, but opening for read only!? */
+            errno = EINVAL; /* give strerror() something meaningful */
+            goto open_err;
+        }
+
+        if (ftruncate(fd, (off_t)size) != 0)
+            goto open_err;
+
+        memset(&read_map, 0, sizeof(read_map));
+        read_map.version = 1;
+        read_map.buckets = DB_BUCKETS;
+        read_map.range = DB_RANGE;
+        read_map.locked = 0;
+        read_map.size = (uint32_t)size;
+    }
+
+    /* mmap() reports failure with MAP_FAILED, never with NULL. */
+    map = (map_t*) mmap( NULL, read_map.size, prot, MAP_SHARED, fd, 0);
+    if (map == MAP_FAILED)
+        goto open_err;
+
+    if (fresh) { /* Initialize this map. */
+        memcpy(map, &read_map, sizeof(read_map));
+
+        /* Touch every page in the map to keep the on-disk image unfragmented. */
+        for (i = 0; i < map->buckets ; ++i)
+            map->b[i].h1 = map->b[i].h2 = 0;
+
+        /* Wait for it to all write out to disk. */
+        msync(map, map->size, MS_SYNC);
+    }
+
+    if (DEBUG_DATABASE(1))
+        fprintf(dbgout, "(hash) db_open( %s, %d )\n", handle->name, open_mode);
+
+    handle->fd = fd;
+    handle->map = map;
+    if (handle->logname && open_mode == DB_WRITE)
+        handle->log = fopen( handle->logname, "a");
+
+    dsh = dsh_init(handle, false); /* Convert to dsh. */
+
+    return dsh;
+
+ open_err:
+    saved_errno = errno; /* keep the cause before any later call can overwrite it */
+    print_error(__FILE__, __LINE__, "(hash) db_open(%s, %d) failed: %s",
+                handle->name, open_mode, strerror(saved_errno));
+    if (fd >= 0)
+        close(fd);
+    dbh_free(handle);
+
+    return NULL;
+}
+
+
+/* ----------------------------------------------- */
+/* Hash methods. */
+/* ----------------------------------------------- */
+
+/* Starting values of the two signature halves in hhash(). */
+#define HINIT1 0xFAC432B1
+#define HINIT2 0x0CD5E44A
+
+/* Constants XORed into the two halves while crcinit() builds CrcXor[]. */
+#define POLY1 0x00600340UL
+#define POLY2 0x00F0D50BUL
+
+/*
+ * Fill CrcXor[] with one two-word entry per possible byte value and mark
+ * the table as initialized. Each entry is produced by eight rounds that
+ * test bit 7 of the (shifting) byte value, conditionally XOR in
+ * POLY1/POLY2, and then shift the h2:h1 pair left by one bit, carrying
+ * the top bit of h1 into the bottom of h2.
+ */
+static void
+crcinit(void)
+{
+    int byte;
+
+    for (byte = 0; byte < 256; ++byte) {
+        hash_t acc = { 0, 0 };
+        int bits = byte;
+        int round;
+
+        for (round = 0; round < 8; ++round) {
+            if (bits & 0x80) {
+                acc.h1 ^= POLY1;
+                acc.h2 ^= POLY2;
+            }
+
+            acc.h2 <<= 1;
+            if (acc.h1 & 0x80000000)
+                acc.h2 |= 1;
+            acc.h1 <<= 1;
+
+            bits <<= 1;
+        }
+
+        CrcXor[byte] = acc;
+    }
+
+    DidCrcInit = 1;
+}
+
+/*
+ * Compute the 64 bit signature of text[0..len) as two 32 bit halves.
+ * Builds the CrcXor table on first use. Before returning, a half whose
+ * top bit is set has that bit cleared and its lowest bit flipped.
+ */
+static hash_t
+hhash(const char *text, int len)
+{
+    hash_t sig;
+
+    /*
+     * HASH_CRC - CRC64
+     */
+    if (!DidCrcInit)
+        crcinit();
+
+    sig.h1 = HINIT1;
+    sig.h2 = HINIT2;
+
+    for (; len--; ++text) {
+        const int top = (sig.h1 >> 24) & 255;      /* byte about to be shifted out of h1 */
+        const uint32_t carry = sig.h2 >> 24;       /* byte moving from h2 into h1 */
+
+        sig.h1 = (sig.h1 << 8) ^ carry ^ CrcXor[top].h1;
+        sig.h2 = (sig.h2 << 8) ^ (uint8_t)*text ^ CrcXor[top].h2;
+    }
+
+    if (sig.h1 & 0x80000000)
+        sig.h1 = (sig.h1 & 0x7FFFFFFF) ^ 1;
+    if (sig.h2 & 0x80000000)
+        sig.h2 = (sig.h2 & 0x7FFFFFFF) ^ 1;
+
+    return sig;
+}
+
+
+
+/* ----------------------------------------------- */
+/* database methods. */
+/* ----------------------------------------------- */
+
+/* Hand bucket b to signature h with both counters reset to zero. */
+static bucket_t * claim_bucket(bucket_t *b, hash_t h)
+{
+    b->h1 = h.h1;
+    b->h2 = h.h2;
+    b->v[0] = b->v[1] = 0;
+    return b;
+}
+
+/*
+ * Locate the bucket that holds the signature of data[0..len), optionally
+ * claiming one for it.
+ *
+ * The probe starts at (h1 ^ h2) % m->buckets and visits at most m->range
+ * consecutive buckets, wrapping to bucket 0 at the end of the table so
+ * it never indexes past the array. It stops at the first bucket with a
+ * matching signature or at the first empty bucket (h1 == h2 == 0).
+ *
+ * create == 0: returns the matching bucket, or NULL if there is none.
+ * create != 0: on a miss, claims the empty bucket that ended the probe.
+ *              If every bucket in the range is occupied, the first one
+ *              whose counters sum to at most 1 (a hapax) is recycled.
+ *              A claimed bucket is returned with both counters zero.
+ *
+ * Returns NULL with create set when the range holds neither an empty
+ * bucket nor a hapax; the new token is then dropped.
+ */
+static bucket_t * find_bucket(map_t *m, char * data, int len, int create)
+{
+    const hash_t h = hhash(data, len);
+    uint32_t start, idx, probes;
+    bucket_t *b;
+
+    if (m->buckets == 0)
+        return NULL; /* nothing to probe, and avoids a modulo by zero */
+
+    start = (h.h1 ^ h.h2) % m->buckets; /* Find starting point. */
+
+    idx = start;
+    for (probes = 0; probes < m->range; ++probes) {
+        b = &m->b[idx];
+
+        if (b->h1 == h.h1 && b->h2 == h.h2)
+            return b;
+
+        if (!b->h1 && !b->h2) /* Empty bucket ends the search. */
+            return create ? claim_bucket(b, h) : NULL;
+
+        if (++idx == m->buckets) /* wrap instead of running off the table */
+            idx = 0;
+    }
+
+    if (!create)
+        return NULL;
+
+    /* Oh bugger. All the buckets are used. Search for an appropriate bucket to reuse. */
+    idx = start; /* reset search. */
+    for (probes = 0; probes < m->range; ++probes) {
+        b = &m->b[idx];
+
+        if (b->v[0] + b->v[1] <= 1) /* Excellent! A hapax! */
+            return claim_bucket(b, h);
+
+        if (++idx == m->buckets)
+            idx = 0;
+    }
+
+    /* No empty buckets, no hapaxs. fail. */
+    return NULL;
+}
+
+
+/*
+ * Drop a token's counts. The bucket keeps its signature; only the two
+ * counters are zeroed. Returns 0 if ok, -1 when the token has no bucket.
+ */
+int db_delete(dsh_t *dsh, const dbv_t *token)
+{
+    dbh_t *h = dsh->dbh;
+    bucket_t *slot = find_bucket(h->map, token->data, token->leng, 0);
+
+    if (slot == NULL)
+        return -1; /* Failed to find token. */
+
+    /* make bucket available for re-use. */
+    slot->v[1] = 0;
+    slot->v[0] = 0;
+
+    return 0;
+}
+
+
+/*
+ * This is a terrible hack.
+ *
+ * Because the data is stored compressed inside the mmap
+ * we need to convert the data back and forth.. :(
+ *
+ * Looks up token and copies its two stored counters into val->data as
+ * three uint32_t words; the third word is always written as 0.
+ *
+ * Returns 0 on success and DS_NOTFOUND when the token has no bucket.
+ * Exits with EX_ERROR if val->leng is not exactly 3 * sizeof(uint32_t).
+ */
+int db_get_dbvalue(dsh_t *dsh, const dbv_t *token, /*@out@*/ dbv_t *val)
+{
+    dbh_t *handle = dsh->dbh;
+    bucket_t * b;
+    uint32_t * p;
+
+    b = find_bucket(handle->map, token->data, token->leng, 0);
+
+    if (!b)
+        return DS_NOTFOUND;
+
+    if (val->leng != 3 * sizeof(*p) ) {
+        print_error(__FILE__, __LINE__,
+                    "(hash) db_get_dbvalue( '%.*s' ), size error %lu: %lu (MUST be 3 longs)",
+                    CLAMP_INT_MAX(token->leng),
+                    (char *)token->data, (unsigned long)val->leng,
+                    (unsigned long)token->leng);
+        exit(EX_ERROR);
+    }
+
+    p = (uint32_t*) val->data;
+
+    p[0] = b->v[0];
+    p[1] = b->v[1];
+    p[2] = 0; /* third word is not stored in the bucket */
+    return 0;
+}
+
+/*
+ * Store the first two uint32_t words of val->data as the counters for
+ * token, claiming a bucket for the token if it does not have one yet.
+ *
+ * When the bucket's counters are both zero before the write, a line
+ * "<16 hex digit signature> <token>" is appended to the token log,
+ * provided the log is open.
+ *
+ * Returns 0 on success, DS_NOTFOUND when find_bucket() cannot supply a
+ * bucket.
+ */
+int db_set_dbvalue(dsh_t *dsh, const dbv_t *token, dbv_t *val)
+{
+    dbh_t *handle = dsh->dbh;
+    bucket_t * b;
+    uint32_t * p;
+
+    b = find_bucket(handle->map, token->data, token->leng, 1); /* create is true */
+
+    if (!b)
+        return DS_NOTFOUND;
+
+    /* handle->log stays NULL when db_open() could not open the log file;
+     * skip logging then rather than passing NULL to stdio. */
+    if (!b->v[0] && !b->v[1] && handle->log != NULL) { /* New bucket. */
+        fprintf(handle->log, "%08x%08x ", b->h1, b->h2);
+        fwrite(token->data, token->leng, 1, handle->log);
+        fwrite("\n", 1, 1, handle->log);
+    }
+
+    p = (uint32_t*) val->data;
+
+    b->v[0] = p[0];
+    b->v[1] = p[1];
+    return 0;
+}
+
+
+/*
+  Close files and clean up.
+
+  Unless nosync is set the mapping is flushed with msync() first. Then
+  the mapping is removed, the database descriptor and the token log are
+  closed, and the handle is freed. A NULL handle is ignored.
+*/
+void db_close(void *vhandle, bool nosync)
+{
+    dbh_t *handle = vhandle;
+    size_t size;
+
+    if (handle == NULL) return;
+
+    if (DEBUG_DATABASE(1))
+        fprintf(dbgout, "(hash) db_close( %s, %s )\n", handle->name, nosync ? "nosync" : "sync");
+
+    size = handle->map->size; /* the size lives in the mapping, so read it before unmapping */
+
+    if (!nosync)
+        msync(handle->map, size, MS_SYNC);
+
+    munmap(handle->map, size);
+
+    close(handle->fd);
+
+    /* Close the token log so buffered lines reach the file. */
+    if (handle->log != NULL) {
+        fclose(handle->log);
+        handle->log = NULL;
+    }
+
+    dbh_free(handle);
+}
+
+
+/*
+  Flush any data in memory to disk: synchronously msync() the whole map.
+*/
+void db_flush(dsh_t *dsh)
+{
+    dbh_t *h = dsh->dbh;
+    map_t *mapping = h->map;
+
+    msync(mapping, mapping->size, MS_SYNC);
+}
+
+
+/*
+ * Call hook once for every bucket whose counters are not both zero.
+ *
+ * The key handed to the hook is the bucket's signature as 16 hex digits
+ * (the token text itself is not stored). The data is three uint32_t
+ * words: the two counters followed by 0, the same record layout that
+ * db_get_dbvalue() requires.
+ *
+ * Iteration stops as soon as the hook returns non-zero. Always returns 0.
+ */
+int db_foreach(dsh_t *dsh, db_foreach_t hook, void *userdata)
+{
+    dbh_t *handle = dsh->dbh;
+    map_t *m = handle->map;
+    unsigned int s;
+
+    dbv_t dbv_key, dbv_data;
+    char key[17];      /* 64 bit value in hex, plus the terminating NUL. */
+    uint32_t data[3];  /* External data values; was 8 bytes but 3 words are written. */
+
+    for (s = 0; s < m->buckets; ++s) {
+        if (!m->b[s].v[0] && !m->b[s].v[1]) /* Empty bucket? */
+            continue;
+
+        snprintf(key, sizeof(key), "%08x%08x", m->b[s].h1, m->b[s].h2 );
+
+        data[0] = m->b[s].v[0];
+        data[1] = m->b[s].v[1];
+        data[2] = 0;
+
+        dbv_key.leng = 16;
+        dbv_key.data = (void*)key;
+
+        dbv_data.leng = sizeof(data); /* 3 words, matching db_get_dbvalue() */
+        dbv_data.data = (void*)data;
+
+        if (hook(&dbv_key, &dbv_data, userdata) != 0)
+            break;
+    }
+
+    return 0;
+}
+
+
+/*
+ * Describe error code e. This datastore has no error table of its own,
+ * so the text just carries the number. The string lives in a static
+ * buffer that the next call overwrites. Never returns NULL and writes
+ * nothing to stdout.
+ */
+const char *db_str_err(int e)
+{
+    static char msg[48];
+
+    snprintf(msg, sizeof(msg), "hash datastore error %d", e);
+    return msg;
+}
+
+/* dummy infrastructure, to be expanded by environment
+ * or transactional initialization/shutdown */
+static bool init = false;
+
+/* Record that the datastore has been initialized; always returns 0. */
+int db_init(void)
+{
+    init = true;
+    return 0;
+}
+
+/* Record that the datastore has been shut down. */
+void db_cleanup(void)
+{
+    init = false;
+}
diff --exclude='*.spec' --exclude=configure --exclude=mkinstalldirs --exclude='*~' --exclude='*cache*' --exclude='*.in' --exclude='*.m4' --exclude=Makefile --new-file -ur bogofilter-0.17.2/src/Makefile.am bogofilter-dev/src/Makefile.am
--- bogofilter-0.17.2/src/Makefile.am 2004-02-23 03:57:00.000000000 +1100
+++ bogofilter-dev/src/Makefile.am 2004-03-02 12:07:46.000000000 +1100
@@ -140,9 +140,13 @@
if ENABLE_QDBM_DATASTORE
datastore_SOURCE = datastore_qdbm.c
else
+if ENABLE_HASH_DATASTORE
+datastore_SOURCE = datastore_hash.c
+else
datastore_SOURCE = datastore_db.c
endif
endif
+endif
datastore_OBJECT = $(datastore_SOURCE:.c=.o)
diff --exclude='*.spec' --exclude=configure --exclude=mkinstalldirs --exclude='*~' --exclude='*cache*' --exclude='*.in' --exclude='*.m4' --exclude=Makefile --new-file -ur bogofilter-0.17.2/src/register.c bogofilter-dev/src/register.c
--- bogofilter-0.17.2/src/register.c 2004-01-31 16:05:17.000000000 +1100
+++ bogofilter-dev/src/register.c 2004-03-02 13:04:35.000000000 +1100
@@ -80,14 +80,16 @@
for (list = word_lists; list != NULL; list = list->next)
{
+ int spam_count, good_count;
+
/*
if (!list->active)
continue;
*/
- ds_get_msgcounts(list->dsh, &val);
- list->msgcount[IX_SPAM] = val.spamcount;
- list->msgcount[IX_GOOD] = val.goodcount;
+ ds_get_msgcounts(list->dsh, &good_count, &spam_count);
+ list->msgcount[IX_SPAM] = spam_count;
+ list->msgcount[IX_GOOD] = good_count;
if (incr != IX_UNDF)
list->msgcount[incr] += msgcount;
@@ -99,10 +101,10 @@
list->msgcount[decr] = 0;
}
- val.spamcount = list->msgcount[IX_SPAM];
- val.goodcount = list->msgcount[IX_GOOD];
+ spam_count = list->msgcount[IX_SPAM];
+ good_count = list->msgcount[IX_GOOD];
- ds_set_msgcounts(list->dsh, &val);
+ ds_set_msgcounts(list->dsh, good_count, spam_count);
ds_flush(list->dsh);
diff --exclude='*.spec' --exclude=configure --exclude=mkinstalldirs --exclude='*~' --exclude='*cache*' --exclude='*.in' --exclude='*.m4' --exclude=Makefile --new-file -ur bogofilter-0.17.2/src/robx.c bogofilter-dev/src/robx.c
--- bogofilter-0.17.2/src/robx.c 2004-01-31 15:13:31.000000000 +1100
+++ bogofilter-dev/src/robx.c 2004-03-02 13:01:29.000000000 +1100
@@ -75,16 +75,13 @@
uint32_t good_cnt, spam_cnt;
struct robhook_data rh;
- ok = ds_get_msgcounts(dsh, &val);
+ ok = ds_get_msgcounts(dsh, &good_cnt, &spam_cnt);
if (!ok) {
fprintf(stderr, "Can't find message counts.\n");
exit(EX_ERROR);
}
- spam_cnt = val.spamcount;
- good_cnt = val.goodcount;
-
rh.scalefactor = (double)spam_cnt/(double)good_cnt;
rh.dsh = dsh;
rh.sum = 0.0;
diff --exclude='*.spec' --exclude=configure --exclude=mkinstalldirs --exclude='*~' --exclude='*cache*' --exclude='*.in' --exclude='*.m4' --exclude=Makefile --new-file -ur bogofilter-0.17.2/src/version.c bogofilter-dev/src/version.c
--- bogofilter-0.17.2/src/version.c 2004-02-23 04:57:32.000000000 +1100
+++ bogofilter-dev/src/version.c 1970-01-01 10:00:00.000000000 +1000
@@ -1,2 +0,0 @@
-#include "globals.h"
-const char * const version = "0.17.2";
diff --exclude='*.spec' --exclude=configure --exclude=mkinstalldirs --exclude='*~' --exclude='*cache*' --exclude='*.in' --exclude='*.m4' --exclude=Makefile --new-file -ur bogofilter-0.17.2/src/wordlists.c bogofilter-dev/src/wordlists.c
--- bogofilter-0.17.2/src/wordlists.c 2004-02-21 04:00:00.000000000 +1100
+++ bogofilter-dev/src/wordlists.c 2004-03-02 13:05:05.000000000 +1100
@@ -89,10 +89,11 @@
exit(EX_ERROR);
} /* switch */
} else { /* ds_open */
- dsv_t val;
- ds_get_msgcounts(list->dsh, &val);
- list->msgcount[IX_GOOD] = val.goodcount;
- list->msgcount[IX_SPAM] = val.spamcount;
+ int good_count, spam_count;
+
+ ds_get_msgcounts(list->dsh, &good_count, &spam_count);
+ list->msgcount[IX_GOOD] = good_count;
+ list->msgcount[IX_SPAM] = spam_count;
} /* ds_open */
} /* for */
} while(retry);
More information about the Bogofilter
mailing list