QDBM and transactions

Stefan Bellon sbellon at sbellon.de
Sat Oct 30 16:20:02 CEST 2004


Stefan Bellon wrote:
> David Relson wrote:
> > You've got a deal.  How does Sunday at noon GMT sound?

> Ok. :-)

All right, attached is the patch.

When releasing it, there should be a note in the ReadMes that whoever
uses QDBM, should do the following *before* upgrading:

$ bogoutil -d wordlist.db > wordlist

And the following *after* upgrading:

$ bogoutil -l wordlist.db < wordlist

-- 
Stefan Bellon
-------------- next part --------------
--- bogofilter-0.92.99.cvs/datastore_qdbm.c	Fri Oct 29 03:19:29 2004
+++ bogofilter-new/datastore_qdbm.c	Sat Oct 30 16:13:17 2004
@@ -8,13 +8,15 @@
 AUTHORS:
 Gyepi Sam <gyepi at praxis-sw.com>          2003
 Matthias Andree <matthias.andree at gmx.de> 2003
-Stefan Bellon <sbellon at sbellon.de>       2003
+Stefan Bellon <sbellon at sbellon.de>       2003-2004
 
 ******************************************************************************/
 
 #include "common.h"
 
 #include <depot.h>
+#include <cabin.h>
+#include <villa.h>
 #include <stdlib.h>
 
 #include "datastore.h"
@@ -24,25 +26,16 @@
 #include "xmalloc.h"
 #include "xstrdup.h"
 
-/* initial bucket array element count (for new data base) */
-static const int DB_INITBNUM = 1913;
-
-/* align to size for quick overwrites. */
-static const int DB_ALIGNSIZE = 12;
-
-/* reorganize data base if record/bucket ratio grows above this 
- * hence "FILL" for "bucket fill ratio". */
-static const double DB_MAXFILL = 0.8;
+#define UNUSED(x) ((void)&x)
 
 typedef struct {
     char *path;
     char *name;
     bool locked;
     bool created;
-    DEPOT *dbp;
+    VILLA *dbp;
 } dbh_t;
 
-
 /* dummy infrastructure, to be expanded by environment
  * or transactional initialization/shutdown */
 
@@ -50,11 +43,27 @@
 
 /* Function definitions */
 
+int cmpkey(const char *aptr, int asiz, const char *bptr, int bsiz)
+{
+    int aiter, biter;
+    
+    for (aiter = 0, biter = 0; aiter < asiz && biter < bsiz; ++aiter, ++biter) {
+        if (aptr[aiter] != bptr[biter])
+            return (aptr[aiter] < bptr[biter]) ? -1 : 1;
+    }
+    
+    if (aiter == asiz && biter == bsiz)
+        return 0;
+    
+    return (aiter == asiz) ? -1 : 1;
+}
+
+
 const char *db_version_str(void)
 {
     static char v[80];
     if (!v[0])
-	snprintf(v, sizeof(v), "QDBM (version %s, Depot API)", dpversion);
+	snprintf(v, sizeof(v), "QDBM (Depot version %s, Villa API)", dpversion);
     return v;
 }
 
@@ -93,7 +102,8 @@
 /* Returns is_swapped flag */
 bool db_is_swapped(void *vhandle)
 {
-    (void) vhandle;		/* suppress compiler warning */
+    UNUSED(vhandle);
+
     return false;
 }
 
@@ -115,21 +125,21 @@
     dbh_t *handle;
 
     int open_flags;
-    DEPOT *dbp;
+    VILLA *dbp;
 
     if (open_mode & DS_WRITE)
-	open_flags = DP_OWRITER;
+	open_flags = VL_OWRITER;
     else
-	open_flags = DP_OREADER;
+	open_flags = VL_OREADER;
 
     handle = dbh_init(db_file, name);
 
     if (handle == NULL) return NULL;
 
-    dbp = handle->dbp = dpopen(handle->name, open_flags, DB_INITBNUM);
+    dbp = handle->dbp = vlopen(handle->name, open_flags, cmpkey);
 
     if ((dbp == NULL) && (open_mode & DS_WRITE)) {
-	dbp = handle->dbp = dpopen(handle->name, open_flags | DP_OCREAT, DB_INITBNUM);
+	dbp = handle->dbp = vlopen(handle->name, open_flags|VL_OCREAT, cmpkey);
 	if (dbp != NULL)
 	    handle->created = true;
     }
@@ -137,20 +147,13 @@
     if (dbp == NULL)
 	goto open_err;
 
-    if (open_flags & DP_OWRITER) {
-	if (!dpsetalign(dbp, DB_ALIGNSIZE)){
-	    dpclose(dbp);
-	    goto open_err;
-	}
-    }
-
     if (DEBUG_DATABASE(1))
-	fprintf(dbgout, "(qdbm) dpopen( %s, %d )\n", handle->name, open_mode);
+	fprintf(dbgout, "(qdbm) vlopen( %s, %d )\n", handle->name, open_mode);
 
     return handle;
 
  open_err:
-    print_error(__FILE__, __LINE__, "(qdbm) dpopen(%s, %d) failed: %s",
+    print_error(__FILE__, __LINE__, "(qdbm) vlopen(%s, %d) failed: %s",
 		handle->name, open_flags, dperrmsg(dpecode));
     dbh_free(handle);
 
@@ -162,13 +165,13 @@
 {
     int ret;
     dbh_t *handle = vhandle;
-    DEPOT *dbp;
+    VILLA *dbp;
 
     dbp = handle->dbp;
-    ret = dpout(dbp, token->data, token->leng);
+    ret = vlout(dbp, token->data, token->leng);
 
     if (ret == 0) {
-	print_error(__FILE__, __LINE__, "(qdbm) dpout('%.*s'), err: %s",
+	print_error(__FILE__, __LINE__, "(qdbm) vlout('%.*s'), err: %s",
 		    CLAMP_INT_MAX(token->leng),
 		    (char *)token->data, dperrmsg(dpecode));
 	exit(EX_ERROR);
@@ -185,9 +188,9 @@
     int dsiz;
 
     dbh_t *handle = vhandle;
-    DEPOT *dbp = handle->dbp;
+    VILLA *dbp = handle->dbp;
 
-    data = dpget(dbp, token->data, token->leng, 0, -1, &dsiz);
+    data = vlget(dbp, token->data, token->leng, &dsiz);
 
     if (data == NULL)
 	return DS_NOTFOUND;
@@ -204,25 +207,25 @@
     val->leng = dsiz;		/* read count */
     memcpy(val->data, data, dsiz);
 
-    free(data); /* not xfree() as allocated by dpget() */
+    free(data); /* not xfree() as allocated by vlget() */
 
     return 0;
 }
 
 
 /*
-   Re-organize database when fill ratio > DB_MAXFILL
+   Re-organize database according to some heuristics
 */
-static inline void db_optimize(DEPOT *dbp, char *name)
+static inline void db_optimize(VILLA *dbp, char *name)
 {
-    int bnum = dpbnum(dbp); /* very cheap: O(1) */
-    int rnum = dprnum(dbp); /* very cheap: O(1) */
-    if (bnum > 0 && rnum > 0 && ((double)rnum / bnum > DB_MAXFILL)) {
-	if (!dpoptimize(dbp, -1))
-	    print_error(__FILE__, __LINE__,
-			"(qdbm) dpoptimize for %s failed: %s",
-			name, dperrmsg(dpecode));
-    }
+    UNUSED(dbp);
+    UNUSED(name);
+
+    /* The Villa API doesn't need optimizing like the formerly used
+       Depot API because Villa uses B+ trees and Depot uses hash tables.
+       Database size may grow larger and could get compacted with
+       vloptimize() however as the database size with Villa is smaller
+       anyway, I don't think it is worth it. */
 }
 
 
@@ -230,9 +233,9 @@
 {
     int ret;
     dbh_t *handle = vhandle;
-    DEPOT *dbp = handle->dbp;
+    VILLA *dbp = handle->dbp;
 
-    ret = dpput(dbp, token->data, token->leng, val->data, val->leng, DP_DOVER);
+    ret = vlput(dbp, token->data, token->leng, val->data, val->leng, VL_DOVER);
 
     if (ret == 0) {
 	print_error(__FILE__, __LINE__,
@@ -254,19 +257,19 @@
 void db_close(void *vhandle)
 {
     dbh_t *handle = vhandle;
-    DEPOT *dbp;
+    VILLA *dbp;
 
     if (handle == NULL) return;
 
     if (DEBUG_DATABASE(1))
-	fprintf(dbgout, "(qdbm) dpclose(%s)\n", handle->name);
+	fprintf(dbgout, "(qdbm) vlclose(%s)\n", handle->name);
 
     dbp = handle->dbp;
 
     db_optimize(dbp, handle->name);
 
-    if (!dpclose(dbp))
-	print_error(__FILE__, __LINE__, "(qdbm) dpclose for %s failed: %s",
+    if (!vlclose(dbp))
+	print_error(__FILE__, __LINE__, "(qdbm) vlclose for %s failed: %s",
 		    handle->name, dperrmsg(dpecode));
 
     handle->dbp = NULL;
@@ -281,10 +284,10 @@
 void db_flush(void *vhandle)
 {
     dbh_t *handle = vhandle;
-    DEPOT * dbp = handle->dbp;
+    VILLA * dbp = handle->dbp;
 
-    if (!dpsync(dbp))
-	print_error(__FILE__, __LINE__, "(qdbm) dpsync failed: %s",
+    if (!vlsync(dbp))
+	print_error(__FILE__, __LINE__, "(qdbm) vlsync failed: %s",
 		    dperrmsg(dpecode));
 }
 
@@ -294,16 +297,16 @@
     int ret = 0;
 
     dbh_t *handle = vhandle;
-    DEPOT *dbp = handle->dbp;
+    VILLA *dbp = handle->dbp;
 
     dbv_t dbv_key, dbv_data;
     int ksiz, dsiz;
     char *key, *data;
 
-    ret = dpiterinit(dbp);
+    ret = vlcurfirst(dbp);
     if (ret) {
-	while ((key = dpiternext(dbp, &ksiz))) {
-	    data = dpget(dbp, key, ksiz, 0, -1, &dsiz);
+	while ((key = vlcurkey(dbp, &ksiz))) {
+	    data = vlcurval(dbp, &dsiz);
 	    if (data) {
 		/* switch to "dbv_t *" variables */
 		dbv_key.leng = ksiz;
@@ -324,9 +327,11 @@
 		free(data); /* not xfree() as allocated by dpget() */
 	    }
 	    free(key); /* not xfree() as allocated by dpiternext() */
+
+	    vlcurnext(dbp);
 	}
     } else {
-	print_error(__FILE__, __LINE__, "(qdbm) dpiterinit err: %s",
+	print_error(__FILE__, __LINE__, "(qdbm) vlcurfirst err: %s",
 		    dperrmsg(dpecode));
 	exit(EX_ERROR);
     }
@@ -335,7 +340,8 @@
 }
 
 
-const char *db_str_err(int e) {
+const char *db_str_err(int e)
+{
     return dperrmsg(e);
 }
 
@@ -350,9 +356,32 @@
     init = false;
 }
 
-/* dummy infrastructure, to be expanded by environment
- * or transactional initialization/shutdown */
-int db_txn_begin(void *d) { (void)d; return 0; }
-int db_txn_abort(void *d) { (void)d; return 0; }
-int db_txn_commit(void *d) { (void)d; return 0; }
-int db_recover(int a, int b) { (void)a; (void)b; return 0; }
+/** begin transaction. Returns 0 for success. */
+int db_txn_begin(void *d)
+{
+    UNUSED(d);
+
+    return 0;
+}
+
+int db_txn_abort(void *d)
+{
+    UNUSED(d);
+
+    return 0;
+}
+
+int db_txn_commit(void *d)
+{
+    UNUSED(d);
+
+    return 0;
+}
+
+int db_recover(int a, int b)
+{
+    UNUSED(a);
+    UNUSED(b);
+
+    return 0;
+}


More information about the bogofilter-dev mailing list