DB backend support for lmdb?

Steffen Nurpmeso steffen at sdaoden.eu
Tue Jul 17 01:35:07 CEST 2018


Ahoi!  And HA!!

Steffen Nurpmeso wrote in <20180716221310.qDo5g%steffen at sdaoden.eu>:
 |Steffen Nurpmeso wrote in <20180715001400.pHS5O%steffen at sdaoden.eu>:
 ||Matthias Andree wrote in <d38d0bd4-eb6e-6b0e-6252-6bf5223818f5 at an3e.de>:
 |||Am 21.06.2018 um 16:14 schrieb Steffen Nurpmeso:
 |||> Steffen Nurpmeso wrote in <20180529185111.0IjYp%steffen at sdaoden.eu>:
 |||>|Matthias Andree <matthias at an3e.de> wrote:
 |||>||Am 28.05.2018 um 23:57 schrieb Steffen Nurpmeso:
 |||>  ...
 ||Sorry for the long delay, this Thursday finally i have found time,
 | ...
 ||causes some tests to fail.  It is too late now, i have to go, and
 ||revisit that on Monday.
 ...
 |So i post what i have, it is what i thought would be final.
 |I thought only, because we have five failing tests still.

This has not changed.  I will try on a glibc system and on FreeBSD
tomorrow.

 |This, however, is far less than what i get if i symlink the
 |executables in src/ to native AlpineLinux bogofilter (sqlite) or
 |my own compiled BerkeleyDB version.  Maybe that is the wrong way
 |to do things, can i somehow regularly specify where the executables
 |come from?  That is, could there also be Musl libC or busybox
 |tools related problems?
 |
 |But, besides this i can reproduce a problem with a large database
 ...
 |mdb_cursor_put().  I have not yet understood why this is the case.

This seems to be a bug in LMDB.  If i do not call set_mapsize()
when creating the database from scratch, then the crash does not
happen!  I thought it was only an integer assignment, but it seems
there is some magic algorithm going on in LMDB.  Anyway: works!!!
However... the performance is pretty bad:

  /usr/local/bin/s-nail exit status 0, resource usage info:
          User time                          : 9:220997 (sec:usec)
          System time                        : 46:329358 (sec:usec)
          Max. resident size / Shared mem.   : 35744 / 0
          Integral unshared data / stack     : 0 / 0
          Minor / Major page faults / Swaps  : 323047 / 0 / 0
          Block input / output  operations   : 0 / 752784
          Messages sent / received           : 0 / 0
          Signals received                   : 0
          Voluntary / invol. context switches: 21273 / 3657

On real hardware or a more modern system it may be better.
Pretty tough stuff, i wonder why it is so slow.  Anyway, please
find below the little diff on top of the former one.

Thank you!!
Ciao and good night,

diff --git a/bogofilter/src/datastore_lmdb.c b/bogofilter/src/datastore_lmdb.c
index 3f1b80c..3ab53d6 100644
--- a/bogofilter/src/datastore_lmdb.c
+++ b/bogofilter/src/datastore_lmdb.c
@@ -33,7 +33,7 @@
  */
 
 /* mdb_env_set_maxreaders() */
-#define a_BFLM_MAXREADERS 63
+#define a_BFLM_MAXREADERS 15
 
 /* Minimum/initial database size, and DB size grow.
  * Space it so that a DB load does not run against walls too many times.
@@ -93,7 +93,7 @@ struct a_bflm{
 struct a_bflm_txn_cache{
     struct a_bflm_txn_cache *bflmtc_last;
     struct a_bflm_txn_cache *bflmtc_next;   /* Needs to be build before use! */
-    char *bflmtc_caster;    /* Current caster; NULL: full */
+    char *bflmtc_caster;    /* Current caster */
     char *bflmtc_max;       /* Maximum usable byte, exclusive */
     /* Actually points to &self[1] TODO [0] or [8], dep. __STDC_VERSION__! */
     char *bflmtc_data;
@@ -120,7 +120,7 @@ static char const *a_bflm_txn_cache_put(struct a_bflm *bflmp, MDB_val *key,
  * Return NULL on success or an error message otherwise */
 static char const *a_bflm_txn_cache_replay(struct a_bflm *bflmp);
 
-/* Free the recovery stach and possible heap data */
+/* Free the recovery stack and possible heap data */
 static void a_bflm_txn_cache_free(struct a_bflm *bflmp);
 
 static dsm_t /* TODO const*/ a_bflm_dsm = {
@@ -177,11 +177,15 @@ a_bflm_init(bfpath *bfp){
     /* The "problem" is that we need to set_mapsize() before env_open(),
      * otherwise the LMDB default will be used as a default (in 0.9.22).
      * But since this is cheap at this point just do it.. */
+    /* TODO We may not do this because with v0.9.22 a further DB open
+     * TODO may crash in mdb_*_put() after a growing _mapsize! */
+#if 0
     e = mdb_env_set_mapsize(rv->bflm_env, a_BFLM_MINSIZE);
     if(e != MDB_SUCCESS){
         emsg = "mdb_env_set_mapsize()";
         goto jerr2;
     }
+#endif
 
     e = mdb_env_open(rv->bflm_env, rv->bflm_filepath, MDB_NOSUBDIR, 0660);
     if(e != MDB_SUCCESS){
@@ -195,8 +199,9 @@ a_bflm_init(bfpath *bfp){
     rv->bflm_mapsize = envinfo.me_mapsize;
 
     if(rv->bflm_flags & a_BFLM_DEBUG)
-        fprintf(dbgout, "LMDB[%ld]: init: %p/%s\n",
-            (long)getpid(), rv, rv->bflm_filepath);
+        fprintf(dbgout, "LMDB[%ld]: init: %p/%s, mapsize: %lu\n",
+            (long)getpid(), rv, rv->bflm_filepath,
+            (unsigned long)rv->bflm_mapsize);
 jleave:
     return rv;
 
@@ -339,10 +344,14 @@ a_bflm_txn_commit(void *vhandle){
     retries = 0;
 jredo:
     e = mdb_txn_commit(bflmp->bflm_txn);
-    if(e == MDB_MAP_FULL && ++retries <= a_BFLM_GROW_TRIES &&
-            a_bflm_txn_mapfull(bflmp, false)){
-        mdb_cursor_close(bflmp->bflm_cursor);
-        goto jredo;
+    if(e != MDB_SUCCESS){
+        if(e == MDB_MAP_FULL && ++retries <= a_BFLM_GROW_TRIES &&
+                a_bflm_txn_mapfull(bflmp, false)){
+            mdb_cursor_close(bflmp->bflm_cursor);
+            goto jredo;
+        }
+        mdb_txn_abort(bflmp->bflm_txn);
+        e = MDB_PANIC;
     }
 
     a_bflm_txn_cache_free(bflmp);
@@ -361,6 +370,7 @@ jleave:
 
 static bool
 a_bflm_txn_mapfull(struct a_bflm *bflmp, bool close_cursor){
+    MDB_envinfo envinfo;
     char const *emsg;
     int e;
     size_t i;
@@ -381,7 +391,8 @@ a_bflm_txn_mapfull(struct a_bflm *bflmp, bool close_cursor){
         emsg = "mdb_env_set_mapsize()";
         goto jerr1;
     }
-    bflmp->bflm_mapsize = i;
+    /* no error defined */mdb_env_info(bflmp->bflm_env, &envinfo);
+    bflmp->bflm_mapsize = envinfo.me_mapsize;
 
     /* Recreate transaction */
     e = mdb_txn_begin(bflmp->bflm_env, NULL, 0, &bflmp->bflm_txn);

--steffen
|
|Der Kragenbaer,                The moon bear,
|der holt sich munter           he cheerfully and one by one
|einen nach dem anderen runter  wa.ks himself off
|(By Robert Gernhardt)


More information about the bogofilter-dev mailing list