DB backend support for lmdb?

Steffen Nurpmeso steffen at sdaoden.eu
Wed Jul 18 17:07:19 CEST 2018


Hallo and a very nice summer day!

Steffen Nurpmeso wrote in <20180717235921.8OO-E%steffen at sdaoden.eu>:
 |Matthias Andree wrote in <9a51593a-a5ce-06f3-6070-af25b1a6db34 at an3e.de>:
 ||Am 18.07.2018 um 01:19 schrieb Matthias Andree:
 ||> 
 ||> Before doing further development, please switch to
 ||> ^/branches/lmdb-support and/or update. I'm heading for bed now that
 ||> r7064 has been committed.
 ||
 ||...make that r7065.

So below is what i have now; i cannot make it crash any more, while
still setting the minimum DB size, but after opening the DB
environment, not before.  And then also a new alternative
implementation, usable by defining a_BFLM_FIXED_SIZE, which does
not include all the cache replay stuff.  For this mode we _can_
set the minimum DB size before opening the DB, just as documented
in the LMDB doc iirc.

Both modes have three failing test cases into which i will look
this evening: t.bogodir, t.encoding and t.bogoutil.  But then, if
i implement a real db_created(), which is still somehow faked, as
we just do not and cannot know whether we have created the file or
not, then i get failing tests t.encoding, t.maint and t.regtest.

I think this is in part a problem of bogofilter: when i looked
yesterday i saw it takes three runs until finally this
db_created() results in something to be stored, so how could this
be implemented for LMDB without usage of access(2) on the DB path?
If i do "(rdonly ? 0440 : 0660)" then i get failure on t.bogodir
in addition.  The only remaining idea that i have would be to
open(2) ourselves, i have not yet tried it.  It may be as easy as
that, but i have not yet really tried, maybe we would need to
silently skip opening a LMDB handle and simply treat this mode
as an "empty DB"; talking about read-only and non-existent that is.

Ciao!

--- datastore_lmdb.c.ma	2018-07-18 16:44:24.000000000 +0200
+++ datastore_lmdb.c	2018-07-18 17:01:20.778076814 +0200
@@ -30,28 +30,39 @@
  * In order to be able to deal with 2. we need to track all changes that are
  * performed in a txn, so that in case we are running against the wall we are
  * capable to replay all changes after having resized the map.
+ *
+ * Alternatively, define a_BFLM_FIXED_SIZE, in which case all the replay code
+ * is not compiled, but instead the given size is fixed, and any DB overflow
+ * results in program abortion.  Since the DB should only consume disc space
+ * for those pages which are used, this should not hurt in practice.
  */
 
+/* Alternative implementation: fixed DB size */
+/*#define a_BFLM_FIXED_SIZE (1u << 31)*/
+
 /* mdb_env_set_maxreaders() */
 #define a_BFLM_MAXREADERS 15
 
-/* Minimum/initial database size, and DB size grow.
- * Space it so that a DB load does not run against walls too many times.
- * We try _TRIES times to resize for a single new entry before giving up */
-#define a_BFLM_MINSIZE (1u << 21)
-#define a_BFLM_GROW (1u << 24)
-#define a_BFLM_GROW_TRIES 3
-
-/* Size of one chunk of the intermediate txn cache, as above.
- * Space it so that a DB load does not require all too many.
- * Of course, if a token requires more space, we allocate a larger chunk */
-#define a_BFLM_TXN_CACHE_SIZE (1u << 20)
-
-/* An entry consists of an uint32_t describing the length of the key.
- * If the high bit is set an uint32_t describing the length of the value
- * follows.  After the data buffers there possibly is alignment pad */
-#define a_BFLM_TXN_CACHE_ALIGN(X) \
+#ifndef a_BFLM_FIXED_SIZE
+    /* Minimum/initial database size, and DB size grow.
+     * Space it so that a DB load does not run against walls too many times.
+     * We try _TRIES times to resize for a single new entry before giving up */
+/*# define a_BFLM_MINSIZE 0x3FFFFFFFu*//*(01u<<30)*//*(1u << 21)*/
+# define a_BFLM_MINSIZE (1u<<21)
+# define a_BFLM_GROW (1u<<21)
+# define a_BFLM_GROW_TRIES 3
+
+    /* Size of one chunk of the intermediate txn cache, as above.
+     * Space it so that a DB load does not require all too many.
+     * Of course, if a token requires more space, we allocate a larger chunk */
+# define a_BFLM_TXN_CACHE_SIZE (1u << 20)
+
+    /* An entry consists of an uint32_t describing the length of the key.
+     * If the high bit is set an uint32_t describing the length of the value
+     * follows.  After the data buffers there possibly is alignment pad */
+# define a_BFLM_TXN_CACHE_ALIGN(X) \
     (((X) + (sizeof(uint32_t) - 1)) & ~(sizeof(uint32_t) - 1))
+#endif /* a_BFLM_FIXED_SIZE */
 
 #include "common.h"
 
@@ -76,20 +87,24 @@ enum a_bflm_flags{
     a_BFLM_NONE,
     a_BFLM_DEBUG = 1u<<0,
     a_BFLM_RDONLY = 1u<<1,
-    a_BFLM_HAS_TXN = 1u<<2
+    a_BFLM_DB_CREATED = 1u<<2,
+    a_BFLM_HAS_TXN = 1u<<3
 };
 
 struct a_bflm{
     char *bflm_filepath;    /* bfpath.filepath (points to &self[1]) */
     MDB_env *bflm_env;
-    size_t bflm_mapsize;    /* Current notion of DB map size */
     MDB_txn *bflm_txn;
     MDB_cursor *bflm_cursor;
     MDB_dbi bflm_dbi;
     uint32_t bflm_flags;
+#ifndef a_BFLM_FIXED_SIZE
+    size_t bflm_mapsize;    /* Current notion of DB map size */
     struct a_bflm_txn_cache *bflm_txn_cache;    /* Stack thereof */
+#endif
 };
 
+#ifndef a_BFLM_FIXED_SIZE
 struct a_bflm_txn_cache{
     struct a_bflm_txn_cache *bflmtc_last;
     struct a_bflm_txn_cache *bflmtc_next;   /* Needs to be build before use! */
@@ -98,9 +113,10 @@ struct a_bflm_txn_cache{
     /* Actually points to &self[1] TODO [0] or [8], dep. __STDC_VERSION__! */
     char *bflmtc_data;
 };
+#endif
 
 /**/
-static struct a_bflm *a_bflm_init(bfpath *bfp);
+static struct a_bflm *a_bflm_init(bfpath *bfp, bool rdonly);
 static void a_bflm_free(struct a_bflm *bflmp);
 
 /**/
@@ -108,6 +124,7 @@ static int a_bflm_txn_begin(void *vhandl
 static int a_bflm_txn_abort(void *vhandle);
 static int a_bflm_txn_commit(void *vhandle);
 
+#ifndef a_BFLM_FIXED_SIZE
 /**/
 static bool a_bflm_txn_mapfull(struct a_bflm *bflmp, bool close_cursor);
 
@@ -122,6 +139,7 @@ static char const *a_bflm_txn_cache_repl
 
 /* Free the recovery stack and possible heap data */
 static void a_bflm_txn_cache_free(struct a_bflm *bflmp);
+#endif /* a_BFLM_FIXED_SIZE */
 
 static dsm_t /* TODO const*/ a_bflm_dsm = {
     /* public -- used in datastore.c */
@@ -152,9 +170,11 @@ static dsm_t /* TODO const*/ a_bflm_dsm
 };
 
 static struct a_bflm *
-a_bflm_init(bfpath *bfp){
+a_bflm_init(bfpath *bfp, bool rdonly){
     /* No variable array for .bflm_filepath, use same method as in word.h */
+#ifndef a_BFLM_FIXED_SIZE
     MDB_envinfo envinfo;
+#endif
     int e;
     char const *emsg;
     struct a_bflm *rv;
@@ -165,8 +185,10 @@ a_bflm_init(bfpath *bfp){
     memset(rv, 0, sizeof *rv);
     memcpy(rv->bflm_filepath = (char*)&rv[1], bfp->filepath, i);
 
-    rv->bflm_flags = ((DEBUG_DATABASE(1) || getenv("BF_DEBUG_DB") != NULL)
-            ? a_BFLM_DEBUG : a_BFLM_NONE);
+    rv->bflm_flags = (((DEBUG_DATABASE(1) || getenv("BF_DEBUG_DB") != NULL)
+            ? a_BFLM_DEBUG : a_BFLM_NONE) |
+            (rdonly ? a_BFLM_RDONLY : a_BFLM_NONE));
+
     e = mdb_env_create(&rv->bflm_env);
     if(e != MDB_SUCCESS){
         emsg = "mdb_env_open()";
@@ -174,13 +196,11 @@ a_bflm_init(bfpath *bfp){
     }
 
     mdb_env_set_maxreaders(rv->bflm_env, a_BFLM_MAXREADERS);
-    /* The "problem" is that we need to set_mapsize() before env_open(),
-     * otherwise the LMDB default will be used as a default (in 0.9.22).
-     * But since this is cheap at this point just do it.. */
-    /* TODO We may not do this because with v0.9.22 a further DB open
-     * TODO may crash in mdb_*_put() after a growing _mapsize! */
-#if 0
-    e = mdb_env_set_mapsize(rv->bflm_env, a_BFLM_MINSIZE);
+    /* TODO We may not do this before opening the _env, because with v0.9.22
+     * TODO a further DB open will then crash in mdb_*_put() after a growing
+     * TODO _mapsize call! ... */
+#ifdef a_BFLM_FIXED_SIZE
+    e = mdb_env_set_mapsize(rv->bflm_env, a_BFLM_FIXED_SIZE);
     if(e != MDB_SUCCESS){
         emsg = "mdb_env_set_mapsize()";
         goto jerr2;
@@ -193,15 +213,29 @@ a_bflm_init(bfpath *bfp){
         goto jerr2;
     }
 
-    /* ..then query the actual environment and use the reported map size:
-     * Note: LMDB documents to reject requests to shrink the real map size! */
+    /* TODO ... But if we do it thereafter, anything is fine! */
+#ifndef a_BFLM_FIXED_SIZE
+    e = mdb_env_set_mapsize(rv->bflm_env, a_BFLM_MINSIZE);
+    if(e != MDB_SUCCESS){
+        emsg = "mdb_env_set_mapsize()";
+        goto jerr2;
+    }
     /* no error defined */mdb_env_info(rv->bflm_env, &envinfo);
     rv->bflm_mapsize = envinfo.me_mapsize;
+#endif
+
+    /* Let us fake a "has been created" event */
+    if(!(rv->bflm_flags & a_BFLM_RDONLY)){
+        MDB_stat s;
+
+        /* no error defined */mdb_env_stat(rv->bflm_env, &s);
+        if(s.ms_entries == 0)
+            rv->bflm_flags |= a_BFLM_DB_CREATED;
+    }
 
     if(rv->bflm_flags & a_BFLM_DEBUG)
-        fprintf(dbgout, "LMDB[%ld]: init: %p/%s, mapsize: %lu\n",
-            (long)getpid(), rv, rv->bflm_filepath,
-            (unsigned long)rv->bflm_mapsize);
+        fprintf(dbgout, "LMDB[%ld]: init: %p [%s]\n",
+            (long)getpid(), rv, rv->bflm_filepath);
 jleave:
     return rv;
 
@@ -219,11 +253,13 @@ jerr1:
 static void
 a_bflm_free(struct a_bflm *bflmp){
     if(bflmp != NULL){
+#ifndef a_BFLM_FIXED_SIZE
         if(bflmp->bflm_txn_cache != NULL){
             if(DEBUG_DATABASE(1))
                 fprintf(dbgout, "LMDB _free(): error: there is txn_cache!\n");
             a_bflm_txn_cache_free(bflmp);
         }
+#endif
 
         mdb_env_close(bflmp->bflm_env);
 
@@ -312,7 +348,9 @@ a_bflm_txn_abort(void *vhandle){
 
     mdb_cursor_close(bflmp->bflm_cursor);
     mdb_txn_abort(bflmp->bflm_txn);
+#ifndef a_BFLM_FIXED_SIZE
     a_bflm_txn_cache_free(bflmp);
+#endif
 
     bflmp->bflm_flags &= ~a_BFLM_HAS_TXN;
 jleave:
@@ -322,7 +360,10 @@ jleave:
 static int
 a_bflm_txn_commit(void *vhandle){
     struct a_bflm *bflmp;
-    int e, retries;
+#ifndef a_BFLM_FIXED_SIZE
+    int retries;
+#endif
+    int e;
 
     e = DST_OK;
 
@@ -341,20 +382,26 @@ a_bflm_txn_commit(void *vhandle){
 
     mdb_cursor_close(bflmp->bflm_cursor);
 
+#ifndef a_BFLM_FIXED_SIZE
     retries = 0;
 jredo:
+#endif
     e = mdb_txn_commit(bflmp->bflm_txn);
     if(e != MDB_SUCCESS){
+#ifndef a_BFLM_FIXED_SIZE
         if(e == MDB_MAP_FULL && ++retries <= a_BFLM_GROW_TRIES &&
                 a_bflm_txn_mapfull(bflmp, false)){
             mdb_cursor_close(bflmp->bflm_cursor);
             goto jredo;
         }
+#endif
         mdb_txn_abort(bflmp->bflm_txn);
         e = MDB_PANIC;
     }
 
+#ifndef a_BFLM_FIXED_SIZE
     a_bflm_txn_cache_free(bflmp);
+#endif
 
     bflmp->bflm_flags &= ~a_BFLM_HAS_TXN;
     if(e == MDB_SUCCESS)
@@ -368,6 +415,7 @@ jleave:
     return e;
 }
 
+#ifndef a_BFLM_FIXED_SIZE
 static bool
 a_bflm_txn_mapfull(struct a_bflm *bflmp, bool close_cursor){
     MDB_envinfo envinfo;
@@ -581,6 +629,7 @@ a_bflm_txn_cache_free(struct a_bflm *bfl
         xfree(bflmtcp);
     }
 }
+#endif /* a_BFLM_FIXED_SIZE */
 
 dsm_t /* const TODO */ *dsm = &a_bflm_dsm;
 
@@ -589,12 +638,9 @@ db_open(void *env, bfpath *bfp, dbmode_t
     struct a_bflm *bflmp;
     UNUSED(env);
 
-    if((bflmp = a_bflm_init(bfp)) == NULL)
+    if((bflmp = a_bflm_init(bfp, (open_mode == DS_READ))) == NULL)
         goto jleave;
 
-    if(open_mode == DS_READ)
-        bflmp->bflm_flags |= a_BFLM_RDONLY;
-
     if(bflmp->bflm_flags & a_BFLM_DEBUG)
         fprintf(dbgout, "LMDB[%ld]: db_open(%p [%s; rdonly=%d])\n",
             (long)getpid(), bflmp, bflmp->bflm_filepath,
@@ -629,7 +675,12 @@ db_is_swapped(void *vhandle){
 
 bool
 db_created(void *vhandle){
-    return (vhandle != NULL);
+    bool created;
+    struct a_bflm *bflmp;
+
+    created = ((bflmp = vhandle) != NULL &&
+            (bflmp->bflm_flags & a_BFLM_DB_CREATED) != 0);
+    return created;
 }
 
 int
@@ -649,10 +700,6 @@ db_get_dbvalue(void *vhandle, const dbv_
         goto jerr;
     }
 
-    if(DEBUG_DATABASE(3))
-        fprintf(dbgout, "LMDB db_get_dbvalue(): %lu <%.*s>\n",
-            (unsigned long)token->leng, (int)token->leng, token->data);
-
     key.mv_data = token->data;
     key.mv_size = token->leng;
     e = mdb_cursor_get(bflmp->bflm_cursor, &key, &val, MDB_SET);
@@ -670,6 +717,10 @@ db_get_dbvalue(void *vhandle, const dbv_
 
     e = 0;
 jleave:
+    if(DEBUG_DATABASE(3))
+        fprintf(dbgout, "LMDB db_get_dbvalue(): %lu <%.*s> -> %d\n",
+            (unsigned long)token->leng, (int)token->leng, token->data,
+            (e == 0));
     return e;
 jerr:
     if(e != MDB_NOTFOUND){
@@ -687,7 +738,10 @@ db_set_dbvalue(void *vhandle, const dbv_
     MDB_val key, val;
     char const *emsg;
     struct a_bflm *bflmp;
-    int e, retries;
+#ifndef a_BFLM_FIXED_SIZE
+    int retries;
+#endif
+    int e;
 
     e = 0;
 
@@ -699,30 +753,36 @@ db_set_dbvalue(void *vhandle, const dbv_
         goto jerr;
     }
 
-    if(DEBUG_DATABASE(3))
-        fprintf(dbgout, "LMDB db_set_dbvalue(): %lu <%.*s>\n",
-            (unsigned long)token->leng, (int)token->leng, token->data);
-
+#ifndef a_BFLM_FIXED_SIZE
     retries = 0;
 jredo:
+#endif
     key.mv_data = token->data;
     key.mv_size = token->leng;
     val.mv_data = value->data;
     val.mv_size = value->leng;
     e = mdb_cursor_put(bflmp->bflm_cursor, &key, &val, 0);
     if(e != MDB_SUCCESS){
+#ifndef a_BFLM_FIXED_SIZE
         if(e == MDB_MAP_FULL && ++retries <= a_BFLM_GROW_TRIES &&
                 a_bflm_txn_mapfull(bflmp, true))
             goto jredo;
+#endif
         emsg = "mdb_cursor_put()";
         goto jerr;
     }
 
+#ifndef a_BFLM_FIXED_SIZE
     if((emsg = a_bflm_txn_cache_put(bflmp, &key, &val)) != NULL)
         goto jerr;
+#endif
 
     e = 0;
 jleave:
+    if(DEBUG_DATABASE(3))
+        fprintf(dbgout, "LMDB db_set_dbvalue(): %lu <%.*s> -> %d\n",
+            (unsigned long)token->leng, (int)token->leng, token->data,
+            (e == 0));
     return e;
 jerr:
     print_error(__FILE__, __LINE__, "LMDB[%ld]: db_set_dbvalue(), %s: %d, %s",
@@ -735,7 +795,10 @@ db_delete(void *vhandle, const dbv_t *to
     MDB_val key;
     char const *emsg;
     struct a_bflm *bflmp;
-    int e, retries;
+#ifndef a_BFLM_FIXED_SIZE
+    int retries;
+#endif
+    int e;
 
     e = 0;
 
@@ -748,12 +811,10 @@ db_delete(void *vhandle, const dbv_t *to
         goto jerr;
     }
 
-    if(DEBUG_DATABASE(3))
-        fprintf(dbgout, "LMDB db_delete(): %lu <%.*s>\n",
-            (unsigned long)token->leng, (int)token->leng, token->data);
-
+#ifndef a_BFLM_FIXED_SIZE
     retries = 0;
 jredo:
+#endif
     key.mv_data = token->data;
     key.mv_size = token->leng;
     e = mdb_cursor_get(bflmp->bflm_cursor, &key, NULL, MDB_SET_KEY);
@@ -764,19 +825,27 @@ jredo:
 
     e = mdb_cursor_del(bflmp->bflm_cursor, 0);
     if(e != MDB_SUCCESS){
+#ifndef a_BFLM_FIXED_SIZE
         /* Should not happen, though */
         if(e == MDB_MAP_FULL && ++retries <= a_BFLM_GROW_TRIES &&
                 a_bflm_txn_mapfull(bflmp, true))
             goto jredo;
+#endif
         emsg = "mdb_cursor_del()";
         goto jerr;
     }
 
+#ifndef a_BFLM_FIXED_SIZE
     if((emsg = a_bflm_txn_cache_put(bflmp, &key, NULL)) != NULL)
         goto jerr;
+#endif
 
     e = 0;
 jleave:
+    if(DEBUG_DATABASE(3))
+        fprintf(dbgout, "LMDB db_delete(): %lu <%.*s> -> %d\n",
+            (unsigned long)token->leng, (int)token->leng, token->data,
+            (e == 0));
     return e;
 jerr:
     print_error(__FILE__, __LINE__, "LMDB[%ld]: db_delete(), %s: %d, %s",

--steffen
|
|Der Kragenbaer,                The moon bear,
|der holt sich munter           he cheerfully and one by one
|einen nach dem anderen runter  wa.ks himself off
|(By Robert Gernhardt)


More information about the bogofilter-dev mailing list