DB backend support for lmdb?
Steffen Nurpmeso
steffen at sdaoden.eu
Wed Jul 18 17:07:19 CEST 2018
Hallo and a very nice summer day!
Steffen Nurpmeso wrote in <20180717235921.8OO-E%steffen at sdaoden.eu>:
|Matthias Andree wrote in <9a51593a-a5ce-06f3-6070-af25b1a6db34 at an3e.de>:
||Am 18.07.2018 um 01:19 schrieb Matthias Andree:
||>
||> Before doing further development, please switch to
||> ^/branches/lmdb-support and/or update. I'm heading for bed now that
||> r7064 has been committed.
||
||...make that r7065.
So below is what i have now; i cannot make it crash any more, while
still setting the minimum DB size, but after opening the DB
environment, not before. And then also a new alternative
implementation, usable by defining a_BFLM_FIXED_SIZE, which does
not include all the cache replay stuff. For this mode we _can_
set the minimum DB size before opening the DB, just as documented
in the LMDB doc iirc.
Both modes have three failing test cases into which i will look
this evening: t.bogodir, t.encoding and t.bogoutil. But then, if
i implement a real db_created(), which is still somehow faked, as
we just do not and cannot know whether we have created the file or
not, then i get failing tests t.encoding, t.maint and t.regtest.
I think this is in part a problem of bogofilter: when i looked
yesterday i saw it takes three runs until finally this
db_created() results in something to be stored, so how could this
be implemented for LMDB without usage of access(2) on the DB path?
If i do "(rdonly ? 0440 : 0660)" then i get failure on t.bogodir
in addition. The only remaining idea that i have would be to
open(2) ourselves, i have not yet tried it. It may be as easy as
that, but i have not yet really tried, maybe we would need to
silently skip opening a LMDB handle and simply treat this mode
as an "empty DB"; talking about read-only and non-existent that is.
Ciao!
--- datastore_lmdb.c.ma 2018-07-18 16:44:24.000000000 +0200
+++ datastore_lmdb.c 2018-07-18 17:01:20.778076814 +0200
@@ -30,28 +30,39 @@
* In order to be able to deal with 2. we need to track all changes that are
* performed in a txn, so that in case we are running against the wall we are
* capable to replay all changes after having resized the map.
+ *
+ * Alternatively, define a_BFLM_FIXED_SIZE, in which case all the replay code
+ * is not compiled, but instead the given size is fixed, and any DB overflow
+ * results in program abortion. Since the DB should only consume disc space
+ * for those pages which are used, this should not hurt in practice.
*/
+/* Alternative implementation: fixed DB size */
+/*#define a_BFLM_FIXED_SIZE (1u << 31)*/
+
/* mdb_env_set_maxreaders() */
#define a_BFLM_MAXREADERS 15
-/* Minimum/initial database size, and DB size grow.
- * Space it so that a DB load does not run against walls too many times.
- * We try _TRIES times to resize for a single new entry before giving up */
-#define a_BFLM_MINSIZE (1u << 21)
-#define a_BFLM_GROW (1u << 24)
-#define a_BFLM_GROW_TRIES 3
-
-/* Size of one chunk of the intermediate txn cache, as above.
- * Space it so that a DB load does not require all too many.
- * Of course, if a token requires more space, we allocate a larger chunk */
-#define a_BFLM_TXN_CACHE_SIZE (1u << 20)
-
-/* An entry consists of an uint32_t describing the length of the key.
- * If the high bit is set an uint32_t describing the length of the value
- * follows. After the data buffers there possibly is alignment pad */
-#define a_BFLM_TXN_CACHE_ALIGN(X) \
+#ifndef a_BFLM_FIXED_SIZE
+ /* Minimum/initial database size, and DB size grow.
+ * Space it so that a DB load does not run against walls too many times.
+ * We try _TRIES times to resize for a single new entry before giving up */
+/*# define a_BFLM_MINSIZE 0x3FFFFFFFu*//*(01u<<30)*//*(1u << 21)*/
+# define a_BFLM_MINSIZE (1u<<21)
+# define a_BFLM_GROW (1u<<21)
+# define a_BFLM_GROW_TRIES 3
+
+ /* Size of one chunk of the intermediate txn cache, as above.
+ * Space it so that a DB load does not require all too many.
+ * Of course, if a token requires more space, we allocate a larger chunk */
+# define a_BFLM_TXN_CACHE_SIZE (1u << 20)
+
+ /* An entry consists of an uint32_t describing the length of the key.
+ * If the high bit is set an uint32_t describing the length of the value
+ * follows. After the data buffers there possibly is alignment pad */
+# define a_BFLM_TXN_CACHE_ALIGN(X) \
(((X) + (sizeof(uint32_t) - 1)) & ~(sizeof(uint32_t) - 1))
+#endif /* a_BFLM_FIXED_SIZE */
#include "common.h"
@@ -76,20 +87,24 @@ enum a_bflm_flags{
a_BFLM_NONE,
a_BFLM_DEBUG = 1u<<0,
a_BFLM_RDONLY = 1u<<1,
- a_BFLM_HAS_TXN = 1u<<2
+ a_BFLM_DB_CREATED = 1u<<2,
+ a_BFLM_HAS_TXN = 1u<<3
};
struct a_bflm{
char *bflm_filepath; /* bfpath.filepath (points to &self[1]) */
MDB_env *bflm_env;
- size_t bflm_mapsize; /* Current notion of DB map size */
MDB_txn *bflm_txn;
MDB_cursor *bflm_cursor;
MDB_dbi bflm_dbi;
uint32_t bflm_flags;
+#ifndef a_BFLM_FIXED_SIZE
+ size_t bflm_mapsize; /* Current notion of DB map size */
struct a_bflm_txn_cache *bflm_txn_cache; /* Stack thereof */
+#endif
};
+#ifndef a_BFLM_FIXED_SIZE
struct a_bflm_txn_cache{
struct a_bflm_txn_cache *bflmtc_last;
struct a_bflm_txn_cache *bflmtc_next; /* Needs to be build before use! */
@@ -98,9 +113,10 @@ struct a_bflm_txn_cache{
/* Actually points to &self[1] TODO [0] or [8], dep. __STDC_VERSION__! */
char *bflmtc_data;
};
+#endif
/**/
-static struct a_bflm *a_bflm_init(bfpath *bfp);
+static struct a_bflm *a_bflm_init(bfpath *bfp, bool rdonly);
static void a_bflm_free(struct a_bflm *bflmp);
/**/
@@ -108,6 +124,7 @@ static int a_bflm_txn_begin(void *vhandl
static int a_bflm_txn_abort(void *vhandle);
static int a_bflm_txn_commit(void *vhandle);
+#ifndef a_BFLM_FIXED_SIZE
/**/
static bool a_bflm_txn_mapfull(struct a_bflm *bflmp, bool close_cursor);
@@ -122,6 +139,7 @@ static char const *a_bflm_txn_cache_repl
/* Free the recovery stack and possible heap data */
static void a_bflm_txn_cache_free(struct a_bflm *bflmp);
+#endif /* a_BFLM_FIXED_SIZE */
static dsm_t /* TODO const*/ a_bflm_dsm = {
/* public -- used in datastore.c */
@@ -152,9 +170,11 @@ static dsm_t /* TODO const*/ a_bflm_dsm
};
static struct a_bflm *
-a_bflm_init(bfpath *bfp){
+a_bflm_init(bfpath *bfp, bool rdonly){
/* No variable array for .bflm_filepath, use same method as in word.h */
+#ifndef a_BFLM_FIXED_SIZE
MDB_envinfo envinfo;
+#endif
int e;
char const *emsg;
struct a_bflm *rv;
@@ -165,8 +185,10 @@ a_bflm_init(bfpath *bfp){
memset(rv, 0, sizeof *rv);
memcpy(rv->bflm_filepath = (char*)&rv[1], bfp->filepath, i);
- rv->bflm_flags = ((DEBUG_DATABASE(1) || getenv("BF_DEBUG_DB") != NULL)
- ? a_BFLM_DEBUG : a_BFLM_NONE);
+ rv->bflm_flags = (((DEBUG_DATABASE(1) || getenv("BF_DEBUG_DB") != NULL)
+ ? a_BFLM_DEBUG : a_BFLM_NONE) |
+ (rdonly ? a_BFLM_RDONLY : a_BFLM_NONE));
+
e = mdb_env_create(&rv->bflm_env);
if(e != MDB_SUCCESS){
emsg = "mdb_env_open()";
@@ -174,13 +196,11 @@ a_bflm_init(bfpath *bfp){
}
mdb_env_set_maxreaders(rv->bflm_env, a_BFLM_MAXREADERS);
- /* The "problem" is that we need to set_mapsize() before env_open(),
- * otherwise the LMDB default will be used as a default (in 0.9.22).
- * But since this is cheap at this point just do it.. */
- /* TODO We may not do this because with v0.9.22 a further DB open
- * TODO may crash in mdb_*_put() after a growing _mapsize! */
-#if 0
- e = mdb_env_set_mapsize(rv->bflm_env, a_BFLM_MINSIZE);
+ /* TODO We may not do this before opening the _env, because with v0.9.22
+ * TODO a further DB open will then crash in mdb_*_put() after a growing
+ * TODO _mapsize call! ... */
+#ifdef a_BFLM_FIXED_SIZE
+ e = mdb_env_set_mapsize(rv->bflm_env, a_BFLM_FIXED_SIZE);
if(e != MDB_SUCCESS){
emsg = "mdb_env_set_mapsize()";
goto jerr2;
@@ -193,15 +213,29 @@ a_bflm_init(bfpath *bfp){
goto jerr2;
}
- /* ..then query the actual environment and use the reported map size:
- * Note: LMDB documents to reject requests to shrink the real map size! */
+ /* TODO ... But if we do it thereafter, anything is fine! */
+#ifndef a_BFLM_FIXED_SIZE
+ e = mdb_env_set_mapsize(rv->bflm_env, a_BFLM_MINSIZE);
+ if(e != MDB_SUCCESS){
+ emsg = "mdb_env_set_mapsize()";
+ goto jerr2;
+ }
/* no error defined */mdb_env_info(rv->bflm_env, &envinfo);
rv->bflm_mapsize = envinfo.me_mapsize;
+#endif
+
+ /* Let us fake a "has been created" event */
+ if(!(rv->bflm_flags & a_BFLM_RDONLY)){
+ MDB_stat s;
+
+ /* no error defined */mdb_env_stat(rv->bflm_env, &s);
+ if(s.ms_entries == 0)
+ rv->bflm_flags |= a_BFLM_DB_CREATED;
+ }
if(rv->bflm_flags & a_BFLM_DEBUG)
- fprintf(dbgout, "LMDB[%ld]: init: %p/%s, mapsize: %lu\n",
- (long)getpid(), rv, rv->bflm_filepath,
- (unsigned long)rv->bflm_mapsize);
+ fprintf(dbgout, "LMDB[%ld]: init: %p [%s]\n",
+ (long)getpid(), rv, rv->bflm_filepath);
jleave:
return rv;
@@ -219,11 +253,13 @@ jerr1:
static void
a_bflm_free(struct a_bflm *bflmp){
if(bflmp != NULL){
+#ifndef a_BFLM_FIXED_SIZE
if(bflmp->bflm_txn_cache != NULL){
if(DEBUG_DATABASE(1))
fprintf(dbgout, "LMDB _free(): error: there is txn_cache!\n");
a_bflm_txn_cache_free(bflmp);
}
+#endif
mdb_env_close(bflmp->bflm_env);
@@ -312,7 +348,9 @@ a_bflm_txn_abort(void *vhandle){
mdb_cursor_close(bflmp->bflm_cursor);
mdb_txn_abort(bflmp->bflm_txn);
+#ifndef a_BFLM_FIXED_SIZE
a_bflm_txn_cache_free(bflmp);
+#endif
bflmp->bflm_flags &= ~a_BFLM_HAS_TXN;
jleave:
@@ -322,7 +360,10 @@ jleave:
static int
a_bflm_txn_commit(void *vhandle){
struct a_bflm *bflmp;
- int e, retries;
+#ifndef a_BFLM_FIXED_SIZE
+ int retries;
+#endif
+ int e;
e = DST_OK;
@@ -341,20 +382,26 @@ a_bflm_txn_commit(void *vhandle){
mdb_cursor_close(bflmp->bflm_cursor);
+#ifndef a_BFLM_FIXED_SIZE
retries = 0;
jredo:
+#endif
e = mdb_txn_commit(bflmp->bflm_txn);
if(e != MDB_SUCCESS){
+#ifndef a_BFLM_FIXED_SIZE
if(e == MDB_MAP_FULL && ++retries <= a_BFLM_GROW_TRIES &&
a_bflm_txn_mapfull(bflmp, false)){
mdb_cursor_close(bflmp->bflm_cursor);
goto jredo;
}
+#endif
mdb_txn_abort(bflmp->bflm_txn);
e = MDB_PANIC;
}
+#ifndef a_BFLM_FIXED_SIZE
a_bflm_txn_cache_free(bflmp);
+#endif
bflmp->bflm_flags &= ~a_BFLM_HAS_TXN;
if(e == MDB_SUCCESS)
@@ -368,6 +415,7 @@ jleave:
return e;
}
+#ifndef a_BFLM_FIXED_SIZE
static bool
a_bflm_txn_mapfull(struct a_bflm *bflmp, bool close_cursor){
MDB_envinfo envinfo;
@@ -581,6 +629,7 @@ a_bflm_txn_cache_free(struct a_bflm *bfl
xfree(bflmtcp);
}
}
+#endif /* a_BFLM_FIXED_SIZE */
dsm_t /* const TODO */ *dsm = &a_bflm_dsm;
@@ -589,12 +638,9 @@ db_open(void *env, bfpath *bfp, dbmode_t
struct a_bflm *bflmp;
UNUSED(env);
- if((bflmp = a_bflm_init(bfp)) == NULL)
+ if((bflmp = a_bflm_init(bfp, (open_mode == DS_READ))) == NULL)
goto jleave;
- if(open_mode == DS_READ)
- bflmp->bflm_flags |= a_BFLM_RDONLY;
-
if(bflmp->bflm_flags & a_BFLM_DEBUG)
fprintf(dbgout, "LMDB[%ld]: db_open(%p [%s; rdonly=%d])\n",
(long)getpid(), bflmp, bflmp->bflm_filepath,
@@ -629,7 +675,12 @@ db_is_swapped(void *vhandle){
bool
db_created(void *vhandle){
- return (vhandle != NULL);
+ bool created;
+ struct a_bflm *bflmp;
+
+ created = ((bflmp = vhandle) != NULL &&
+ (bflmp->bflm_flags & a_BFLM_DB_CREATED) != 0);
+ return created;
}
int
@@ -649,10 +700,6 @@ db_get_dbvalue(void *vhandle, const dbv_
goto jerr;
}
- if(DEBUG_DATABASE(3))
- fprintf(dbgout, "LMDB db_get_dbvalue(): %lu <%.*s>\n",
- (unsigned long)token->leng, (int)token->leng, token->data);
-
key.mv_data = token->data;
key.mv_size = token->leng;
e = mdb_cursor_get(bflmp->bflm_cursor, &key, &val, MDB_SET);
@@ -670,6 +717,10 @@ db_get_dbvalue(void *vhandle, const dbv_
e = 0;
jleave:
+ if(DEBUG_DATABASE(3))
+ fprintf(dbgout, "LMDB db_get_dbvalue(): %lu <%.*s> -> %d\n",
+ (unsigned long)token->leng, (int)token->leng, token->data,
+ (e == 0));
return e;
jerr:
if(e != MDB_NOTFOUND){
@@ -687,7 +738,10 @@ db_set_dbvalue(void *vhandle, const dbv_
MDB_val key, val;
char const *emsg;
struct a_bflm *bflmp;
- int e, retries;
+#ifndef a_BFLM_FIXED_SIZE
+ int retries;
+#endif
+ int e;
e = 0;
@@ -699,30 +753,36 @@ db_set_dbvalue(void *vhandle, const dbv_
goto jerr;
}
- if(DEBUG_DATABASE(3))
- fprintf(dbgout, "LMDB db_set_dbvalue(): %lu <%.*s>\n",
- (unsigned long)token->leng, (int)token->leng, token->data);
-
+#ifndef a_BFLM_FIXED_SIZE
retries = 0;
jredo:
+#endif
key.mv_data = token->data;
key.mv_size = token->leng;
val.mv_data = value->data;
val.mv_size = value->leng;
e = mdb_cursor_put(bflmp->bflm_cursor, &key, &val, 0);
if(e != MDB_SUCCESS){
+#ifndef a_BFLM_FIXED_SIZE
if(e == MDB_MAP_FULL && ++retries <= a_BFLM_GROW_TRIES &&
a_bflm_txn_mapfull(bflmp, true))
goto jredo;
+#endif
emsg = "mdb_cursor_put()";
goto jerr;
}
+#ifndef a_BFLM_FIXED_SIZE
if((emsg = a_bflm_txn_cache_put(bflmp, &key, &val)) != NULL)
goto jerr;
+#endif
e = 0;
jleave:
+ if(DEBUG_DATABASE(3))
+ fprintf(dbgout, "LMDB db_set_dbvalue(): %lu <%.*s> -> %d\n",
+ (unsigned long)token->leng, (int)token->leng, token->data,
+ (e == 0));
return e;
jerr:
print_error(__FILE__, __LINE__, "LMDB[%ld]: db_set_dbvalue(), %s: %d, %s",
@@ -735,7 +795,10 @@ db_delete(void *vhandle, const dbv_t *to
MDB_val key;
char const *emsg;
struct a_bflm *bflmp;
- int e, retries;
+#ifndef a_BFLM_FIXED_SIZE
+ int retries;
+#endif
+ int e;
e = 0;
@@ -748,12 +811,10 @@ db_delete(void *vhandle, const dbv_t *to
goto jerr;
}
- if(DEBUG_DATABASE(3))
- fprintf(dbgout, "LMDB db_delete(): %lu <%.*s>\n",
- (unsigned long)token->leng, (int)token->leng, token->data);
-
+#ifndef a_BFLM_FIXED_SIZE
retries = 0;
jredo:
+#endif
key.mv_data = token->data;
key.mv_size = token->leng;
e = mdb_cursor_get(bflmp->bflm_cursor, &key, NULL, MDB_SET_KEY);
@@ -764,19 +825,27 @@ jredo:
e = mdb_cursor_del(bflmp->bflm_cursor, 0);
if(e != MDB_SUCCESS){
+#ifndef a_BFLM_FIXED_SIZE
/* Should not happen, though */
if(e == MDB_MAP_FULL && ++retries <= a_BFLM_GROW_TRIES &&
a_bflm_txn_mapfull(bflmp, true))
goto jredo;
+#endif
emsg = "mdb_cursor_del()";
goto jerr;
}
+#ifndef a_BFLM_FIXED_SIZE
if((emsg = a_bflm_txn_cache_put(bflmp, &key, NULL)) != NULL)
goto jerr;
+#endif
e = 0;
jleave:
+ if(DEBUG_DATABASE(3))
+ fprintf(dbgout, "LMDB db_delete(): %lu <%.*s> -> %d\n",
+ (unsigned long)token->leng, (int)token->leng, token->data,
+ (e == 0));
return e;
jerr:
print_error(__FILE__, __LINE__, "LMDB[%ld]: db_delete(), %s: %d, %s",
--steffen
|
|Der Kragenbaer, The moon bear,
|der holt sich munter he cheerfully and one by one
|einen nach dem anderen runter wa.ks himself off
|(By Robert Gernhardt)
More information about the bogofilter-dev
mailing list