DB backend support for lmdb?
Steffen Nurpmeso
steffen at sdaoden.eu
Tue Jul 17 01:35:07 CEST 2018
Ahoi! And HA!!
Steffen Nurpmeso wrote in <20180716221310.qDo5g%steffen at sdaoden.eu>:
|Steffen Nurpmeso wrote in <20180715001400.pHS5O%steffen at sdaoden.eu>:
||Matthias Andree wrote in <d38d0bd4-eb6e-6b0e-6252-6bf5223818f5 at an3e.de>:
|||Am 21.06.2018 um 16:14 schrieb Steffen Nurpmeso:
|||> Steffen Nurpmeso wrote in <20180529185111.0IjYp%steffen at sdaoden.eu>:
|||>|Matthias Andree <matthias at an3e.de> wrote:
|||>||Am 28.05.2018 um 23:57 schrieb Steffen Nurpmeso:
|||> ...
||Sorry for the long delay, this Thursday finally i have found time,
| ...
||causes some tests to fail. It is too late now, i have to go, and
||revisit that on Monday.
...
|So i post what i have, it is what i thought would be final.
|I thought only, because we have five failing tests still.
This has not changed. I will try on a GlibC system and on FreeBSD
tomorrow.
|This, however, is far less than what i get if i symlink the
|executables in src/ to native AlpineLinux bogofilter (sqlite) or
|my own compiled BerkeleyDB version. Maybe that is the wrong way
|to do things; can i somehow regularly specify where the executables
|come from? That is, could there also be Musl libC or busybox
|tools related problems?
|
|But, besides this, i can reproduce a problem with a large database
...
|mdb_cursor_put(). I have not yet understood why this is the case.
This seems to be a bug in LMDB. If i do not call set_mapsize()
when creating the database from scratch, then the crash does not
happen! I thought it was only an integer assignment, but it seems
there is some magic algorithm going on in LMDB. Anyway: works!!!
However... the performance is pretty bad:
/usr/local/bin/s-nail exit status 0, resource usage info:
User time : 9:220997 (sec:usec)
System time : 46:329358 (sec:usec)
Max. resident size / Shared mem. : 35744 / 0
Integral unshared data / stack : 0 / 0
Minor / Major page faults / Swaps : 323047 / 0 / 0
Block input / output operations : 0 / 752784
Messages sent / received : 0 / 0
Signals received : 0
Voluntary / invol. context switches: 21273 / 3657
On real hardware or a more modern system it may be better.
Pretty tough stuff, i wonder why it is so slow. Anyway, please
find below the little diff on top of the former one.
Thank you!!
Ciao and good night,
diff --git a/bogofilter/src/datastore_lmdb.c b/bogofilter/src/datastore_lmdb.c
index 3f1b80c..3ab53d6 100644
--- a/bogofilter/src/datastore_lmdb.c
+++ b/bogofilter/src/datastore_lmdb.c
@@ -33,7 +33,7 @@
*/
/* mdb_env_set_maxreaders() */
-#define a_BFLM_MAXREADERS 63
+#define a_BFLM_MAXREADERS 15
/* Minimum/initial database size, and DB size grow.
* Space it so that a DB load does not run against walls too many times.
@@ -93,7 +93,7 @@ struct a_bflm{
struct a_bflm_txn_cache{
struct a_bflm_txn_cache *bflmtc_last;
struct a_bflm_txn_cache *bflmtc_next; /* Needs to be build before use! */
- char *bflmtc_caster; /* Current caster; NULL: full */
+ char *bflmtc_caster; /* Current caster */
char *bflmtc_max; /* Maximum usable byte, exclusive */
/* Actually points to &self[1] TODO [0] or [8], dep. __STDC_VERSION__! */
char *bflmtc_data;
@@ -120,7 +120,7 @@ static char const *a_bflm_txn_cache_put(struct a_bflm *bflmp, MDB_val *key,
* Return NULL on success or an error message otherwise */
static char const *a_bflm_txn_cache_replay(struct a_bflm *bflmp);
-/* Free the recovery stach and possible heap data */
+/* Free the recovery stack and possible heap data */
static void a_bflm_txn_cache_free(struct a_bflm *bflmp);
static dsm_t /* TODO const*/ a_bflm_dsm = {
@@ -177,11 +177,15 @@ a_bflm_init(bfpath *bfp){
/* The "problem" is that we need to set_mapsize() before env_open(),
* otherwise the LMDB default will be used as a default (in 0.9.22).
* But since this is cheap at this point just do it.. */
+ /* TODO We may not do this because with v0.9.22 a further DB open
+ * TODO may crash in mdb_*_put() after a growing _mapsize! */
+#if 0
e = mdb_env_set_mapsize(rv->bflm_env, a_BFLM_MINSIZE);
if(e != MDB_SUCCESS){
emsg = "mdb_env_set_mapsize()";
goto jerr2;
}
+#endif
e = mdb_env_open(rv->bflm_env, rv->bflm_filepath, MDB_NOSUBDIR, 0660);
if(e != MDB_SUCCESS){
@@ -195,8 +199,9 @@ a_bflm_init(bfpath *bfp){
rv->bflm_mapsize = envinfo.me_mapsize;
if(rv->bflm_flags & a_BFLM_DEBUG)
- fprintf(dbgout, "LMDB[%ld]: init: %p/%s\n",
- (long)getpid(), rv, rv->bflm_filepath);
+ fprintf(dbgout, "LMDB[%ld]: init: %p/%s, mapsize: %lu\n",
+ (long)getpid(), rv, rv->bflm_filepath,
+ (unsigned long)rv->bflm_mapsize);
jleave:
return rv;
@@ -339,10 +344,14 @@ a_bflm_txn_commit(void *vhandle){
retries = 0;
jredo:
e = mdb_txn_commit(bflmp->bflm_txn);
- if(e == MDB_MAP_FULL && ++retries <= a_BFLM_GROW_TRIES &&
- a_bflm_txn_mapfull(bflmp, false)){
- mdb_cursor_close(bflmp->bflm_cursor);
- goto jredo;
+ if(e != MDB_SUCCESS){
+ if(e == MDB_MAP_FULL && ++retries <= a_BFLM_GROW_TRIES &&
+ a_bflm_txn_mapfull(bflmp, false)){
+ mdb_cursor_close(bflmp->bflm_cursor);
+ goto jredo;
+ }
+ mdb_txn_abort(bflmp->bflm_txn);
+ e = MDB_PANIC;
}
a_bflm_txn_cache_free(bflmp);
@@ -361,6 +370,7 @@ jleave:
static bool
a_bflm_txn_mapfull(struct a_bflm *bflmp, bool close_cursor){
+ MDB_envinfo envinfo;
char const *emsg;
int e;
size_t i;
@@ -381,7 +391,8 @@ a_bflm_txn_mapfull(struct a_bflm *bflmp, bool close_cursor){
emsg = "mdb_env_set_mapsize()";
goto jerr1;
}
- bflmp->bflm_mapsize = i;
+ /* no error defined */mdb_env_info(bflmp->bflm_env, &envinfo);
+ bflmp->bflm_mapsize = envinfo.me_mapsize;
/* Recreate transaction */
e = mdb_txn_begin(bflmp->bflm_env, NULL, 0, &bflmp->bflm_txn);
--steffen
|
|Der Kragenbaer, The moon bear,
|der holt sich munter he cheerfully and one by one
|einen nach dem anderen runter wa.ks himself off
|(By Robert Gernhardt)
More information about the bogofilter-dev
mailing list