Another chi-squared routine

Clint Adams schizo at debian.org
Sun Jan 19 17:36:30 CET 2003


> I decided to try gsl anyhow.  I'm not offering a patch, but it's
> trivially easy to add to any version of bogofilter that does Fisher:

Here's the patch.  Would it be too inconsistent to use gsl when it is
present and fall back to the included dcdf otherwise?

Index: Makefile.am
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/Makefile.am,v
retrieving revision 1.106
diff -u -r1.106 Makefile.am
--- Makefile.am	19 Jan 2003 15:01:47 -0000	1.106
+++ Makefile.am	19 Jan 2003 16:31:34 -0000
@@ -60,8 +60,7 @@
 	wordhash.h wordhash.c wordlists.h wordlists.c \
 	xmalloc.h xcalloc.c xmalloc.c xmem_error.c xrealloc.c xstrdup.h xstrdup.c \
 	xstrlcat.h xstrlcat.c \
-	xstrlcpy.h xstrlcpy.c \
-	dcdflib/src/dcdflib.c dcdflib/src/ipmpar.c
+	xstrlcpy.h xstrlcpy.c
 
 CLEANFILES=version.c directories.c
 
Index: configure.ac
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/configure.ac,v
retrieving revision 1.2
diff -u -r1.2 configure.ac
--- configure.ac	19 Jan 2003 15:01:46 -0000	1.2
+++ configure.ac	19 Jan 2003 16:31:35 -0000
@@ -145,6 +145,7 @@
 AC_ARG_ENABLE(robinson-fisher,
 	AC_HELP_STRING([--disable-robinson-fisher],
 	[Disable Fisher's method of combining P]),
+	[
 	if test "$enableval" = no; then
 		[USE_FISHER=NO]
 	else
@@ -155,7 +156,14 @@
 # trouble ???
 #		AC_CHECK_LIB([dcdf], [cdfchi], LIBS="$LIBS -ldcdf",
 #		    AC_MSG_ERROR([libdcdf (from package dcdflib.c) was not found.]))
+		AC_CHECK_LIB(gslcblas, main)
+		AC_CHECK_LIB(gsl, main)
 	fi
+	],
+	[
+		AC_CHECK_LIB(gslcblas, main)
+		AC_CHECK_LIB(gsl, main)
+	]
 )
 
 if test x$USE_GRAHAM != xNO ; then
Index: fisher.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/fisher.c,v
retrieving revision 1.19
diff -u -r1.19 fisher.c
--- fisher.c	13 Jan 2003 15:40:17 -0000	1.19
+++ fisher.c	19 Jan 2003 16:31:35 -0000
@@ -13,7 +13,9 @@
 #include <config.h>
 #include "common.h"
 
-#include <dcdflib.h>
+#include <gsl/gsl_randist.h>
+#include <gsl/gsl_integration.h>
+#include <gsl/gsl_errno.h>
 
 #include "fisher.h"
 
@@ -68,16 +70,21 @@
 
 /* Function Definitions */
 
-double prbf(double x, double df)
-{
-    int which=1;
-    double p, q;
-    int status;
-    double bound;
-
-    cdfchi(&which, &p, &q, &x, &df, &status, &bound);
-
-    return(status==0 ? q : 1.0);
+double chisq(double x, void *p) {
+    return(gsl_ran_chisq_pdf(x, *(double *)p));
+}
+
+double prbf(double x, double df) {
+    gsl_function chi; int status;
+    double p, abserr; size_t neval;
+    chi.function = chisq;
+    chi.params = &df;
+    gsl_set_error_handler_off();
+    status = gsl_integration_qng(&chi, 0.00001, x, 0.0001, 0.01, &p,
+        &abserr, &neval);
+    /* if we didn't converge we might be outside [0,1] */
+    p = max(0.0, 1.0 - p);
+    return(min(1.0, p));
 }
 
 double fis_get_spamicity(size_t robn, FLOAT P, FLOAT Q )




More information about the bogofilter-dev mailing list