Another chi-squared routine
Clint Adams
schizo at debian.org
Sun Jan 19 17:36:30 CET 2003
> I decided to try gsl anyhow. I'm not offering a patch, but it's
> trivially easy to add to any version of bogofilter that does Fisher:
Here's the patch. Would it be too inconsistent to use gsl when it is
present and fall back to the included dcdf otherwise?
Index: Makefile.am
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/Makefile.am,v
retrieving revision 1.106
diff -u -r1.106 Makefile.am
--- Makefile.am 19 Jan 2003 15:01:47 -0000 1.106
+++ Makefile.am 19 Jan 2003 16:31:34 -0000
@@ -60,8 +60,7 @@
wordhash.h wordhash.c wordlists.h wordlists.c \
xmalloc.h xcalloc.c xmalloc.c xmem_error.c xrealloc.c xstrdup.h xstrdup.c \
xstrlcat.h xstrlcat.c \
- xstrlcpy.h xstrlcpy.c \
- dcdflib/src/dcdflib.c dcdflib/src/ipmpar.c
+ xstrlcpy.h xstrlcpy.c
CLEANFILES=version.c directories.c
Index: configure.ac
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/configure.ac,v
retrieving revision 1.2
diff -u -r1.2 configure.ac
--- configure.ac 19 Jan 2003 15:01:46 -0000 1.2
+++ configure.ac 19 Jan 2003 16:31:35 -0000
@@ -145,6 +145,7 @@
AC_ARG_ENABLE(robinson-fisher,
AC_HELP_STRING([--disable-robinson-fisher],
[Disable Fisher's method of combining P]),
+ [
if test "$enableval" = no; then
[USE_FISHER=NO]
else
@@ -155,7 +156,14 @@
# trouble ???
# AC_CHECK_LIB([dcdf], [cdfchi], LIBS="$LIBS -ldcdf",
# AC_MSG_ERROR([libdcdf (from package dcdflib.c) was not found.]))
+ AC_CHECK_LIB(gslcblas, main)
+ AC_CHECK_LIB(gsl, main)
fi
+ ],
+ [
+ AC_CHECK_LIB(gslcblas, main)
+ AC_CHECK_LIB(gsl, main)
+ ]
)
if test x$USE_GRAHAM != xNO ; then
Index: fisher.c
===================================================================
RCS file: /cvsroot/bogofilter/bogofilter/fisher.c,v
retrieving revision 1.19
diff -u -r1.19 fisher.c
--- fisher.c 13 Jan 2003 15:40:17 -0000 1.19
+++ fisher.c 19 Jan 2003 16:31:35 -0000
@@ -13,7 +13,9 @@
#include <config.h>
#include "common.h"
-#include <dcdflib.h>
+#include <gsl/gsl_randist.h>
+#include <gsl/gsl_integration.h>
+#include <gsl/gsl_errno.h>
#include "fisher.h"
@@ -68,16 +70,21 @@
/* Function Definitions */
-double prbf(double x, double df)
-{
- int which=1;
- double p, q;
- int status;
- double bound;
-
- cdfchi(&which, &p, &q, &x, &df, &status, &bound);
-
- return(status==0 ? q : 1.0);
+double chisq(double x, void *p) {
+ return(gsl_ran_chisq_pdf(x, *(double *)p));
+}
+
+double prbf(double x, double df) {
+ gsl_function chi; int status;
+ double p, abserr; size_t neval;
+ chi.function = chisq;
+ chi.params = &df;
+ gsl_set_error_handler_off();
+ status = gsl_integration_qng(&chi, 0.00001, x, 0.0001, 0.01, &p,
+ &abserr, &neval);
+ /* if we didn't converge we might be outside [0,1] */
+ p = max(0.0, 1.0 - p);
+ return(min(1.0, p));
}
double fis_get_spamicity(size_t robn, FLOAT P, FLOAT Q )
More information about the bogofilter-dev
mailing list