[PATCH] tuning.sh
David Relson
relson at osagesoftware.com
Sat Jun 14 21:45:13 CEST 2003
Greetings,
The patch below incorporates fixes for the problems found by pi and
Greg. It also reformats the Top 10 Results as they have suggested and no
longer tests robs=1 or min_dev=0.475 as these values are just too darn big!
David
--- 01362/tuning/tuning.sh 2003-06-05 13:47:35.000000000 -0400
+++ cvs/tuning/tuning.sh 2003-06-14 15:40:40.000000000 -0400
@@ -11,13 +11,14 @@
# range of values for testing
-svals="1 3.2e-1 1e-1 3.2e-2 1e-2"
-mvals=`seq 0.025 0.025 0.47501` # wide value range
+svals="3.2e-1 1e-1 3.2e-2 1e-2"
+mvals=`seq 0.025 0.025 0.45001` # wide value range
#mvals=`seq 0.420 0.020 0.46001` # high value range
# approx 0.1%-0.3% of nonspam corpus size for selecting spam_cutoff value
-target=24
+target=12
+export target
# file names
@@ -70,7 +71,7 @@
function getco () {
opts="-m$1 -o$2"
- shift ; shift ; shift
+ shift ; shift
res=`cat $* | bogofilter -t -c $CFG $opts -v 2>&1 | \
perl -e ' $target = $ENV{"target"}; while (<>) { ' \
-e ' ($i, $d) = split; push @diffs, $d unless $i != 1; }' \
@@ -83,7 +84,7 @@
function wrapper () {
v="-v"
opts="-m$1 -o$2"
- shift ; shift ; shift
+ shift ; shift
res=`cat $1 | bogofilter -t -c $CFG $opts -v | grep -c $v '^1'`
}
@@ -93,16 +94,16 @@
date=`date "+%m/%d %H:%M:%S"`
echo -n $date " "
printf "%-7s %5.3f fpos..." $rs $md
- getco $md 0.10 $rs r0.ns.mc r1.ns.mc r2.ns.mc
+ getco $md,$rs 0.10 r0.ns.mc r1.ns.mc r2.ns.mc
fpos=${res##* }; co=${res%% *}; let fpos=$fpos/3
printf "%d at cutoff %8.6f, run0..." $fpos $co
- run=0; wrapper $md $co $rs r0.sp.mc; fneg1=$res
+ run=0; wrapper $md,$rs $co r0.sp.mc; fneg1=$res
echo "$rs $md $co $run $fpos $fneg1" >> $PARM_TBL
printf "%3d run1..." $fneg1
- run=1; wrapper $md $co $rs r1.sp.mc; fneg2=$res
+ run=1; wrapper $md,$rs $co r1.sp.mc; fneg2=$res
echo "$rs $md $co $run $fpos $fneg2" >> $PARM_TBL
printf "%3d run2..." $fneg2
- run=2; wrapper $md $co $rs r2.sp.mc; fneg3=$res
+ run=2; wrapper $md,$rs $co r2.sp.mc; fneg3=$res
echo "$rs $md $co $run $fpos $fneg3" >> $PARM_TBL
printf "%3d" $fneg3
let fneg="$fneg1+$fneg2+$fneg3"
@@ -124,7 +125,10 @@
# get 10 best results (lowest false negative count)
(echo "" ; \
-echo "Top 10 results" ; \
-grep fpos < $RESULTS | sed "s@\.\.\.@.. @g" | sort -g --key=15 | head -10 ) | tee -a $RESULTS
+echo "Top 10 results:" ; \
+echo " robs min_dev spam_cutoff run0 run1 run2 total" ; \
+grep fpos < $RESULTS | grep -v cutoff.0.415000 | sed "s@\.\.\.@.. @g" | \
+awk '{printf "%6.4f %7s %12s %4d %4d %4d %5d\n", $3, $4, $9, $11, $13, $15, $16 }' | \
+tr -d "," | sort -g --key=7 | head -10 ) | tee -a $RESULTS
date "+%m/%d %H:%M:%S"
More information about the Bogofilter
mailing list