[PATCH] tuning.sh

David Relson relson at osagesoftware.com
Sat Jun 14 21:45:13 CEST 2003


Greetings,

The patch below incorporates fixes for the problems found by pi and 
Greg.  It also reformats the Top 10 Results as they have suggested and no 
longer tests robs=1 or min_dev=0.475 as these values are just too darn big!

David

--- 01362/tuning/tuning.sh	2003-06-05 13:47:35.000000000 -0400
+++ cvs/tuning/tuning.sh	2003-06-14 15:40:40.000000000 -0400
@@ -11,13 +11,14 @@

  # range of values for testing

-svals="1 3.2e-1 1e-1 3.2e-2 1e-2"
-mvals=`seq 0.025 0.025 0.47501`		# wide value range
+svals="3.2e-1 1e-1 3.2e-2 1e-2"
+mvals=`seq 0.025 0.025 0.45001`		# wide value range
  #mvals=`seq 0.420 0.020 0.46001` 	# high value range

  # approx 0.1%-0.3% of nonspam corpus size for selecting spam_cutoff value

-target=24
+target=12
+export target

  # file names

@@ -70,7 +71,7 @@

  function getco () {
      opts="-m$1 -o$2"
-    shift ; shift ; shift
+    shift ; shift
      res=`cat $* | bogofilter -t -c $CFG $opts -v 2>&1 | \
      perl -e ' $target = $ENV{"target"}; while (<>) { ' \
  	 -e ' ($i, $d) = split; push @diffs, $d unless $i != 1; }' \
@@ -83,7 +84,7 @@
  function wrapper () {
      v="-v"
      opts="-m$1 -o$2"
-    shift ; shift ; shift
+    shift ; shift
      res=`cat $1 | bogofilter -t -c $CFG $opts -v | grep -c $v '^1'`
  }

@@ -93,16 +94,16 @@
      date=`date "+%m/%d %H:%M:%S"`
      echo -n $date "  "
      printf "%-7s %5.3f fpos..." $rs $md
-    getco $md 0.10 $rs r0.ns.mc r1.ns.mc r2.ns.mc
+    getco $md,$rs 0.10 r0.ns.mc r1.ns.mc r2.ns.mc
      fpos=${res##* }; co=${res%% *}; let fpos=$fpos/3
      printf "%d at cutoff %8.6f, run0..." $fpos $co
-    run=0; wrapper $md $co $rs r0.sp.mc; fneg1=$res
+    run=0; wrapper $md,$rs $co r0.sp.mc; fneg1=$res
      echo "$rs $md $co $run $fpos $fneg1" >> $PARM_TBL
      printf "%3d  run1..." $fneg1
-    run=1; wrapper $md $co $rs r1.sp.mc; fneg2=$res
+    run=1; wrapper $md,$rs $co r1.sp.mc; fneg2=$res
      echo "$rs $md $co $run $fpos $fneg2" >> $PARM_TBL
      printf "%3d  run2..." $fneg2
-    run=2; wrapper $md $co $rs r2.sp.mc; fneg3=$res
+    run=2; wrapper $md,$rs $co r2.sp.mc; fneg3=$res
      echo "$rs $md $co $run $fpos $fneg3" >> $PARM_TBL
      printf "%3d"  $fneg3
      let fneg="$fneg1+$fneg2+$fneg3"
@@ -124,7 +125,10 @@

  # get 10 best results (lowest false negative count)
  (echo "" ; \
-echo "Top 10 results" ; \
-grep fpos < $RESULTS | sed "s@\.\.\. at .. @g" | sort -g --key=15 | head -10 ) | tee -a $RESULTS
+echo "Top 10 results:" ; \
+echo " robs   min_dev spam_cutoff  run0 run1 run2 total" ; \
+grep fpos < $RESULTS | grep -v cutoff.0.415000 | sed "s@\.\.\. at .. @g" | \
+awk '{printf "%6.4f %7s %12s  %4d %4d %4d %5d\n", $3, $4, $9, $11, $13, $15, $16 }' | \
+tr -d "," | sort -g --key=7 | head -10 ) | tee -a $RESULTS

  date "+%m/%d %H:%M:%S"





More information about the Bogofilter mailing list