patch for bogotune

m at mo.optusnet.com.au m at mo.optusnet.com.au
Sun Jul 20 06:12:43 CEST 2003



My corpus is held in MH-style folders, but owing to the number of
messages, the directories are 'hashed' to keep the size of individual
directories manageable.

bogotune hates this. :)

Attached is a patch that expands the MH support to handle nested
folders. It should be fully backward compatible.


Michael.


--- /usr/share/bogofilter/tuning/bogotune	Fri Jun 27 05:31:34 2003
+++ ./bogotune	Sun Jul 20 13:48:09 2003
@@ -138,11 +138,11 @@
 if(-d $spfiles[0]) {
     $msgformat = "MH"; $scount = 0;
     foreach my $dir (@spfiles) {
-	$scount += `ls $dir/[0-9]* 2>/dev/null | wc -l`;
+	$scount += `find $dir -type f -name '[0-9]*' 2>/dev/null | wc -l`;
     }
     $ncount = 0;
     foreach my $dir (@nsfiles) {
-	$ncount += `ls $dir/[0-9]* 2>/dev/null | wc -l`;
+	$ncount += `find $dir -type f -name '[0-9]*' 2>/dev/null | wc -l`;
     }
 } else {
     my $cmd = join(" ", "cat", @spfiles, "| grep -c '^From ' |");
@@ -178,28 +178,24 @@
     } else {
 	unlink($spwork);
 	foreach my $dir (@spfiles) {
-	    opendir(DH, $dir) or yuk(7, "Problem processing spam files");
-	    my @msgs = readdir(DH); closedir(DH);
-	    foreach my $msg(@msgs) {
-		if($msg =~ /^[0-9]/) {
-		    my $cmd = join(" ", "cat $dir/$msg | bogol", $bogodir,
+	    open FL, "find $dir -type f -name '[0-9]*'|" or yuk(7, "Problem processing spam files");
+	    foreach my $msg (<FL>) {
+		chomp($msg);
+		my $cmd = join(" ", "cat $msg | bogol", $bogodir,
 			$cf, ">>$spwork");
-		    system($cmd) == 0 or
+		system($cmd) == 0 or
 			yuk(7, "Problem writing spam msg-count file");
-		}
 	    }
 	}
 	unlink($nswork);
 	foreach my $dir (@nsfiles) {
-	    opendir(DH, $dir) or yuk(7, "Problem processing nonspam files");
-	    my @msgs = readdir(DH); closedir(DH);
-	    foreach my $msg (@msgs) {
-		if($msg =~ /^[0-9]/) {
-		    my $cmd = join(" ", "cat $dir/$msg | bogol", $bogodir,
+	    open FL, "find $dir -type f -name '[0-9]*'|" or yuk(7, "Problem processing nonspam files");
+	    foreach my $msg (<FL>) {
+		chomp($msg);
+		my $cmd = join(" ", "cat $msg | bogol", $bogodir,
 			$cf, ">>$nswork");
-		    system($cmd) == 0 or
+		system($cmd) == 0 or
 			yuk(7, "Problem writing nonspam msg-count file");
-		}
 	    }
 	}
     }




More information about the bogofilter-dev mailing list