/etc/bogofilter.cf

Boris 'pi' Piwinger 3.14 at logic.univie.ac.at
Fri Jan 24 14:13:25 CET 2003


Boris 'pi' Piwinger wrote:

[9)]
> I added a line of explanation.

Sorry, the mail went out too early. I'll continue where the
previous one left off.

>>>12) THRESHOLD Values (???)
>> 
>> This'll take a while to explain.  I'll have to send you another message on 
>> these.

In the meantime I'll just move it to another place.

>>>16) more details for algorithms (should stay here, but
>>>settings from 9), 11), and 12)? should be merged here)

Please read carefully!

"Format of spamicity output" still needs work. What
happens if I set spamicity_tags to three values, but only a
binary state is used?

There needs to be some text which explains spamicity_formats.

pi
-------------- next part --------------
#### Sample bogofilter configuration file
#
#	bogofilter config files should be located in:
#	    /etc as /etc/bogofilter.cf and/or
#	    $HOME as .bogofilter.cf
#       Also see BOGOFILTER_DIR below.

# Note: comment lines describe the configuration parameters
#	and show other possible values for the parameter
#
#	non-comment lines show default values 
#	as used in the bogofilter source code.
#       @@@ since this might be changed a better wording is needed
#       @@@ unclear why sometimes we show defaults as comments and sometimes as active


##### Begin general settings ########################################

#### BOGOFILTER_DIR
#
#	directory for wordlists

#bogofilter_dir=/var/lib/bogofilter
bogofilter_dir=~/.bogofilter

#	name/location of user config file

user_config_file=~/.bogofilter.cf
#user_config_file=~/.bogofilterrc
#user_config_file=~/.bogofilter/config
#@@@ can we change the last line to use bogofilter_dir?


#### WORDLIST: define additional word lists
#
#	char type: 's','g','i' (denoting spam, good, or ignore)
#	char *name: name of list, e.g. "good", "spam", "ignore"
#	char *path: path to file
#	double weight: probability BIAS for list
#	int override: skip lower valued lists
#	@@@ the example below has six fields, but only five are described here

#wordlist i,ignore,.ignorelist.db,1,0,0


#### SPAM_HEADER_NAME
#
#	used in reporting spamicity and
#	in removing already existing headers

spam_header_name=X-Bogosity


##### STATS_IN_HEADER
#
#	non-zero (default):  put spamicity info in message header
#	zero:  put spamicity info in message body
#	can use "bool" values of True, False, Yes, No, 1, or 0

stats_in_header=Y


#### Format of SPAM_HEADER
#
#	formatting characters:
#
#	    h - spam_header_name, e.g. "X-Bogosity"
#
#	    a - algorithm, e.g. "graham", "robinson", "fisher"
#
#	    c - classification, e.g. Yes/No, Spam/Ham/Unsure, +/-/?
#
#	    e - spamicity as 'e' format
#	    f - spamicity as 'f' format
#	    p - spamicity, as used in the formats below (e.g. "spamicity=%p")
#
#	    l - logging tag (from '-l' option)
#
#	    o - spam_cutoff, ex. cutoff=%o
#
#	    r - runtype
#	        w - word count
#	        m - message count
#
#	    v - version, ex. "version=%v"
#
#	below are options showing name and current value
#
# @@@ add explanation to these four settings:
# header_format = %h: %c, tests=bogofilter, spamicity=%p, version=%v
# terse_format = %1.1c %f
# log_header_format = %h: %c, spamicity=%p, version=%v
# log_update_format = register-%r, %w words, %m messages

##### End general settings ##########################################



##### Begin tokenizer settings ######################################

#### BLOCK ON SUBNETS
#
#	convert IPADDRs into a special token, url:1.2.3.4,
#	and also return url:1.2.3, url:1.2, and url:1
#       to allow identifying spammers by ip address / subnets.

block_on_subnets=no


#### CHARSET handling
#
#	specify default charset

charset_default=us-ascii
#charset_default=iso-8859-1

#	replace non-7bit chars with '?'

replace_nonascii_characters=N

##### End tokenizer settings ########################################



##### Begin general constants for classification ####################

#### MINIMUM DEVIATION
#
#	if a token's spamicity is closer to EVEN_ODDS (0.5f)
#	than MIN_DEV, the word is not used in the
#	spamicity calculation

min_dev=0.0


##### THRESHOLD Values
#
#	used to determine if/when spamicity 
#	values are output by print_bogostats()

thresh_index = 0
thresh_stats = 0.0f
thresh_rtable = 0.0f

# thresh_index = 12
# thresh_stats =0.2f
# thresh_rtable=0.4f

##### End general constants for classification ######################



##### Begin algorithm settings ######################################

#### ALGORITHM
#
#	specify spamicity algorithm
#       @@@ maybe add some info
#       @@@ explain which changes require database rebuild

#algorithm=graham
algorithm=robinson
#algorithm=fisher


##### Robinson Constants
#
#	Specify floating point values for
#	Robinson S and X coefficients
#       Applies only to robinson and fisher, not to graham

robs=0.001
robx=0.415


#### CUTOFF Values
#
#	for Graham and Robinson:
#	    only spam_cutoff is needed.
#
#	for Fisher:
#	    both ham_cutoff and spam_cutoff are allowed.
#	    setting ham_cutoff to a non-zero value will
#	    enable tristate results (Yes/No/Unsure).

# graham (commented out so that only the cutoff for the
# selected algorithm, robinson, is active):

# spam_cutoff = 0.90

# robinson:

spam_cutoff = 0.54

# fisher (with Yes/No/Unsure output, like -3):
#
# ham_cutoff = 0.10
# spam_cutoff = 0.95

# fisher (with Yes/No output, like -2):
#
# ham_cutoff = 0.00
# spam_cutoff = 0.95


#### Format of spamicity output
#
# for robinson or fisher (with binary state)
#	using "Yes" and "No" as labels.
# @@@ does this also work for graham?
#
# spamicity_tags = Yes, No
# spamicity_formats = %0.6f, %0.6f

# for fisher (with 3-way state)
#	using "Spam", "Ham", and "Unsure" as labels,
#	format "%6.2e" for displaying Spam/Ham scores,
#	and format "%0.6f" for Unsure scores.
#
# spamicity_tags = Spam, Ham, Unsure 
# spamicity_formats = %6.2e, %6.2e, %0.6f 

##### End algorithm settings ########################################



More information about the bogofilter mailing list