#!/usr/bin/perl -w my $PGP_SIGNED=<<'spfilter.pl_0.59_031104'; -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 spfilter.pl_0.59_031104 my %CFG = ( project => 'spfilter', version => '0.59', date => '031104' ); ## !/usr/bin/perl5 -wT ## http://spfilter.sourceforge.net/ ## http://mirror.bliab.com/spfilter/ ## file signed to prevent tampering, check integrity with 'make verify' ################################################################ ## requires XML::Simple, LWP::UserAgent or wget, rsync, bunzip2 ## optional: CDB_File, GDBM_File, DB_File, gpgv, diff, gunzip ################################################################ ## nothing here to edit ## try `perl ./spfilter.pl -h` or `perldoc spfilter.pl` use strict; use English; use Fcntl; use IO::File; use POSIX qw(strftime setuid); POSIX::setlocale(&POSIX::LC_ALL, 'C'); ## location for additional modules use lib '~/perl'; #perl2exe_include XML::Simple ## LWP::UserAgent ## Sys::Syslog ## DB_File ## the global hashes, used everywhere my %SRC = (); # hash of hash of selected input-sources my %FMT = (); # hash of hash of selected output-formats # experimental preset section, see config.xml my %PRESET = (); # filled in by &_readConfig(), used by &parseSourceArg() # dont change the default values listed here. # specify them in commandline-arguments or in '-x local-config.xml' my %DEFAULT = ( sources => 'DEFAULT', # not used cachedir => './cache', outdir => './outdir', conf_path => ($OSNAME eq 'MSWin32')?'.':'.:/usr/local/etc', # not used xmlconfig => 'spfilter-config.xml', keyring => 'spfilter-keyring.gpg', zone_name => 'localhost', zone_addr => '127.0.0.1', zone_ttl => 43200, exec_path => ($OSNAME eq 'MSWin32')?".;$ENV{PATH}":'~/bin:/bin:/usr/bin:/usr/local/bin', exec_http => 'wget -nv -T 30 -t 2 -U "{AGENT}" --header="If-Modified-Since: {MODIFIED}" -O "{FILE}" "{URL}"', exec_rsync => 'rsync -Lvxz --partial --delete-after --bwlimit=16 --timeout=60 --modify-window=3600 "{URL}" "{FILE}"', # exec_axfr => 'dig @{URL} axfr >"{FILE}"', # not implemented exec_gpgv => 'gpgv --keyring "{KEYRING}" "{FILE}"', exec_gunzip => 'gunzip -f "{FILE}"', exec_bunzip => ($OSNAME eq 'MSWin32')?'bunzip2 "{FILE}"':'bunzip2 -f "{FILE}"', exec_bzip => 'bzip2 -czf9 "{FILE}"', # for publishing exec_diff => 'diff -abBds "{OLD}" "{NEW}"', # for publishing ); ## order: commandline-args, spfilter-local.xml, spfilter-config.xml &parseConfig(\%CFG, \%DEFAULT); # pass defaults &lockFile(\%CFG, $PID, $EXECUTABLE_NAME, $PROGRAM_NAME, @{$CFG{argv}}) unless($CFG{debug}); ################################################## &msg(2, 'b) update sources'); ## prepare sources my $name; foreach $name (sort keys %SRC) { my $status = &checkCache($SRC{$name}, $SRC{$name}->{interval}); if($status > 0) # recently cached { &msg(1, 'Recent', $SRC{$name}->{cache_fname}, "($name)"); $CFG{count_sources}++; $CFG{count_cached}++; next; } $status = &fetchSource($SRC{$name}); if($status < 0) # 304 not modified { $CFG{count_sources}++; $CFG{count_notmod}++; } elsif($status > 0) # 200 OK { $CFG{count_sources}++; $CFG{count_fetched}++; my $file = $SRC{$name}->{fetch_fname} || $SRC{$name}->{cache_fname} or die(); # ugly, temporary # &msg(3, 'Updated',"$file ($name)"); ## publish as *.bz2 ©Pack($file, "$CFG{pubdir}/input/$SRC{$name}->{filename}.bz2") if($CFG{pubdir} && -d "$CFG{pubdir}/input"); ## handle diffs, optional my $diff; if($CFG{exec_diff} && ($diff=&makeDiff($CFG{cachedir}, $name))) { ©Pack($diff, "$CFG{pubdir}/diff/$SRC{$name}->{filename}.diff.$CFG{yymmdd}.bz2") if($CFG{pubdir} && -d "$CFG{pubdir}/diff"); &unlinkOld($CFG{cachedir}, $SRC{$name}->{filename}.'.diff', 1) } ## purge old &unlinkOld($CFG{cachedir}, $SRC{$name}->{filename}, 1); } elsif(&checkCache($SRC{$name}, 30)) # up to one month back { $CFG{count_sources}++; # need ? $CFG{count_errors}++; &msg(1, 'WARNING', "fallback to old '$SRC{$name}->{cache_fname}' ($name)"); } else { &msg(0, 'ERROR',"cant get source '$name'"); } } # $CFG{count_sources} = keys(%SRC); # todo: add $CFG{count_errors} &msg(2, 'Total', "$CFG{count_sources} sources ($CFG{count_cached} recent cached, $CFG{count_notmod} not-modified, $CFG{count_fetched} fetched)"); die('FATAL: no sources') unless($CFG{count_sources}); unless($CFG{count_fetched}) { if($CFG{count_notmod}) { &msg(2, 'Note', 'no sources modified - continue anyway...'); } else { &msg(1, 'Note', 'all sources still good - continue anyway...'); } ## exit(1); } ################################################## ## read and parse source(s), dedupe/consolidate into %RECORDS &msg(2, 'c) read/consolidate'); my %RECORDS = (); # master hash ## tie hash to in-memory or file-based db, increased cpu usage if($CFG{tiehash}) { $CFG{tiedb} = &dbOpen(\%RECORDS, $CFG{tiehash}); } ## experimental: special treatment if text starts with OK, IGNORE or FREEMAIL my %WHITELIST = (); # with sendmail-compatible keyword 'OK' or 'WHITELIST' my %FREEMAIL = (); # extension: reject if host does not match mx my $duperec = 0; foreach $name (sort keys %SRC) { $duperec += &slurpFile($SRC{$name}); # $SRC{$name}->{fetch_fname} } # todo: morph into $CFG{count_records} my $totalrec = keys(%RECORDS); &msg(2, 'Total',"$totalrec records ($duperec dupes)"); die('FATAL: no records') unless($totalrec); ################################################## ## output data &msg(2, 'd) output/build list'); # $CFG{count_formats} = keys(%FMT) or die('FATAL: no valid format specified'); foreach $name (sort keys %FMT) { if($CFG{outfile} eq 'STDOUT') { &printOutput($FMT{$name}, 'STDOUT'); $CFG{count_formats}++; &msg(0, 'WARNING', 'cant print multiple format to STDOUT') if($CFG{count_formats}>1); last; } elsif($CFG{outfile} eq 'NULL') { die('outfile set to NULL'); } # compose name of output-file my $outfile; # make up prefix and filename if($CFG{outfile}) { $outfile = $CFG{outfile}; } # explicit elsif($CFG{opt_s} =~ /^[-a-zA-Z0-9._]+$/) { $outfile = "$CFG{opt_s}.$name"; } # single argument elsif($CFG{sources} =~ /^[-a-zA-Z0-9._]+$/) { $outfile = "$CFG{sources}.$name"; } # single source else { $outfile = "SPFILTER.$name"; } # failsave if(&printOutput($FMT{$name}, $CFG{outdir}.'/'.$outfile)) { $CFG{count_formats}++; ©Pack("$CFG{outdir}/$outfile", "$CFG{pubdir}/output/$outfile.bz2") if($FMT{$name}->{publish} && $CFG{pubdir} && -d "$CFG{pubdir}/output"); } } ################################################## ## all done if(defined($CFG{tiedb})) { ## &dbClose($CFG{tiedb}, \%RECORDS, $CFG{tiehash}); # wont work die("cant use CDB for tiehash ($CFG{tiehash})") if($CFG{tiehash} =~ /\.cdb(\.tmp)?$/); ## $CFG{tiedb}->finish or die("$PROGRAM_NAME: CDB_File->finish failed: $OS_ERROR") if($CFG{tiehash} =~ /\.cdb(\.tmp)?$/); undef($CFG{tiedb}); untie(%RECORDS); ## if($CFG{tiehash} !~ /\.cdb(\.tmp)?$/); &unlinkFile($CFG{tiehash}) if($CFG{tiehash} ne 'NULL'); } &lockFile(\%CFG, undef) unless($CFG{debug}); # clear lock ## todo: calculate retval based on success/failures &msg(2, 'finish, exit(0)'); exit(0); ################################################## ## functions ## todo: check owner and if directory is writable by others for uid=0 ## global $FMT, %RECORDS, $CFG{yymmdd} ## global $CFG{magic} $CFG{debug}, $CFG{loglevel}, $CFG{cachedir} ## extensions FREEMAIL sub printOutput { my ($fmt,$file) = @ARG; my $name = $fmt->{name} or die('missing \$FMT{\$name}'); # my $name = shift or die('missing name'); # my $file = shift; # STDOUT, directory or filename # my $format = $FMT{$name} or die('FATAL: invalid %FMT: '.$name); my $include = $fmt->{include}; # todo: rename to ? if($include) # expand macros { $include =~ s/{YYMMDD}/$CFG{yymmdd}/g; $include =~ s/{AGENT}/$CFG{useragent}/g; $include =~ s/{ZONE}/$CFG{zone_name}/g if($CFG{zone_name}); $include =~ s/{ADDR}/$CFG{zone_addr}/g if($CFG{zone_addr}); $include =~ s/{TTL}/$CFG{zone_ttl}/g if($CFG{zone_ttl}); $include =~ s/\n\s/\n/g; # trim leading whitespaces (Anatoly) } my $type = $fmt->{type} || 'txt'; # probably should be 'octets' my $notation = $fmt->{notation}; my $option; # bindhack,tinydnshack,tcpserverhack if($fmt->{option}) { $option = $fmt->{option}; } ## legacy support, will go away soon elsif($name =~ /bind/) { $option = 'bindhack'; } elsif($name =~ /tinydns/) { $option = 'tinydnshack'; } elsif($name =~ /(qmail|tcpserver|rblsmtpd)/) { $option = 'tcpserverhack'; } else { $option = ''; } my $linestart = $fmt->{linestart}; my $separator = $fmt->{separator}; my $lineend = $fmt->{lineend}; my $maxlength = $fmt->{maxlength} || 1023; # limit text per record my $secondline = $fmt->{secondline}; my $secondlinestart = $fmt->{secondlinestart}; # default '+' for tinydns if($option && $option eq 'tinydnshack') # for tinydns only { die('$CFG{zone_name} not set') unless($CFG{zone_name}); $separator =~ s/{ZONE}/$CFG{zone_name}/; $secondline =~ s/{ZONE}/$CFG{zone_name}/; $secondline =~ s/{TTL}/$CFG{zone_ttl}/; } ## my $search = $fmt->{search}; ## my $replace = $fmt->{replace}; my $commentchar = ($option eq 'bindhack')?'; ':'# '; &msg(3, 'Option', $option, "commentchar=$commentchar") if($option); my ($magic_update,$temp,$db,%dbhash); if($file ne 'STDOUT') { $temp = $file.'.tmp'; if(-e $temp) { &msg(1, 'Trash', $temp); &unlinkFile($temp); } } my $fh = new IO::File; if($file eq 'STDOUT') # output to dup of STDOUT { &msg(0, 'WARNING','cant magic_update on STDOUT') if($fmt->{magic_update}); $temp = ''; # need me ? $fh->open('>&STDOUT') or die("cant dup(STDOUT): $OS_ERROR"); ## print out static header print $fh $include if($include); # import raw lines } elsif($type eq 'txt') # default octets { $magic_update = ($fmt->{magic_update} && $CFG{magic}); &msg(3, 'Output', $temp, "($name)"); $fh->open($temp, O_WRONLY|O_CREAT|O_EXCL) or die("FATAL: open $temp: $OS_ERROR"); ## import lines above 'start' and below 'end', if any my $count = &importMagic($commentchar.$CFG{magic}, $file, $fh) if($magic_update); print $fh $commentchar, $CFG{magic},'sources: ', $CFG{sources},' (',$CFG{version},'_',$CFG{yymmdd},")\n"; if($magic_update) { print $fh $commentchar, $CFG{magic},'start: ', $CFG{yymmdd}; print $fh ' - imported ',$count,' lines' if($count); print $fh ' - do not edit below',"\n"; } print $fh $include if($include); # import raw lines } elsif($type =~ /^[cgn]?db[m]?$/) # not all combinations supported { &msg(0, 'WARNING','cant do magic_update on DB') if($fmt->{magic_update}); $db = &dbOpen(\%dbhash, $temp); if($include) # import raw lines { my ($key,$val,$line); my (@lines) = split(/[\n\r\l]+/, $include); chomp @lines; foreach $line (@lines) { next if($line =~ /^#/); # trash ($key,$val) = split(/[:\s]+/, $line, 2); ## print STDERR "header: $key -> $val\n" if($CFG{loglevel}>1); if($type eq 'cdb') { $db->insert($key,$val); } else { $dbhash{$key} = $val; } } } } else { die("FATAL printOutput: invalid output-format: $type"); } my (@key,$addr,$text); # my $todo = keys %RECORDS; # todo: print progress percentage my $count = 0; if($type eq 'txt') { @key = (sort keys %RECORDS); } else { @key = (keys %RECORDS); } # DBM foreach $addr (@key) { if(defined($WHITELIST{$addr})) # pass and allow unconditionally (sendmail) { $text = 'OK '.$WHITELIST{$addr}.'; '.$RECORDS{$addr}; &msg(2, 'White4',"$addr: $text"); } elsif(defined($FREEMAIL{$addr})) # pass and allow only if mx matches (rcptfilter) { $text = 'FREEMAIL'; $text .= ' '.$FREEMAIL{$addr} if($FREEMAIL{$addr}); $text .= ' ('.$RECORDS{$addr}.')' if($RECORDS{$addr} && $RECORDS{$addr} !~ /^FREEMAIL/); ## print STDERR "FREEMAIL_246: $addr $text ($RECORDS{$addr}\)\n" if($CFG{loglevel}); } else { $text = $RECORDS{$addr}; } if($type eq 'txt') { if($notation eq 'reverse') # reverse-octets for dnsbl-zones { $addr = &reverseDns($addr); if($option eq 'tinydnshack') { $text =~ s/:/\\072/g; } elsif($option eq 'bindhack') { $text = substr($text, 0, 255); } ## legacy, done via maxlength } elsif($option eq 'tcpserverhack') # add dot on partial octets { $addr .= '.' unless($addr =~ /\.[0-9]+\.[0-9]+\./); } if(!$text) { print $fh $addr, "\n"; } # optimize for further aggregation else { ## limit length for bind, max 255 chars $text = substr($text, 0, $maxlength) if($maxlength); print $fh $linestart, $addr, $separator, $text, $lineend,"\n" or die("$OS_ERROR"); print $fh $secondlinestart,$addr,$secondline,"\n" if($secondline); } } elsif($notation eq 'cidr') # quick hack, no text { my ($a,$b,$c,$d) = split(/\./, $addr, 4); if(defined($d)) { print $fh $addr, '/32',"\n"; } elsif(defined($c)) { print $fh $addr, '.0/24',"\n"; } elsif(defined($b)) { print $fh $addr, '.0.0/16',"\n"; } elsif($a) { print $fh $addr, '.0.0.0/8',"\n"; } else { print $fh "# ERROR: $addr\n"; next; } } else # cdb,db,gdbm { ## linestart, separator and lineend silently ignored ## note: cdb not compatible with tinydns-data if($type eq 'cdb') { $db->insert($addr,$text); } else { $dbhash{$addr} = $text or die("dbInsert $type: $addr = $text: $OS_ERROR"); } } $count++; } # end foreach $addr if($file eq 'STDOUT') { $fh->close; } elsif($type eq 'txt') { print $fh $commentchar, $CFG{magic},'end: ',$CFG{yymmdd},' - added ',$count,' records - do not edit above',"\n"; $fh->close; } else # finalize db { ## &dbClose($db, \%dbhash, $temp); # wont work $db->finish or die("$PROGRAM_NAME: CDB_File->finish failed: $OS_ERROR") if($temp =~ /\.cdb(\.tmp)?$/); undef($db); untie(%dbhash) if($temp !~ /\.cdb(\.tmp)?$/); } ## last chance to backup existing file - not implemented if($temp && $temp ne $file && !rename($temp, $file)) { &msg(0, 'ERROR',"rename($temp,$file): $OS_ERROR"); return(0); } &msg(1, 'Done', $file, "($name, $count lines)"); return($count); } ################################################## ## stage 1: fetch source into cache ## experimental live-update (not really yet) ## only ./cache/${name}.YYMMDD will be updated sub liveUpdate { my $xml = shift; my $name = shift or die('liveUpdate: missing \$name'); die("liveUpdate: $name: invalid xml type=$xml->{$name}->{type}") unless($xml->{$name}->{type} =~ /^config/); if(!$CFG{keyring} || ($CFG{keyring} ne 'NULL' && !$CFG{exec_gpgv}) ) { &msg(2, 'Note', "Update refused without gpgv and keyring - run 'make keyring'"); return(0); } my %upd = (); $upd{name} = $name; pushSource($name, $xml->{$name}, \%upd, 0); # untrusted return(-1) if(&checkCache(\%upd, $upd{interval}) > 0); # simulate 304 my $status = &fetchSource(\%upd); if($status > 0) # 200 OK { my $file = $upd{fetch_fname}; # || $upd{cache_fname; if(&verifyFile($file) < 0) { &msg(0, 'ALERT',"gpg-signature '$file' FAILED - please report ($file.BADSIG)"); &msg(1, 'HINT',"use argument -keyring=NULL to skip gpg verification"); rename($file, $file.'.BADSIG') or die("FATAL: cant rename($file,$file.BADSIG): $OS_ERROR"); return(0); } ## redistribution of xml-config - causes too much lag # ©Pack($file, "$CFG{pubdir}/input/$upd{filename}.bz2") # if($CFG{pubdir} && -d "$CFG{pubdir}/input"); # todo: make shure $CFG{cachedir} is already defined &unlinkOld($CFG{cachedir}, $upd{filename}, 1); } return($status); } # global $CFG{cachedir} sub _getCached { my $file = shift or die('undefined \$filename'); my $alias = shift; ## does not really belong here if($alias && $alias =~ /^[-a-zA-Z_.]+$/) { &msg(2, 'ALIAS',"$file -> $alias"); $file = $alias; } my (@files) = readDir($CFG{cachedir}, '^'.$file.'\.0[0-9]{5,7}$'); my $cached = pop @files; # most recent only # &msg(4, 'CACHED',"file=$file, alias=$alias, cached=$cached"); return($CFG{cachedir}.'/'.$cached) if($cached && -s $CFG{cachedir}.'/'.$cached); return(''); } sub _checkAge { my $filename = shift; my $maxage = shift; if($filename =~ /\.(0[0-9]{5})[0-9]*$/ ) { my $filedate = $1; # YYMMDD my $mindate = strftime("%y%m%d", gmtime($BASETIME-($maxage*86400))); if($filedate > $mindate) { return(1); } else { &msg(3, 'TooOld', $filename, "(file=$filedate min=$mindate"); return(-1); } } return(0); } ## check if file aged < interval days already exists in cachedir ## set cache_status, cache_fname ## return $CFG{cache_status} tristate: 1: OK, 0: NONE, -1: TooOld sub checkCache { my $src = shift; my $interval = shift || $src->{interval} || 0; # special values: -1, 999 my $name = $src->{name} or die('FATAL: undefined \$src->{name}'); ## my $filename = &_getCached($alias) or return($src->{cache_status} = 0); my $filename = &_getCached($src->{filename}, $src->{alias}) or return($src->{cache_status} = 0); $src->{cache_fname} = $filename; $src->{cache_ifmod} = &fileDate($filename, '%a, %d %b %Y %H:%M:%S GMT'); ## $src->{cache_yymmdd} = &fileDate($filename, '%y%m%d'); return($src->{cache_status} = -1) if($interval < 1); my $status = &_checkAge($filename, $interval); return($src->{cache_status} = $status) unless($status == 1); my $size = &checkFile($filename, $src->{minsize}, $src->{maxsize}); if(!$size) { &msg(0, 'CACHE',"badfile '$filename'"); return($src->{cache_status} = 0); } &msg(2, 'Cached',"$filename (${size}k)"); return($src->{cache_status} = 1); } ## todo: rewrite sub unlinkOld { my $dir = shift; # || $CFG{cachedir}; my $glob = shift; # || return(0); my $keep = shift || 2; my (@files) = &readDir($dir, '^'.$glob.'\.0[0-9]{5}$'); my $cnt = $#files; return(0) if($cnt<$keep); # shortcut ## print STDERR "LIST: @files\n" if($CFG{loglevel}>2); pop @files while($keep--); # always $keep that many &msg(1, 'Purge', join(',', @files), "($cnt files)"); my $file; foreach $file (@files) { &unlinkFile($dir.'/'.$file); } return($cnt); } ## experimental sub makeDiff { my $dir = shift; # usually $CFG{cachedir} my $name = shift; ## my (@files) = &readDir($dir, '^'.$name.'\.0[0-9]{5,7}$'); my (@files) = &readDir($dir, '^'.$SRC{$name}->{filename}.'\.0[0-9]{5,7}$'); my $new = pop @files; my $old = pop @files; unless($new && $old) { &msg(2, 'Diff',"need two files on disk ($name)"); return(0); } my $cmdline = $CFG{exec_diff} or die('FATAL: exec_diff undefined'); if($cmdline =~ /{OLD}/) { $cmdline =~ s/{OLD}/$dir\/$old/; } # proper else { $cmdline .= ' "'.$dir.'/'.$old.'"'; } # legacy if($cmdline =~ /{NEW}/) { $cmdline =~ s/{NEW}/$dir\/$new/; } else { $cmdline .= ' "'.$dir.'/'.$new.'"'; } &msg(3, 'Diff',$cmdline); my @cmdout; my $retval = &forkExec($cmdline, \@cmdout, 1); if($retval != 1 || $#cmdout<1) # no changes or error { my $errmsg = substr("@cmdout"||'ERROR',0,256); &msg(0, 'DIFF', $errmsg,"(retval=$retval)"); return(0); } my $file = "$dir/$name.diff.$CFG{yymmdd}"; &msg(1, 'Diff',"$file ($name,",($#cmdout+1),' lines)'); my $lines = &writeFile($file, \@cmdout, "# $cmdline\n", '', 0); undef(@cmdout); return($file) if($lines); return(''); # bad } ## global %SRC, $CFG{yymmdd}, $CFG{loglevel} sub fetchSource { my $src = shift; my $name = $src->{name} or die('missing \$SRC{\$name}'); unless(defined($src->{url}) || $src->{url}[0]) { &msg(0, 'WARNING',"missing url ($name)"); return(0); } my $status = 0; # 1 = fetched, 0: error, -1: not modified my $filename = $CFG{cachedir}.'/'; ## does not belong to here ## todo: import properties from alias, but not here if($src->{alias} =~ /^[-a-zA-Z_.]+$/) { $filename .= $src->{alias}; } else { $filename .= $src->{filename}; } $filename = &untaint($filename.'.'.$CFG{yymmdd}, 'path'); &msg(3, 'Source', $name, "($filename)"); my $url; foreach $url (@{$src->{url}}) { ## experimental diff update - not implemented if($url =~ /{LATEST}/ && $src->{cache_fname} =~ /\.(0[0-9]{5})([0-9]{2})?$/) { my $yymmdd = $1; if(($yymmdd eq $CFG{yymmdd}) || (($yymmdd+1) != ($CFG{yymmdd}+0))) { &msg(1, 'LATEST', "not from yesterday", "($src->{cache_fname})"); return(0); # error for now } &msg(1, 'LATEST', $yymmdd, "($src->{cache_fname})"); $url =~ s/{LATEST}/$yymmdd/; } $url =~ s/\.\./!!/g; # sanitize possible path $url = &untaint($url, 'url'); if(!$url) { &msg(0, 'WARNING',"empty url in '$name'"); next; } else { &msg(2, 'Trying', $url); } if($url =~ /^(rsync):\/\//) { next if($ENV{NORSYNC} || !$CFG{exec_rsync}); # hack for sf $status = &rsyncUrl($src, $url, $filename); } elsif($url =~ /^(http|ftp):\/\//) # ftp not tested { $status = &fetchUrl($src, $url, $filename); } elsif($url =~ /^(axfr):\/\//) # not implemented { my($junk,$nameserver,$zonename) = split(/:/, $url); $zonename = $nameserver unless($zonename); &msg(0, 'WARNING',"source AXFR '\@$nameserver $zonename' not implemented ($name)"); return(0); # $status = &axfrUrl($src, $url, $filename); # to be written } elsif($url =~ /^(dir):\//) # all files in local directory { $status = &catDir(substr($url,5), $filename); } elsif($url eq 'NULL') { &msg(0, 'WARNING',"source disabled with url=NULL ($name)"); return(0); } else # assume local file, needs reworking { my $local; if($url =~ /^file:\//) { $local = substr($url,6); } elsif($url =~ /^[\/.]/) { $local = $url; } else { die("bad file value for url: $url"); } if(-f $local) { if(&fileNewer($local,$filename)) { $status = 1 if(©File($local, $filename)); } else { $status = -1; } } else { &msg(0, 'WARNING',"nonexistent file '$local'"); } } if($status) # we got something { if($status < 0) { &msg(1, 'Status', '304', $url); } else { &msg(1, 'Status', '200', $url); } last; } } if(!$status) # 404 { &msg(2, 'WARNING', "could not fetch source '$name'"); return($src->{fetch_status} = 0); } if($status < 0) # 304 Not Modified { if($src->{cache_fname} ne $filename) { if(-s $filename) { &msg(1, 'WARNING',"file '$filename' already exists ($name)"); return(-1); } elsif(-f $filename) { unlink($filename) or die("cant unlink($filename)"); } rename($src->{cache_fname}, $filename) or die("cant rename($src->{cache_fname},$filename)"); } $src->{cache_fname} = $filename; # fixup, ugly } # die('FATAL: nonexistent or empty '.$filename) unless(-f $filename && -s $filename); ## check file size, owner and permissions my $size = &checkFile($filename, $src->{minsize}, $src->{maxsize}); if(!$size) # nonexistent file, bad size, permission or owner { &msg(1, 'BADFILE', $filename, "($name)"); rename($filename, $filename.'.BADFILE') or &msg(0, 'WARNING', "rename $filename: $!"); return($src->{fetch_status} = 0); } if($status > 0) { &msg(2, 'Fetched',"$filename ($name)"); $src->{fetch_fname} = $filename; } elsif($status < 0) # 304 Not Modified { &msg(2, 'Recent', $filename, "($name)"); $src->{fetch_fname} = $filename; # need me ? } return($src->{fetch_status} = $status); } ## update existing file via rsync ## rsync worked in an early test, but should not used yet ## but there is suitable source available via rsync yet anyway. ## return status: 1 = fetched, 0: error, -1: not modified sub rsyncUrl { my $src = shift; my $url = shift; my $file = shift; my $name = $src->{name} or die('missing \$src->{name}'); die('FATAL: invalid rsync-url') if($url !~ /^rsync:\/\//); # need me ? if(!(-s $file)) # copy last available version to $file { if($src->{cache_fname}) { ©File($src->{cache_fname},$file); } else { &msg(1, 'WARNING','rsync update on nonexistent file'); } } # &msg(3, 'Rsync', $url); # &msg(3, 'Dest', $file, "($name)"); my $cmdline = $CFG{exec_rsync} or die('FATAL: $CFG{exec_rsync} not defined'); if($cmdline =~ /{URL}/) { $cmdline =~ s/{URL}/$url/; } else { $cmdline .= ' "'.$url.'"'; } # legacy if($cmdline =~ /{FILE}/) { $cmdline =~ s/{FILE}/$file/; } else { $cmdline .= ' "'.$file.'"'; } # legacy my @cmdout; my $retval = &forkExec($cmdline.' 2>&1', \@cmdout, 30); if(!$retval) # sync worked { # wrote 143 bytes read 104 bytes 98.80 bytes/sec my (@word) = split(/\s+/, $cmdout[1]); if(defined($word[4])) # added 030816, for NJABL { if($word[4] !~ /^[0-9]+$/) { &msg(3, 'RSYNC', $cmdout[1]); } elsif($word[4] < 256) { return(-1); } # simulate 304 } chomp($cmdout[0]); my $stats = ''; if($#word>5) { $stats = ' ('.$word[4].' '.$word[5].', '.$word[6]||''.' '.$word[7]||''.')'; } &msg(2, 'Rsync', $cmdout[0] .$stats); return(1); } if($retval==12 || $retval==23) # unknown module ,not found { chomp($cmdout[0]); &msg(1, 'RSYNC', '404', $cmdout[0], "($retval)"); } elsif($retval==10 || $retval==30) # refused, timeout { chomp($cmdout[0]); &msg(1, split(/: /,$cmdout[0]), "($retval)"); } else # even more verbose { my $errmsg = substr("@cmdout"||'ERROR',0,256); &msg(0, 'RSYNC', $errmsg,"($retval)"); } undef(@cmdout); return(0); } ## fetch the url and save to disk, bunzip2 if necessary ## global $CFG{exec_http], $CFG{pack_ext}, $CFG{useragent} sub fetchUrl { my $src = shift; my $url = shift or die('fetchUrl: missing url'); # http source url my $file = shift or die('fetchUrl: missing file'); # plain dest file die('\$CFG{exec_http} undefined') unless($CFG{exec_http}); # need me ? my $name = $src->{name} or die('\$src->{name} undefined'); # should not happen my $option = $src->{option} || ''; # html2text, hack for SBL &msg(3, 'Option', $option, "($name)") if($option); &msg(3, 'If_Mod','If-Modified-Since:', $src->{cache_ifmod}) if($src->{cache_ifmod}); &msg(3, 'Dest', $file, "($name)"); my $outfile = $file; if($url =~ /\.$CFG{pack_ext}$/) { if($url =~ /\.bz2$/) { $outfile .= '.bz2'; } elsif($url =~ /\.gz$/) { $outfile .= '.gz'; } # untested else { die('FATAL: unsupported packer extension'); } &msg(3, 'Temp', $outfile); } if($CFG{exec_http} eq 'LWP::UserAgent') # LWP available { my $ua = LWP::UserAgent->new( keep_alive => 1 ); &msg(3, 'Fetch', 'use ', $CFG{exec_http}, '('.$ua->agent().')'); $ua->env_proxy(1); $ua->timeout(30); $ua->agent($CFG{useragent}); ## $ua->max_size($src->{maxsize}*1024) if($src->{maxsize}); # confusing ## $ua->credentials($netloc, $realm, $uname, $pass); # not implemented my $request = HTTP::Request->new('GET', $url); $request->header('If-Modified-Since', $src->{cache_ifmod}) if($src->{cache_ifmod}); my $tdelta = time(); my $response = $ua->request($request); ## &msg(3, 'Status', $response->status_line, '('.substr($response->base,7,64).')'); # &msg(3, 'REQUEST', $response->request->headers_as_string); # &msg(3, 'IfMod', $response->request->header('If_Modified_Since')); # &msg(3, 'RESPONSE', $response->headers_as_string); if($response->code() == 304) # OK, not modified { &msg(2, 'Status', $response->status_line, "($src->{cache_ifmod})"); return(-1); # not modified } elsif($response->code() == 200) # OK, save to disk { &msg(2, 'Modified', $response->header('Last_Modified') || 'MISSING Last_Modified'); my $content_length = $response->header('Content_Length'); my $response_length = length($response->content); &msg(3, 'DEBUG', 'Content_Length:', $content_length, ', Response_Length:', $response_length, 'bytes'); if(($tdelta=(time()-$tdelta)) > 1) { &msg(2, 'Status', $response->status_line, "($response_length bytes,",int($response_length/$tdelta),'bytes/sec)'); } else { &msg(2, 'Status', $response->status_line, "($response_length bytes, ~$tdelta sec)"); } if(($response_length < 128) || ($content_length && $content_length != $response_length)) { &msg(1, 'CORRUPT', 'Content_Length:', $content_length||'UNKNOWN', ', Response_Length:', $response_length, 'Bytes'); &msg(0, 'REJECT', 'empty or corrupted source file', "($response_length bytes)"); return(0); # may try another url } my @array; if($option eq 'html2text') { ## hack for SBL_DIRECT, until they fix that on their end &msg(1, 'HACK', 'html2text-hack for SBL'); my $html = $response->content; $html =~ s/
[\s\n]*/
\n/ig; # SBL hack: fixup missing newlines ## print $html; print "EOF\n"; die(); (@array) = split('
', $html); } else { (@array) = ($response->content); } undef($response); # free up memory &writeFile($outfile, \@array, '', '', 0) or die("cant write $outfile: $OS_ERROR"); undef(@array); # free up memory } else # bad thing happen, may be 206 with oversized source { &msg(0, 'ERROR', $response->status_line, "($file)"); &msg(2, 'RESP', $response->headers_as_string); if(-s $outfile) # need me ? { my $badname = $outfile.'.'.$response->code(); &msg(0, 'RENAME', $badname); rename($outfile, $badname) or die("cant rename($outfile,$badname): $OS_ERROR"); } return(0); } } elsif(! -s $file) # dont do multiple updates per day with wget { ## experimental 'option=html2text' for SBL, only with LWP die('tag option not implemented with wget') if($option); my $cmdline = $CFG{exec_http} or die('FATAL: \$CFG{exec_http} not defined'); $cmdline =~ s/{AGENT}/$CFG{useragent}/; ## experimental HTTP_BASIC_AUTH, todo: read from url ...://user:pass@... # my $httpauth = 'Authorization: '.&to64($name.':'.$name); # $cmdline =~ s/{AUTH}/$httpauth/; # experimental $cmdline =~ s/{MODIFIED}/$src->{cache_ifmod}/; # may be '0' if($cmdline =~ /{FILE}/) { $cmdline =~ s/{FILE}/$outfile/; } else { die("FATAL: missing {FILE} in exec_http: $CFG{exec_http}"); } if($cmdline =~ /{URL}/) { $cmdline =~ s/{URL}/$url/; } # proper else { $cmdline .= ' "'.$url.'"'; } # legacy my @cmdout; my $goodret = 1; # wget: 19:58:28 ERROR 304: Not Modified. my $retval = &forkExec($cmdline.' 2>&1', \@cmdout, $goodret); if($retval) # not modified or troubles { my $text; my $errmsg = substr("@cmdout"||'',0,256); if(!$errmsg) { $text = "UNKNOWN_ERROR (retval=$retval)"; } elsif($retval && grep(/404.*Not Found/, $errmsg)) { $text = "404 Not Found ($name)"; } elsif($retval && grep(/40[0-9].*Denied/, $errmsg)) { $text = "40x Access Denied ($name)"; } elsif($retval==$goodret && grep(/304.*Not Modified/, $errmsg)) { &msg(1, 'Status', '304 Not Modified','('.substr($url,7,52).')'); # &msg(1, 'Status', '304 Not Modified', "($src->{cache_ifmod})"); die("FATAL: non-empty outfile '$outfile)") if(-s $outfile); unlink($outfile) or die("FATAL: cant unlink($outfile): $OS_ERROR") if($outfile ne $file && -f $outfile); return(-1); # not modified } else { $text = $errmsg.' (retval='.$retval.')'; } &msg(0, 'STATUS', $text); # &msg(3, 'OUTPUT', $errmsg) if($errmsg); return(0); # trouble } # else assume 200 } else { &msg(0, 'WGET', $file,' already updated today - ignoring'); return(-1); } die("FATAL: nonexistent $outfile") unless(-e $outfile); # need me ? die("FATAL: empty $outfile") unless(-s $outfile); $outfile = &unpackFile($outfile) if($outfile =~ /\.$CFG{pack_ext}$/); if(!(-f $outfile && -s $outfile)) { &msg(0, 'WARNING', "nonexistent or empty '$outfile'"); return(0); } elsif($outfile ne $file) { rename($outfile, $file) or die("FATAL: cant rename($outfile,$file): $OS_ERROR"); } # &msg(1, 'Fetched', "$file ($name)"); return(1); # good } ## all-in-one: fork, setuid, exec, read, check and reap ## http://www.oreilly.com/catalog/cgi2/chapter/ch08.html ## globals: $CFG{exec_uid}, sub forkExec { my $cmdline = shift; my $outref = shift; # array ref for output my $maxret = shift || 0; # ugly hack, diff uses '1' on success my $child = -1; if($OSNAME eq 'MSWin32') # cheap hack for Windoze ActiveState { # todo: get stderr via tempfile, low priority &msg(3, $OSNAME, "system($cmdline)"); system($cmdline); } else { my $cmdfh = new IO::File; if(!($child = open($cmdfh, "-|"))) { die("FATAL: cant fork($cmdfh): $OS_ERROR") unless defined($child); # binmode($child) if($OSNAME eq 'MSWin32'); # bad joke unless($UID) # still running as root { if($CFG{exec_uid}) # drop privilegies, see argument -u { setuid($CFG{exec_uid}) or die("FATAL: cant setuid($CFG{exec_uid}): $OS_ERROR"); &msg(3, 'Setuid', "uid=$UID:$GID, euid=$EUID:$EGID"); } else { &msg(2, 'WARNING', "exec '$cmdline' as root (uid=$UID)"); } } else { &msg(3, 'Exec', "$cmdline (uid=$UID)"); } exec("$cmdline"); die("FATAL: cant exec($cmdline): $OS_ERROR"); } if(!eof($cmdfh)) { (@{$outref}) = <$cmdfh>; } # else { &msg(3, 'NOTICE', 'premature EOF from forked child'); } ## close pipe and check retval from forked child $cmdfh->close; } my $retval = ($CHILD_ERROR >> 8); if($retval > $maxret) { my $signal = ($CHILD_ERROR & 127); my $core = ($CHILD_ERROR & 128); &msg(0, 'EXEC', $cmdline, "(retval=$retval signal=$signal core=$core)"); my $errmsg = substr("@{$outref}"||'', 0, 256); &msg(1, 'OUTPUT', $errmsg) if($errmsg); die("FATAL: exec $cmdline: retval=$retval signal=$signal core=$core") if($retval>($maxret+1)); } if($OSNAME ne 'MSWin32') ## reap zombies { use POSIX ":sys_wait_h"; do { $child = waitpid(-1, &WNOHANG); } until ($child == -1); } return($retval); } ## not currently used ## $SIG{CHLD} = \&sigchldHandler ; # sub sigchldHandler # { # wait() ; # $SIG{CHLD} = \&sigchldHandler ; # } ################################################### ## stage 2: build list previously cached files ## experimental: detect source type based on first 10 lines ## return one of: addr, cidr/nn, range, axfr/cname, axfr/txt, axfr/a sub sourceDetect { my $aref = shift; my $default = shift || 'addr'; my ($i,$j,@probe); my $type = ''; for($j=8, $i=0; $i<40 && defined($aref->[$i]) ; $i++) { if($aref->[$i] !~ /^[#;@\s]/) { push @probe, $aref->[$i]; last unless($j--); } } for($j=8, $i=$#{$aref}; $i>10; $i--) # some from the bottom { if($aref->[$i] !~ /^[#;@\s]/) { push @probe, $aref->[$i]; last unless($j--); } } my $n = 0; if($n=grep(/^\$(TTL|GENERATE)/, @probe)) { $type = 'axfr'; } elsif($n=grep(/^[0-9.]{5,15}\/[12]/, @probe)) { $type = 'cidr'; } elsif($n=grep(/^[0-9.]{7,15}\s*-\s*[0-9.]{7,15}[\s\$]/,@probe)) { $type = 'range'; } elsif($n=grep(/^[0-9.]{3,15}[\s\$]/, @probe)) { $type = 'addr'; } elsif($n=grep(/^[-a-z.]{5,}[\s\$]/, @probe)) { $type = 'host'; } my $rel = $n.'/'.($#probe+1); # still experimental if(!$type) { &msg(2, 'DETECT', "cant detect source type, use default ($rel)"); } elsif($type ne $default && !($type eq 'addr' && $default eq 'cidr')) { &msg(2, 'DETECT', "detected type '$type' mismatch ($rel)"); } else { &msg(3, 'Detect', "type '$type' detected ($rel)"); } return($type); # || $default } ## the real workhorse, read and parse $file into %RECORDS ## global %SRC sub slurpFile { my $src = shift; my $name = $src->{name} or die('missing name'); unless($src->{fetch_fname} || $src->{cache_fname}) { &msg(0, 'SLURP', "missing (\$src->{fetch_fname} || \$src->{cache_fname}) ($name)"); return(0); } my $file = $src->{fetch_fname} || $src->{cache_fname} or die('missing *_fname'); my $type = (split('/',$src->{type}))[0] || 'addr'; # needs fixing for subtype die('FATAL: unknown type: '.$type) unless($type =~ /^(addr|cidr|range|reverse|rbldns|axfr|host)/); die("FATAL: nonexistent or empty file '$file' ($name)") unless(-f $file && -s $file); my $dupes = 0; my @line; my $count = &readFile($file, \@line); # my $detected = &sourceDetect(\@line, $type); # experimental, disabled for now &msg(1, 'Parse', $file, "($type, $count lines)"); my $source_option = $src->{option} || ''; # (notext) my $rx_include = $src->{regexp_include} || ''; my $rx_exclude = $src->{regexp_exclude} || ''; ## outsource if axfr-syntax (expanding bind $GENERATE) if($type eq 'axfr') { $count = &parseAxfr($name, \@line, $src->{type}, $source_option, $rx_include, $rx_exclude); return(0); } elsif($type eq 'rbldns') # experimental { &msg(1, 'WARN', 'experimental rbldns parser') if($CFG{debug}); $count = &parseRbldns($name, \@line, $src->{type}, $source_option, $rx_include, $rx_exclude); return(0); } ## expand cidr, stuff into hash and dedupe/consolidate foreach (@line) { next if(/^[#;:]/ || /^$/ || /^127\.0\.0\.127/); next if($rx_include && !/$rx_include/); next if($rx_exclude && /$rx_exclude/); # experimental # if($type eq 'range' && /^[^\s]+\s+([0-9\.]+)(\/[0-9]+)?[\s:!]+(.*)$/ ) # { # ## http://www.blackholes.us, experimental range, cidr in second field # $dupes += &pushAddr($name,$1,$2,$3); # } # els if($type eq 'reverse' && /^(\*\.)?([0-9\.]+)[\s:]+(.*)$/) { # experimental DRBL support hacked into spfilter $dupes += &pushAddr($name,&unReverse($2),0,$3); } ## elsif(/^([0-9\.]+)(\/[0-9]+)?[\s:!]+(.*)$/) # addr and/or cidr elsif(/^([0-9\.]+)[\s:!]+(.*)$/) # octet or addr { $dupes += &pushAddr($name,$1,0,$2); } elsif(/^([0-9\.]+)\/([0-9]+)[\s:!]+(.*)$/) # cidr notation { $dupes += &pushAddr($name,$1,$2,$3); } elsif(/^([-a-zA-Z0-9\.@\*\+\/\<~{&=_%!@]{4,48})\s+(.*)$/) # host or email { ## todo: check for type "host" $dupes += &pushHost($name,$1,$2); } elsif($CFG{debug} || $CFG{loglevel}>2) # something else { ## todo: interpret some keywords ## Connect:127 RELAY ## Spam:abuse@ FRIEND ## From:abuse@ OK &msg(1, 'WTF?', $ARG); # die() if($CFG{debug}); # sleep(1); # delay to receice CTRL-C... } } # foreach return($dupes); } ## global %SRC, %RECORDS sub pushAddr { my ($name,$addr,$cidr,$text) = @ARG; ## if($cidr && ($cidr=int(substr($cidr,1)))) # sanitize /cidr if($cidr) { if(!int($cidr)) # safety check { &msg(0, 'ALERT', "skip invalid CIDR: $addr/$cidr ($name)"); $cidr = 0; } elsif($cidr > 31) { &msg(0, 'WARNING', "ignore invalid CIDR: $addr/$cidr ($name)") if($cidr > 32); $cidr = 0; } elsif($SRC{$name}->{cidr} && $cidr < $SRC{$name}->{cidr}) { &msg(0, 'WARNING', "ignore oversized CIDR: $addr/$cidr ($name)"); # &msg(3, 'WARNING', @ARG); $cidr = 0; } elsif($cidr == 24) # trim down to three octets { my (@part) = split(/\./, $addr, 4); $addr = $part[0].'.'.$part[1].'.'.$part[2]; $cidr = 0; } elsif($cidr == 16) # trim down to two octets { my (@part) = split(/\./, $addr, 3); $addr = $part[0].'.'.$part[1]; $cidr = 0; } } my $junk; # suppress warning ## experimental extensions - might not work as expected with cidr ## additional text will be passed through - experimental if($text && $text =~ /^(OK\s|WHITELIST)/) { ($junk,$WHITELIST{$addr}) = split(/\s+/,$text); } # elsif($text =~ /^(EXCEPT|IGNORE)/) { ($junk,$EXCEPTION{$addr}) = split(/\s+/,$text); } ## leave this here until the above is fully tested ## experimental option 'notext', useful for blackholes.us if($SRC{$name}->{option} && $SRC{$name}->{option} eq 'notext') { $text = &_composeText($addr, $SRC{$name}->{tag}, '', $SRC{$name}->{prepend}, $SRC{$name}->{append}); } else { $text = &_composeText($addr, $SRC{$name}->{tag}, $text, $SRC{$name}->{prepend}, $SRC{$name}->{append}); } my $dupes = 0; if($cidr) { &msg(1, 'ALERT', "skip big CIDR: $addr/$cidr ($text)") if($cidr<4); # safety my $octet; foreach $octet (cidr2octets($addr.'/'.$cidr)) { if($RECORDS{$octet}) { $RECORDS{$octet} .= '; '.substr($text,0,64) if($RECORDS{$octet} ne $text); $dupes++; } else { $RECORDS{$octet} = $text; } } } else # 2, 3 or 4 octets { ## could also check if complete /24 already listed if($RECORDS{$addr}) { $RECORDS{$addr} .= '; '.substr($text,0,80) if($RECORDS{$addr} ne $text); $dupes++; } else { $RECORDS{$addr} = $text; } } return($dupes); } ## global %SRC, %RECORDS sub pushHost { my ($name,$host,$text) = @ARG; $host =~ tr/A-Z/a-z/; # for case-insensitive matching my $dupe = 0; my $junk; ## todo: parsing of OK, IGNORE, FREEMAIL, REJECT chaotic & buggy ## experimental extensions - FREEMAIL only used for name-based records ## additional text will be passed through - experimental if($text =~ /^(OK\s|WHITELIST)/) { ($junk,$text) = split(/\s+/,$text,2); $WHITELIST{$host} = $text; if($RECORDS{$host}) { if($text) { $text .= ' ('.$RECORDS{$host}.')[3])'; } else { $text = '('.$RECORDS{$host}.')[4]'; } delete($RECORDS{$host}); $dupe = 1; } if($text) { $RECORDS{$host} = 'OK '.$text; } else { $RECORDS{$host} = 'OK'; } } # elsif($text =~ /^(EXCEPT|IGNORE)/) { ($junk,$EXCEPTION{$host}) = split(/\s+/,$text,2); } elsif($text =~ /^(FREEMAIL|MXCHECK)/) { ($junk,$text) = split(/\s+/,$text,2); if($RECORDS{$host}) { delete($RECORDS{$host}); $dupe = 1; } if($text) { $RECORDS{$host} = 'FREEMAIL '.$text; } else { $RECORDS{$host} = 'FREEMAIL'; $text = ''; } $FREEMAIL{$host} = $text; } else # REJECT { ## todo: obbey ($SRC{$name}->{option} eq 'notext') $text = &_composeText($host, $SRC{$name}->{tag}, $text, $SRC{$name}->{prepend}, $SRC{$name}->{append}); if($RECORDS{$host}) { if($RECORDS{$host} ne $text && $text ne 'REJECT') { $RECORDS{$host} .= '; '.substr($text,0,80); } $dupe = 1; } elsif($text ne 'REJECT') { $RECORDS{$host} = $text; } elsif($SRC{$name}->{tag}) { $RECORDS{$host} = $SRC{$name}->{tag}; } else { $RECORDS{$host} = $name; } } return($dupe); } sub _composeText { my ($addr,$tag,$text,$prepend,$append) = @ARG; if($text) # sanitize { ## sanitize $text and prepend source-specific stuff $text =~ tr/ / /; # eliminate tabs $text =~ tr/-a-zA-Z0-9_@:\/\?=&%#;, \.\*\+\[\]\(\)|//cd; } if($tag) { if($text) { ## SBL hack: dont append space if tag ends with = if($tag =~ /=$/) { $text = $tag.$text; } else { $text = $tag.' '.$text; } } else { $text = $tag; } } # else should not happen, shure ? ## deprecated, legacy only, will be removed soon if($prepend) { if($text) { $text .= ' '.$prepend; } else { $text = $prepend; } } if($append) { if($text) { $text .= ' '.$append.$addr; } else { $text = $append.$addr; } } # return($text||'REJECT'); if(!$text && $CFG{debug}) { print STDERR "# _composeText empty text: addr=$addr\n"; sleep(1); return('REJECT'); } return($text); } ## sub unReverse { my $reverse = shift or die('missing reverse ip'); my (@array) = split(/\./, $reverse); my $a = pop @array; my $b = pop @array; my $c = pop @array; my $d = pop @array; return "$a.$b.$c.$d" if(defined($d) && $d ne '*'); return "$a.$b.$c" if(defined($c) && $c ne '*'); return "$a.$b" if(defined($b) && $b ne '*'); return("255.255.255.255"); # should not happen } ## called for every line with $format->notation => 'reverse' sub reverseDns { my $octet = shift or return('0.0.0.0'); my ($a,$b,$c,$d) = split(/\./, $octet); return "$d.$c.$b.$a" if defined($d); return "*.$c.$b.$a" if defined($c); return "*.$b.$a" if defined($b); &msg(1, 'WARN', 'reverseDNS('.$octet.'): illegal IPv4 format'); return("; -ERROR-$octet-"); # should not happen } ## experimental parser for rbldns formatted source ## minimal implementation: no CIDR, ignore text ## !1.2.3.4 ## 1.2.3.4-99 ## !1.2.3.10-20 sub parseRbldns { my ($name, $ptr, $type, $option, $rx_include, $rx_exclude) = @ARG; if($type) {}; # suppress warning my $do_expand = 1; # to be implemented my %bl = (); my %ex = (); my($line,$addr,$descr,$base,$first,$last,$a,$b,$c,$d,$i,$cidr); my $dupes = 0; my $message = 'name='.$name.' option='.$option||'NULL'; if($rx_exclude) { $message .= ' exclude=/'.$rx_exclude.'/'; } elsif($rx_include) { $message .= ' include=/'.$rx_include.'/'; } &msg(1, 'RBLDNS', $message); foreach $line (@{$ptr}) { next if($line =~ /^([#:\$]|\s*$)/); # skip junk ($addr,$descr) = split(/[\s:]+/, $line, 2); chomp($descr); next if($rx_exclude && $descr && $descr =~ /$rx_exclude/); if($addr =~ /-/) # range specified { ($addr,$last) = split(/-/, $addr, 2); ($a,$b,$c,$d) = split(/\./, $addr, 4); if(defined($d)) { $base = "$a.$b.$c"; $first = $d; } elsif(defined($c)) { $base = "$a.$b"; $first = $c; } elsif(defined($b)) { $base = "$a"; $first = $b; } } else { $base = $first = $last = 0; } if($addr =~ /^!/) # exception { if($base=substr($base,1)) { for($i=$first; $i<=$last; $i++) { $ex{"$base.$i"} = 1; } } elsif($addr=substr($addr, 1)) { $ex{$addr} = 1; } else { print STDERR "# ERROR(1428): $line\n"; } &msg(1, 'WARN', 'cant handle exceptions with CIDR: '.$addr) if($addr =~ /\//); next; } else # block { if($descr) # optimize { next if($rx_include && $descr !~ /$rx_include/); if($option eq 'notext') { $descr = 1; } elsif($descr =~ /^[0-9.]+:/) # strip rbldnsd style return value { ($i,$descr) = split(/[:\s]+/,$descr,2); ## chomp($descr); # keep this until fully tested $descr = $i||1 unless($descr); # failsave } } else { $descr = 1; } if($base) # range specified { for($i=$first; $i<=$last; $i++) { if(!$bl{"$base.$i"}) { $bl{"$base.$i"} = $descr; } else { $dupes++; } } } elsif(!$bl{$addr}) { $bl{$addr} = $descr; } # plain else { $dupes++; } # duplicate } } undef(@{$ptr}); # free memory my $blcount = keys(%bl); my $excount = keys(%ex); print STDERR "DEBUG: blocks=$blcount exceptions=$excount (dupes=$dupes)\n" if($CFG{debug}); my $expanded; my $exhandled = 0; foreach $addr (keys %ex) # attn: addr my be partial octet { delete($ex{$addr}); # try to stay below 128mb if($bl{$addr}) # remove exact match { delete($bl{$addr}); $exhandled++; } next unless($do_expand); # todo: prepend 'EXCEPTION' $expanded = 0; ($a,$b,$c,$d) = split(/\./, $addr); # if(defined($b) && $bl{"$a"}) { $expanded = &expandExcept(\%bl, "$a", $b); } if(defined($c) && $bl{"$a.$b"}) { $expanded = &expandExcept(\%bl, "$a.$b", $c); } if(defined($d) && $bl{"$a.$b.$c"}) { $expanded = &expandExcept(\%bl, "$a.$b.$c", $d); } $exhandled += $expanded; } undef(%ex); # free memory print STDERR "DEBUG: exceptions=$excount handled=$exhandled\n" if($CFG{debug}); $blcount = 0; if($option eq 'notext') # optimize loop { foreach $addr (keys %bl) { if($addr =~ /\//) { ($addr,$cidr) = split(/\//,$addr); } else { $cidr = 0; } &pushAddr($name,$addr,$cidr,''); # todo: implement CIDR # $blcount++; } } else ## full blown with text { foreach $addr (keys %bl) { if($addr =~ /\//) { ($addr,$cidr) = split(/\//,$addr); } else { $cidr = 0; } &pushAddr($name,$addr,$cidr,$bl{$addr}); # $blcount++; } } # print STDERR "DEBUG: expanded count=$blcount\n" if($CFG{debug}); return($blcount); } ## fairly working with Easynet DYNABLOCK_AXFR, FIVETEN and NOMORE ## WARNING: cant handle void cnames - do not use for blocking ## type="axfr/cname" ignores any TXT and saves memory sub parseAxfr { my ($name, $ptr, $rawtype, $option, $rx_include, $rx_exclude) = @ARG; my ($junk,$type) = split(/\//, $rawtype); $type = 'txt' unless($type); # default my $dupes = 0; ## bind-style exceptions as used by DYNABLOCK and FIVETEN my %ex = (); &msg(1, 'AXFR','type='.$type.' include=~/'.$rx_include||'.*'.'/ exclude!~/'.$rx_exclude||'.*'.'/'); my %c2a = (); # {cname} => addr my %c2t = (); # {cname} => txt my %a2c = (); # {addr} => cname my ($dbfile,$dbhandle); if($option eq 'axfrexpand') # trying to stay below 128mb { &msg(1, 'AXFR','expand exception parent ('.$name.')'); ## $dbfile = $CFG{cachedir}.'/axfr_expand.db'; # $dbfile = 'NULL'; # still too much # $dbhandle = &dbOpen(\%c2a, $dbfile); } my ($line,$i,$addr,$reverse,$cname,$text); my $host = '.'; foreach $line (@{$ptr}) { next if($line =~ /^([#;]|\s*$)/); # skip junk if($line =~ /^([-\*a-z0-9.]*)\s+([0-9HDW]+\s+)?(IN\s+)?A\s+([0-9.]+)/i) { $host = $1 if($1); $addr = $4; next if($type eq 'cname'); # shortcut next if($rx_include && $host !~ /$rx_include/); # need me ? # next if($rx_exclude && $host =~ /$rx_exclude/); # need me ? # print STDERR "A c2a\{$host\}: $addr\n" if($CFG{debug}); $c2a{$host} = $addr; } elsif($line =~ /^([-\*a-z0-9.]*)?\s+([0-9HDW]+\s+)?(IN\s+)?TXT\s+\"([^\"\$]+)\"/i) { $host = $1 if($1); $text = $4; next if($type ne 'txt'); # shortcut next if($rx_include && $host !~ /$rx_include/); # next if($rx_exclude && $host =~ /$rx_exclude/); # need me ? # print STDERR "T c2t\{$host\}: $text\n" if($CFG{debug}); if($c2t{$host}) { $c2t{$host} .= ', '.$text; $dupes++; } # append else { $c2t{$host} = $text; } } elsif($line =~ /^([-\*a-z0-9.]+)\s+([0-9HDW]+\s+)?(IN\s+)?(CNAME)\s+([-a-z0-9.]+)/i) { $host = $5; $reverse = $1; next if($rx_include && $host !~ /$rx_include/); # next if($rx_exclude && $host =~ /$rx_exclude/); # need me ? $addr = &unReverse($reverse); if($host =~ /$rx_exclude/) { # print STDERR "Except1 $addr: $line" if($CFG{debug}); $ex{$addr} = $host; next; } # print STDERR "C a2c\{$addr\}: $host\n" if($CFG{debug}); $a2c{$addr} = $host; } elsif($line =~ /^\$GENERATE\s+1-1\s+([\*0-9.]+)\s+(IN\s+)?(CNAME)\s+([-a-z0-9.]+)/i) # format used by FIVETEN too often { $host = $4; $reverse = $1; next if($rx_include && $host !~ /$rx_include/); # next if($rx_exclude && $host =~ /$rx_exclude/); # need me ? $addr = &unReverse($reverse); if($host =~ /$rx_exclude/) # mostly hit by FIVETEN { # print STDERR "Except2 $addr: $line" if($CFG{debug}); $ex{$addr} = $host; next; } # print STDERR "C a2c\{$addr\}: $host\n" if($CFG{debug}); $a2c{$addr} = $host; } elsif($line =~ /^\$GENERATE\s+([0-9]+)-([0-9]+)\s+([\*0-9.]*)\$([\*0-9.]+)\s+(IN\s+)?(CNAME)\s+([-a-z0-9.]+)/i) { $host = $7; my $from = $1; my $to = $2; my $pre = $3; my $post = $4; next if($rx_include && $host !~ /$rx_include/); # next if($rx_exclude && $host =~ /$rx_exclude/); # need me ? # print STDERR "G $host: $from-$to $pre\$$post\n" if($CFG{debug}); if($host =~ /$rx_exclude/) # mostly hit by WIREHUB { # print STDERR "Except3 f=$from t=$to: $line" if($CFG{debug}); for($i=$from; $i<=$to; $i++) { $addr = &unReverse($pre.$i.$post); $ex{$addr} = $host; } } else { for($i=$from; $i<=$to; $i++) { $addr = &unReverse($pre.$i.$post); $a2c{$addr} = $host; } } } elsif($line =~ /^([-0-9a-z.]+)?\s+([0-9HDW]+\s+)?(IN\s+)?(NS|RP)\s+[a-z]/i) { next; } # skip junk elsif($line =~ /^\$(TTL|ORIGIN)\s/) { next; } # skip junk else # still experimental { &msg(1, '? ', $line); sleep(1) if($CFG{debug}); } } undef(@{$ptr}); # free up some memory &msg(3, 'AXFR', 'a2c=',int(keys %a2c),' c2t=',int(keys %c2t),' c2a=',int(keys %c2a) ); ## handle exceptions as used by DYNABLOCK and FIVETEN my $excount = keys(%ex); my $exhandled = 0; my ($a,$b,$c,$d,$expanded,$deleted); foreach $addr (keys %ex) # attn: addr my be partial octet { ($a,$b,$c,$d) = split(/\./, $addr); $cname = $ex{$addr}; delete($ex{$addr}); # trying to stay below 128mb # print STDERR "Except4 addr=$addr cname=$cname\n" if($CFG{debug}); $deleted = 0; if($a2c{$addr}) # remove exact matches { delete($a2c{$addr}); $deleted = 1; # suppress warning below $exhandled++; } ## still experimental, please report irregularites if($option eq 'axfrexpand') # expand parent blocks, bloats the list { $expanded = 0; if(defined($b) && $a2c{"$a"}) { $expanded = &expandExcept(\%a2c, "$a", $b); } if(defined($c) && $a2c{"$a.$b"}) { $expanded = &expandExcept(\%a2c, "$a.$b", $c); } if(defined($d) && $a2c{"$a.$b.$c"}) { $expanded = &expandExcept(\%a2c, "$a.$b.$c", $d); } if($expanded) # nothing done { # $c2t{$cname} .= ' {EXCEPTION}'; # for debugging delete($c2t{$cname}); # does that help any ? $exhandled += $expanded; } elsif(!$deleted) { &msg(3, 'AXFR', "NO PARENT for $addr"); } next; } else # prepend EXCEPTION and leave parent alone { if(!$a2c{"$a"}) { next if(!defined($b)); if(!$a2c{"$a.$b"}) { next if(!defined($c) || !$a2c{"$a.$b.$c"}); } } if($type eq 'cname') # shortcut, DYNABLOCK { ## $a2c{$addr} = $cname.' EXCEPTION '.$class; $a2c{$addr} = 'EXCEPTION '.$cname; } else # half blown, NOMORE, FIVETEN { $a2c{$addr} = $cname; $c2t{$cname} = 'EXCEPTION'; } $exhandled++; ## print STDERR "Except6 addr=$addr cname=$cname c2t=$c2t{$cname}\n" if($CFG{debug}); } } &msg(3, 'AXFR', "exceptions=$excount, handled=$exhandled"); undef(%ex); # free up some memory my $count = 0; if($type eq 'a') # simple bind zone, INFLOW_NOFLOW { foreach $addr (sort keys %c2a) { &pushAddr($name,&unReverse($addr),0,''); $count++; } return($count); } elsif($type eq 'cname') # shortcut, skip txt for DYNABLOCK { # foreach $addr (sort keys %a2c) foreach $addr (keys %a2c) # trying to stay below 128mb { &pushAddr($name,$addr,0,$a2c{$addr}); $count++; } return($count); } elsif($type ne 'txt') { die("FATAL parseAxfr: unknown type: $type"); } my ($match,@c); foreach $addr (sort keys %a2c) # full blown with txt { if( !($cname = $a2c{$addr}) ) # still experimental { &msg(0, "! UNKNOWN cname for '$addr'"); sleep(1) if($CFG{debug}); # time for ^C next; } if($c2t{$cname}) # ugly but works for now { if($c2t{$cname} =~ /^EXCEPTION(\s|$)/) { &pushAddr($name,$addr,0,$c2t{$cname}.' '.$cname); } else { &pushAddr($name,$addr,0,$cname.': '.$c2t{$cname}); } $count++; next; } ## else find parent wildcard (@c) = split(/\./, $cname); (@c) = reverse(@c); if($c[1] && ($text=$c2t{"*.$c[1].$c[0]"})) { $match = ".$c[1].$c[0]"; } elsif($c[1] && ($text=$c2a{"*.$c[1].$c[0]"})) { $match = ".$c[1].$c[0]"; } elsif($text=$c2t{"*.$c[0]"}) { $match = ".$c[0]"; } elsif($text=$c2a{"*.$c[0]"}) { $match = ".$c[0]"; } else # still experimental { &msg(2, "! $cname: c2a=",$c2a{$cname}||'','0=',$c[0],' c2t=',$c2t{$cname}||''); sleep(1) if($CFG{debug}); # time for ^C next; # shortcut } &pushAddr($name,$addr,0,$cname); $count++; } # if($option eq 'axfrexpand') # trying to stay below 128mb # { # &dbClose($dbhandle, \%c2a, $dbfile); # &unlinkFile($dbfile) unless($dbfile eq 'NULL'); # } return($count); } # experimental sub expandExcept { my ($bl, $prefix, $except) = @ARG; my $parent = $bl->{$prefix}; delete($bl->{$prefix}); delete($bl->{"$prefix.$except"}); # try to stay below 128mb # &msg(3, 'AXFR', "expand $prefix.* except $prefix.$except ($parent)\n"); my $i; for($i=0; $i<256; $i++) { if($i != $except) { $bl->{"$prefix.$i"} = $parent; } else { delete($bl->{"$prefix.$i"}); } } return(255); # guess } ## global $CFG{yymmdd}, $CFG{loglevel} sub importMagic { my ($magic, $file, $fh) = @ARG; return(0) unless(-f $file); print $fh $magic,'import: ',$CFG{yymmdd},' - file ',$file,' - editing allowed',"\n"; my $line; my $count = 0; my $skip = 0; my $magic_start = $magic.'start'; my $magic_end = $magic.'end'; ## todo: use &readFile($file, \@array); my $input = new IO::File; $input->open($file, O_RDONLY) or die("open $file: $OS_ERROR"); while (<$input>) { last if (/^$magic_start/); next if (/^$magic/); print $fh $ARG; $count++; } while (<$input>) { last if (/^$magic_end/); $skip++; } while (<$input>) { print $fh $ARG; $count++; } $input->close; &msg(1, 'IMPORT', "$file (imported=$count,skipped=$skip)"); return($count); } ################################################# ## stage 0: parse configuration and arguments # experimental, still wont run with -T sub untaint { my $string = shift || ''; my $type = shift || 'all'; my %rx = ( file => '^([-\w.]+)$', path => '^([-\/\w.:\\\\]+)$', url => '^([-\/\w.:\@?=&+]+)$', text => '^([-\/\w.:\@?=&#+\s,\(\)\{\}"\'\\\\]+)$', all => '^([.+]+)$', ); # &msg(2, 'Untaint', "$type ($string =~ /$rx{$type}/)"); return($1) if($string =~ /$rx{$type}/); die("FATAL: cant untaint $type ($string =~ /$rx{$type}/)"); } sub parseConfig { my $c = shift; # \%CFG my $d = shift; # \%DEFAULT # basic sanity checks die('FATAL: refuse to run without flag -w') unless($WARNING); umask(022) or die("FATAL: cant set umask(022): $!") if($OSNAME eq 'cygwin'); # untaint for Cwd, badly placed delete @ENV{qw(IFS CDPATH ENV BASH_ENV)}; $ENV{PATH} = $d->{exec_path}; &_parseArguments($c); $c->{opt_f} = 'octets' unless($c->{opt_f}); # ugly hack &msg(1, "$PROGRAM_NAME (uid=$UID, os=$OSNAME)"); ## used only in &_readConfig(), but this may be multiple times eval { require XML::Simple; import XML::Simple; }; die("FATAL: spfilter depends on module XML::Simple\n$EVAL_ERROR\n") if($EVAL_ERROR); if($c->{xmllocal}) # optional additional local xml-config { &msg(2, 'Config', 'parse local', $c->{xmllocal} ); &_readConfig($c, $c->{xmllocal}, 1); } ## untaint %ENV for invocation of gpgv (perl still croaks) delete @ENV{qw(IFS CDPATH ENV BASH_ENV)}; $c->{exec_path} = $d->{exec_path} unless($c->{exec_path}); $ENV{PATH} = $c->{exec_path}; $ENV{LC_ALL} = 'C'; $ENV{TZ} = 'UTC'; # if($OSNAME ne 'MSWin32') { $ENV{SHELL} = '/bin/sh'; $ENV{IFS} = "\n"; } ## $ENV{HOME} = $c->{workdir}; # need me ? &checkGpg($c, $d->{exec_gpgv}, $d->{keyring}, $d->{exec_path}); # pass defaults &_setDefaults($c, $d); ## not really configurable yet, defaults to spfilter-config.xml if($ENV{XMLCONF}) { $c->{xmlconfig} = $ENV{XMLCONF}; } # hack elsif(!$c->{xmlconfig}) { $c->{xmlconfig} = $DEFAULT{xmlconfig}; } if($c->{xmlconfig} eq 'NULL') { $c->{xmlconfig} = '__DATA__'; } elsif($c->{xmlconfig} !~ /\.xml(\.0[0-9]{5,7})?$/) { die("bad extension for xmlconfig '$c->{xmlconfig}"); } elsif($c->{xmlconfig} !~ /\//) # try in ./ and /usr/local/etc/ { my ($cached,$size); if(-f "./$c->{xmlconfig}") { $c->{xmlconfig} = "./$c->{xmlconfig}"; } elsif(-f "/usr/local/etc/$c->{xmlconfig}") { $c->{xmlconfig} = "/usr/local/etc/$c->{xmlconfig}"; } elsif( !$ENV{NOCACHED} && $c->{keyring} && ($cached = &_getCached($c->{xmlconfig}, '')) && ($size = &checkFile($cached,20,80)) ) { &msg(2, 'Config', "using cached '$cached' (${size}k)"); $c->{xmlconfig} = $cached; } else # fallback to built-in xml { &msg(2, 'Config', "external '$c->{xmlconfig}' not found"); $c->{xmlconfig} = '__DATA__'; } } elsif(! -f $c->{xmlconfig}) { die("FATAL: nonexistent xmlconfig '$c->{xmlconfig}'"); } # provide some fallback to __DATA__, ugly die("FATAL: cant find any valid xml '$c->{xmlconfig}'") if( !&_readConfig($c, $c->{xmlconfig}, 0) && ($c->{xmlconfig} eq '__DATA__' || !&_readConfig($c, '__DATA__', 0)) ); ## chop off trailing colon chop($c->{sources}); chop($c->{formats}); ## smart prefix, still far from perfect, use -o for override if($c->{sources} =~ /^[-a-zA-Z0-9_]+$/) { $c->{prefix} = $c->{sources}.'.'; } elsif($c->{opt_s} =~ /^[-a-zA-Z0-9_]+$/) { $c->{prefix} = $c->{opt_s}.'.'; } else { $c->{prefix} = ''; } # fill in later ## big-brother hack my $agent; if($c->{email}) { $agent = substr($c->{email},0,48); } # contact else { $agent = substr($c->{opt_s}.', '.$c->{formats},0,48); } # statistics $c->{useragent} = &untaint($c->{useragent}.' ('.$agent.', '.$OSNAME.')', 'text'); ## print information &msg(1, 'Agent', $c->{useragent}); return(0) if($c->{loglevel}<=1); &msg(3, 'Dirs', "-work=$c->{workdir} -cachedir=$c->{cachedir} outdir=$c->{outdir} -pubdir=$c->{pubdir}"); &msg(3, 'Files', "prefix=$c->{prefix} outfile=$c->{outfile} -xmllocal=$c->{xmllocal}"); # return(0) if($c->{loglevel}<=2); &msg(3, 'Opts', "debug=$c->{debug} loglevel=$c->{loglevel} interval=$c->{interval}"); exit(0) if($c->{loglevel}>=9); # for debugging return(0); } ## todo sub _parseArguments { my $c = shift; my $noargs = $#ARGV+1; @{$c->{argv}} = @ARGV; # for experimental &lockFile() my %opt = (); my @args = ( 'c=s', # cache directory 'd+', # debug mode 'e=s', # email contact 'f=s', # output format 'h', # help 'i=i', # interval 'k=s', # keyring 'l:s', # syslog or logfile 'o=s', # outdir or outfile 'p=s', # publish directory 'q+', # quiet mode 's=s', # input sources 't:s', # in-memory of file-db 'u=s', # suid or trsted user 'v+', # increase verbosits 'w=s', # working directory 'x=s', # additional xml-config 'y', # experimental: dump embedded xml 'z:s', # zone:1.2.3.4 ## '<>', \&invalidArgument # leaves empty @ARGV behind, unusable ); use Getopt::Long; # Getopt::Long::config('auto_abbrev','permute','bundling','pass_through'); Getopt::Long::config('auto_abbrev','bundling','pass_through'); GetOptions(\%opt, @args); ## configurable via commandline only exec('perldoc', $PROGRAM_NAME) if($opt{h}); # show embedded manpage if($opt{y}) { print STDOUT ; exit(0); } # only dump xml $c->{debug} = ($opt{d}&&$opt{d}>0)?$opt{d}:0; $c->{quiet} = ($opt{q}&&$opt{q}>0)?$opt{q}:0; if($noargs) { $c->{loglevel} = ($opt{v}&&$opt{v}>0)?$opt{v}:0; $c->{loglevel} += (1 + $c->{debug} - $c->{quiet}); } else { $c->{loglevel} = 2; } &msg(2, "a) initialize/config (loglevel=$c->{loglevel})"); ## workdir -w, for use within pipes and '-o STDOUT' die("FATAL: cant chdir to '$c->{workdir}': $OS_ERROR") if(($c->{workdir}=&_parseOpt($opt{w})) && !chdir($c->{workdir})); use Cwd; $c->{workdir} = cwd() or die("cant get cwd(): $OS_ERROR"); # todo: display with verbose>1 if($opt{x}) # optional spfilter-local.xml, precheck { $c->{xmllocal} = &_parseOpt($opt{x}); die("FATAL: nonexistent or empty xml '$opt{x}'") unless($c->{xmllocal} && -f $c->{xmllocal} && -s $c->{xmllocal}); } else { $c->{xmllocal} = ''; } $c->{interval} = $opt{i}||0; # need me ? ## configurable via commandline or trusted local xml $c->{email} = &_parseOpt($opt{e}); $c->{cachedir} = &_parseOpt($opt{c}); $c->{outdir} = &_parseOpt($opt{o}); $c->{exec_user} = &_parseOpt($opt{u}); $c->{tiehash} = &_parseOpt($opt{t}); # NULL|/tmp/tiehash.gdbm|/tmp/tiehash.db ## experimental -pubdir=./publish, republish files as *.bz2 ## requires 3 more subdirs below: input, output and diff $c->{pubdir} = &_parseOpt($opt{p}); $c->{opt_f} = &_parseOpt($opt{f}); # default 'octets' $c->{formats} = ''; # $c->{opt_f} parsed into $c->{formats} later if($opt{k} && ($c->{keyring}=&_parseOpt($opt{k})) ne 'NULL') # default 'spfilter-keyring.gpg' { die("ALERT: nonexistend keyring '$c->{keyring}'") unless(-f $c->{keyring}); # enforce } ## items assigned from commandline, spfilter-*.xml or default if($#ARGV>=0) # sources now taken from remainder of commandline { if($opt{s}) { $c->{opt_s} = join(' ', $opt{s}, @ARGV); } else { $c->{opt_s} = join(' ', @ARGV); } } elsif($opt{s}) { $c->{opt_s} = $opt{s}; } else { $c->{opt_s} = 'DEFAULT'; } # ugly hack $c->{sources} = ''; # filled from $c->{opt_s} or 'DEFAULT' ## dnsbl-in-a-box if($opt{z}) { ($c->{zone_name},$c->{zone_addr},$c->{zone_ttl}) = split(/[:,\s]+/, &_parseOpt($opt{z})); } ## non-configurable items, hands off $c->{useragent} = $c->{project}.'/'.$c->{version}; $c->{magic} = $c->{project}.' magic '; # magic_update marker $c->{yymmdd} = strftime("%y%m%d", gmtime($BASETIME)); ## $c->{hhmmss} = strftime("%H%M%S", gmtime($BASETIME)); ## $c->{yymm} = strftime("%y%m", gmtime); # todo: subdir for diff archive } ## ugly hack, todo: there must be a better way... sub _parseOpt { my $opt = shift or return(''); # &msg(2, 'ArgIn', $opt); return($opt) if($opt !~ /=/); # shortcut my($one,$two) = split(/=/, $opt, 2); # &msg(2, 'ArgOut', $two); return($two); } ## verify settings and filenames sub _setDefaults { my $c = shift; # \%CFG my $d = shift; # \%DEFAULT ## configurable via commandline or local config # changed: fallback to . was a bad implementation # todo: try also /usr/local/var/spfilter/cache $c->{cachedir} = $d->{cachedir} unless($c->{cachedir}); # ./cache if(!(-d $c->{cachedir})) { die("FATAL: nonexistent -cachedir=$c->{cachedir} (-workdir=$c->{workdir})"); } ## -o ./outdir may contain directory and/or filename, or 'STDOUT' $c->{outfile} = ''; if(!$c->{outdir}) { $c->{outdir} = (-d $d->{outdir})?$d->{outdir}:'.'; # ./outdir } elsif($c->{outdir} eq 'STDOUT' || $c->{outdir} eq '-') { $c->{outdir} = 'STDOUT'; $c->{outfile} = 'STDOUT'; } elsif(! -d $c->{outdir}) { if( $c->{outdir} =~ /^(.+)\/([^\/]+)$/ ) { $c->{outdir} = $1; $c->{outfile} = $2; } elsif( $c->{outdir} =~ /^[-a-zA-Z0-9_.]+$/ ) { $c->{outfile} = $c->{outdir}; $c->{outdir} = (-d $d->{outdir})?$d->{outdir}:'.'; } else { die('FATAL: cant get outfile from outdir: '.$c->{outdir}); } die('FATAL: nonexistent outdir: '.$c->{outdir}) unless(-d $c->{outdir}); } $c->{interval} = 0 unless(defined($c->{interval})); # todo: first try user 'spfilter' # todo: add $c->{trusted_uid} for filecheck $c->{exec_user} = '' unless($c->{exec_user}); $c->{exec_uid} = &checkUser($c->{exec_user}); # looks insecure ## used for building dnsbl, only used for tinydns and bind if($c->{zone_name} || $c->{opt_f} =~ /(bind|tinydns)/) { $c->{zone_name} = $d->{zone_name} unless($c->{zone_name} && $c->{zone_name} =~ /^[-a-z.]{4,32}$/); # localhost $c->{zone_addr} = $d->{zone_addr} unless($c->{zone_addr} && $c->{zone_addr} =~ /^[0-9.]{7,15}$/); # 127.0.0.1 $c->{zone_ttl} = $d->{zone_ttl} unless($c->{zone_ttl} && int($c->{zone_ttl}) >= 300); &msg(2, 'Zone', $c->{zone_name}, "\[$c->{zone_addr}\]", "(TTL $c->{zone_ttl})"); } ## configurable via local config only $c->{email} = '' unless($c->{email}); # prevent warnings if(!$ENV{NOLWP} && !$c->{exec_http}) # hack to disable libwww-perl { eval { require LWP::UserAgent; import LWP::UserAgent; }; if(!$EVAL_ERROR) { $c->{exec_http} = 'LWP::UserAgent'; # magic &msg(3, 'WARNING', "running '$c->{exec_http}' as root (uid=$UID, euid=$EUID)") unless($UID || $EUID); } else { &msg(2, 'WARNING', 'LWP::UserAgent not available - fallback to wget'); &msg(3, 'HINT', 'set \$config{exec_http} or \$ENV{NOLWP} to disable LWP'); } } ## legacy wget, --header="{AUTH}" removed ## trouble: fetch cant set headers, lynx has no exitcodes, both unusable $c->{exec_http} = $d->{exec_http} unless($c->{exec_http}); # fallback to default if($c->{exec_http} ne 'LWP::UserAgent') # ugly { # locate binary in \$c->{exec_path} $c->{exec_http} = &whichFile($c->{exec_http},$c->{exec_path}); } ## url will be appended $c->{exec_rsync} = $d->{exec_rsync} unless($c->{exec_rsync}); $c->{exec_rsync} = &whichFile($c->{exec_rsync},$c->{exec_path}); ## not implemented # $c->{exec_axfr} = $d->{exec_axfr} unless($c->{exec_axfr}); # $c->{exec_axfr} = &whichFile($c->{exec_axfr},$c->{exec_path}); $c->{pack_ext} = '(bz2|gz)'; $c->{exec_bunzip} = $d->{exec_bunzip} unless($c->{exec_bunzip}); $c->{exec_bunzip} = &whichFile($c->{exec_bunzip},$c->{exec_path}) or die("FATAL: bunzip2[.exe] not in \$CFG{exec_path} '$c->{exec_path}'"); ## warning: gunzip has never been tested $c->{exec_gunzip} = $d->{exec_gunzip} unless($c->{exec_gunzip}); $c->{exec_gunzip} = &whichFile($c->{exec_gunzip},$c->{exec_path}); # bzip2 and diff required only for master sites: (experimental) $c->{exec_bzip} = $d->{exec_bzip} unless($c->{exec_bzip}); $c->{exec_bzip} = &whichFile($c->{exec_bzip},$c->{exec_path}); # two filenames appended, retval=1 for success $c->{exec_diff} = $d->{exec_diff} unless($c->{exec_diff}); $c->{exec_diff} = &whichFile($c->{exec_diff},$c->{exec_path}); ## $c->{exec_patch} # update from diff, not implemented yet ## only publish with gpgv and keyring available if($c->{pubdir}) # this and at least one subdir must exist { if(!$c->{keyring} || $c->{keyring} eq 'NULL' || !$c->{exec_gpgv}) { &msg(0, 'BADARG',"-pubdir=$c->{pubdir}: gpgv and keyring required"); $c->{pubdir} = ''; } elsif(!-d $c->{pubdir} || !(-d $c->{pubdir}.'/input' || -d $c->{pubdir}.'/diff' || -d $c->{pubdir}.'/output') ) { &msg(0, 'WARNING', "argument -pubdir=$c->{pubdir}: missing subdir input, diff and output"); $c->{pubdir} = ''; } } ## initialize counters $c->{count_fetched} = 0; $c->{count_notmod} = 0; $c->{count_cached} = 0; $c->{count_sources} = 0; $c->{count_formats} = 0; $c->{count_errors} = 0; } ## handle crypto stuff - work in progress sub checkGpg { my $c = shift; my $defaultgpgv = shift || $DEFAULT{exec_gpgv}; my $defaultkeyring = shift || $DEFAULT{keyring}; # my $defaultdirs = shift || $DEFAULT{exec_path}; # not implemented if( ($c->{keyring} && $c->{keyring} eq 'NULL') || $ENV{NOVERIFY} || $ENV{NOPGP}) { &msg(2, 'Note', 'gpgv and signatures disabled (GPG DISABLED)'); $c->{exec_gpgv} = ''; # disable gpg $c->{keyring} = 'NULL'; # hide warnings return(0); # shortcut } $c->{exec_gpgv} = $defaultgpgv unless($c->{exec_gpgv}); ## $c->{exec_gpgv} .= ' -v' if($c->{loglevel}>2); # breaks gpgv unless($c->{exec_gpgv}=&whichFile($c->{exec_gpgv},$c->{exec_path})) { &msg(0, 'WARNING', "gpgv not in path '$c->{exec_path}' (GPG DISABLED)"); die("FATAL: keyring '$c->{keyring}' specified but gpgv not found") if($c->{keyring}); $c->{exec_gpgv} = ''; # disable gpg $c->{keyring} = ''; # show warnings return(0); # shortcut } $c->{keyring} = $defaultkeyring unless($c->{keyring}); ## $c->{keyring} = $DEFAULT{keyring} unless($c->{keyring}); if($c->{keyring} !~ /\//) # locate in ./ and /usr/local/etc { if(-f "./$c->{keyring}") { $c->{keyring} = "./$c->{keyring}"; } elsif(-f "/usr/local/etc/$c->{keyring}") { $c->{keyring} = "/usr/local/etc/$c->{keyring}"; } else { &msg(1, 'WARNING', "cant locate keyring '$c->{keyring}' (GPG DISABLED)"); $c->{exec_gpgv} = ''; # disable gpg $c->{keyring} = 'NULL'; # no more warnings return(0); # shortcut } } elsif(!-f $c->{keyring}) { die("FATAL: nonexistent keyring '$c->{keyring}'"); } ## looks good my $size = &checkFile($c->{keyring}, 1, 16) or die("ALERT: bad size or permission on keyring '$c->{keyring}'"); &msg(3, 'Keyring', "$c->{keyring} (${size}k)"); if($c->{exec_gpgv} =~ /{KEYRING}/) { $c->{exec_gpgv} =~ s/{KEYRING}/$c->{keyring}/; } elsif($c->{exec_gpgv} !~ /\s--keyring\s/) { $c->{exec_gpgv} .= ' --keyring '.$c->{keyring}; } else { $c->{exec_gpgv} .= ' "'.$c->{keyring}.'"'; } # need me ? ## wont need that for gpg, not shure about pgp2 and pgp5 # $ENV{GNUPGHOME} = $c->{workdir}.'/PGP'; # trustedkeys.gpg # $ENV{PGPPATH } = $c->{workdir}.'/PGP'; return(1); } ## http://theoryx5.uwinnipeg.ca/CPAN/data/XML-Simple/Simple.html#EXAMPLES ## attention: remote-config potentially unsafe sub _readConfig { my ($cfg, $file, $trusted) = @ARG; my $cfgminsize = 1; my $cfgmaxsize = 100; ## eval { require XML::Simple; import XML::Simple; }; ## die("FATAL: spfilter depends on module XML::Simple\n$EVAL_ERROR\n") if($EVAL_ERROR); my ($xml,$size); if($file eq '__DATA__') { &msg(2, 'CONFIG', $file, "fallback to built-in xml"); my (@data) = ; my $tmp; while($tmp=pop(@data)) { last if($tmp eq "__END__\n"); } my $string = join(' ', @data); undef(@data); # free memory $size = int((length($string)+512)/1024); die("FATAL: bad size of embedded xml '$file'") if($size < $cfgminsize || $size > $cfgmaxsize); # game over $xml = eval { XMLin($string, suppressempty => 1) }; } elsif(-f $file) { if(!($size=&checkFile($file, $cfgminsize, $cfgmaxsize))) { &msg(0, 'ALERT', "bad size or permission on xml '$file'"); return(0); # try again with __DATA__ } # verify signature of untrusted xml with gpg if(!$trusted && &verifyFile($file) < 0) { &msg(0, 'ALERT', "gpg-signature '$file' FAILED - please investigate"); &msg(2, 'HINT', "use argument '-k NULL to skip signature verification"); return(0); } $xml = eval { XMLin($file, suppressempty => 1) }; } else { die("FATAL: xml '$file' not found"); } if($EVAL_ERROR) # bad xml { &msg(0, 'ALERT', "XML Error in '$file'"); &msg(1, 'ERROR', $EVAL_ERROR); return(0); } ## if(defined($opt{l})) { use Data::Dumper; print Dumper($xml); exit(0); } ## todo: ugly implementation $xml->{version} = '0.00' unless($xml->{version}); # failsave $xml->{date} = '000000' unless($xml->{date}); # failsave if(!$trusted && (($xml->{version}+0) < ($cfg->{version}+0) || ($xml->{date}+0) < ($cfg->{date}+0)) ) { &msg(0, 'WARNING', 'outdated xml', $xml->{version}.'_'.$xml->{date}, "($cfg->{version}_$cfg->{date})"); return(0); } &msg(2, 'Config', $file , $xml->{version}.'_'.$xml->{date}, "(${size}k)"); my $name; if($trusted) # include variables only from local config { foreach $name (sort (keys %{$xml->{config}})) { if($name && !$cfg->{name} && $xml->{config}->{$name}) { # if( ref($xml->{config}->{$name}) eq 'SCALAR' ) $cfg->{$name} = $xml->{config}->{$name}; # &msg(3, 'config', $name,' -> ',$cfg->{$name}); } } } elsif($xml->{update}) # do that right here for now { &liveUpdate($xml->{update}, 'spfilter-config.xml'); ## todo: restart spfilter on $status==1 } ## input sources if($cfg->{opt_s} && $cfg->{opt_s} eq 'LINT' ) { my $lintlist; foreach $name (sort (keys %{$xml->{source}})) { $lintlist .= $name.','; } &msg(0, 'LINT', $lintlist); ## elsif($cfg->{opt_s} =~ /^[-A-Za-z0-9]+_$/ && $name =~ /^$cfg->{opt_s}.+/) { $list .= $name.','; } $cfg->{sources} .= &parseSourceArg(\%SRC, $xml->{source}, $lintlist, $trusted); ## debugging only die('LINT sources: '.$cfg->{sources}); } else { &msg(1, 'DEBUG', "empty variable \$cfg->{opt_s}") unless($cfg->{opt_s}); ## preset section, parse into global %PRESET foreach $name (sort (keys %{$xml->{preset}})) { if(!$PRESET{$name} && $name ne 'comment') { if( ref($xml->{preset}->{$name}) eq 'HASH' ) { my $alias = $xml->{preset}->{$name}->{alias} || 'ERROR'; &msg(4, 'PRESET', $name, "($alias)"); $PRESET{$name} = $alias; } else { &msg(2, 'PRESET', $name, 'UNSUPPORTED type='.ref($xml->{preset}->{$name})); } } } $cfg->{sources} .= &parseSourceArg(\%SRC, $xml->{source}, $cfg->{opt_s}, $trusted); } ## output formats $cfg->{formats} .= &parseFormatArg(\%FMT, $xml->{format}, $cfg->{opt_f}, $trusted); return(1); # success } ## $cfg->{sources} .= &parseSourceArg(\%SRC, $xml->{source}, $opt_s); sub parseSourceArg { my ($src, $xml, $list, $trusted) = @ARG; unless($list) { &msg(2, 'SOURCE', "empty argument list"); return(''); } my $sources = ''; my ($interval,$conflict); my @array = split(/[,\|&\+\s]+/, $list); foreach $name (@array) { next unless($name); ($name,$interval) = split(/:/, $name); if($name =~ /^-/) # exclude from set { $name = substr($name, 1); if(defined($src->{$name})) { &msg(2, 'Exclude', $name); delete($src->{$name}); } else { &msg(2, 'EXCLUDE', $name, 'not currently selected'); } } elsif(defined($PRESET{$name}) && $PRESET{$name}) # predefined set { my $recursive = $PRESET{$name}; $PRESET{$name} = '['.$name.'_RECURSIVE]'; # prevent loop &msg(2, $name, "-> $recursive"); $sources .= &parseSourceArg($src, $xml, $recursive, $trusted); } elsif(defined($xml->{$name})) { # todo: ALIAS always implies CONFLICT if(($conflict=$xml->{$name}->{conflict}) && defined($src->{$conflict})) { &msg(0, 'CONFLICT', "$name -> $src->{$conflict}->{name} (IGNORED)"); } else # Ok { &msg(3, 'SOURCE', "$name (interval=$interval)") if($interval); $src->{$name}->{name} = $name; &pushSource($name, $xml->{$name}, $src->{$name}, $trusted, $interval); $sources .= $name.','; } } elsif( !$CFG{xmllocal} && !defined($src->{$name}) ) { &msg(1, 'WARNING', "source '$name' not found in xml-config"); } ## else { &msg(1, 'DEBUG', "source=$name xmlconfig=$CFG{xmlconfig} xmllocal=$CFG{xmllocal} trusted=$trusted"); } #elsif($trusted && $name =~ /^[.\/][a-zA-Z0-9]/) # experimental, not working #{ # if(! -e $name) { &msg(0, 'WARNING', "$name; NON-EXISTENT file"); } # elsif(! -f $name) { &msg(0, 'WARNING', "$name: NON-REGULAR file"); } # else { &msg(0, 'WARNING', "$name: UNSUPPORTED - not implemented"); } #} } # &msg(4, 'SOURCES', $list,' -> ',$sources); return($sources); } ## pushSource($name, $xml->{$name}, $src->{$name}, $trusted); ## global $CFG{interval} sub pushSource { my ($name, $xml, $src, $trusted, $interval) = @ARG; die('FATAL: missing source name') unless($name); my $url; ## todo: smarter handling for 'alias' and 'filename' $src->{alias} = $xml->{alias} || ''; $src->{filename} = $xml->{filename} || $name; if($CFG{pubdir} && -d "$CFG{pubdir}/input" && $xml->{url_primary}) { ## hack for republishing primary/master sites &msg(2, 'Primary', $xml->{url_primary}); push(@{$src->{url}}, $xml->{url_primary}); } if(ref(\$xml->{url}) eq 'SCALAR') # single { unless($url=$xml->{url}) { &msg(0, 'WARNING', "empty url for '$name' (IGNORE)"); return(); } $url =~ s/{YYMMDD}/$CFG{yymmdd}/; $url =~ s/{FILENAME}/$src->{filename}/; # experimental ## experimental: macro {LATEST} expanded in &fetchSource() if(!$trusted && $url ne 'NULL' && $url !~ /^(http|ftp|rsync|axfr):\/\//) { &msg(0, 'ALERT', "untrusted url '$url' REJECTED ($name)"); return(); } push(@{$src->{url}}, $url); } else # multiple { foreach $url (@{$xml->{url}}) { $url =~ s/{YYMMDD}/$CFG{yymmdd}/; $url =~ s/{FILENAME}/$src->{filename}/; # experimental if(!$trusted && $url !~ /^(http|ftp|rsync|axfr):\/\//) { &msg(0, 'ALERT', "untrusted url '$url' REJECTED ($name)"); next; } else { push(@{$src->{url}}, $url); } } } # &msg(4, 'SOURCE', "$name -> @{$src->{url}}"); if($interval) { $src->{interval} = $interval; } elsif($CFG{interval}) { $src->{interval} = $CFG{interval}; } else { $src->{interval} = $xml->{interval} || 3; } if($xml->{type} && $xml->{type} =~ /^cidr/) { my ($junk,$cidr) = split(/\//, $xml->{type}); $src->{type} = 'cidr'; if($cidr) { $src->{cidr} = int($cidr); } else { $src->{cidr} = 16; } } else { $src->{type} = $xml->{type} || 'addr'; $src->{cidr} = 0; # prevent warnings } ## filled in later $src->{cache_fname} = ''; $src->{cache_ifmod} = 0; $src->{cache_status} = 0; $src->{fetch_fname} = ''; $src->{fetch_status} = 0; $src->{minsize} = $xml->{minsize} || 1; $src->{maxsize} = $xml->{maxsize} || 2000; $src->{conflict} = $xml->{conflict} || ''; # not fully implemented $src->{option} = $xml->{option} || ''; # axfrexpand, notext $src->{regexp_include} = $xml->{regexp_include} || $xml->{grep} || ''; # legacy {grep} $src->{regexp_exclude} = $xml->{regexp_exclude} || ''; if($xml->{tag} && $xml->{tag} ne 'NULL') { $src->{tag} = $xml->{tag}; } elsif(defined($xml->{tag})) { $src->{tag} = ''; } else { $src->{tag} = $name; } $src->{prepend} = $xml->{prepend} || ''; $src->{append} = $xml->{append} || ''; } ## $cfg->{formats} .= &parseFormatArg(\%FMT, $xml->{format}, $opt_f); sub parseFormatArg { my ($fmt, $xml, $list, $trusted) = @ARG; die('parseFormatArg: empty format list') unless($list); my $formats = ''; my @array = split(/[,\|&\+]/, $list); foreach $name (@array) { next unless(defined($xml->{$name})); # still need this ? $fmt->{$name}->{name} = $name; &pushFormat($name, $xml->{$name}, $fmt->{$name}, $trusted); $formats .= $name.','; } # &msg(3, 'FORMAT', "APPEND $list -> $formats"); return($formats); } # &pushFormat($name, $xmlfmt->{$name}, $fmt->{$name}); sub pushFormat { my ($name, $xml, $fmt, $trusted) = @ARG; ## $fmt->{name} = $name; ## $fmt->{default} = $xml->{default}; $fmt->{publish} = $xml->{publish} || 0; $fmt->{type} = $xml->{type} || 'txt'; $fmt->{notation} = $xml->{notation} || 'octet'; $fmt->{magic_update} = $xml->{magic_update} || 0; $fmt->{include} = $xml->{include} || ''; $fmt->{linestart} = $xml->{linestart} || ''; $fmt->{separator} = $xml->{separator} || "\t"; $fmt->{lineend} = $xml->{lineend} || ''; ## $fmt->{search} = $xml->{search} || ''; ## $fmt->{replace} = $xml->{replace} || ''; $fmt->{option} = $xml->{option} || ''; $fmt->{maxlength} = $xml->{maxlength} || 0; $fmt->{secondline} = $xml->{secondline} || ''; $fmt->{secondlinestart} = $xml->{secondlinestart} || ''; ## $fmt->{commentchar} = $xml->{commentchar} || '#'; } #################################################### ## Print Output ## output all messages via this function ## todo: logging via syslog, to named file, via email... ## global $CFG{loglevel}, $CFG{debug} sub msg { my ($level, $tag, @text) = @ARG; return() if($level && $level > $CFG{loglevel} ); my $msg; if($text[0]) { $tag = substr($tag||'UNKNOWN', 0, 7); $msg = substr(join(' ', @text), 0, 256); } else # legacy { $msg = substr($tag, 0, 256); $tag = '['.$PROCESS_ID.']'; # make up something } chomp($msg); print STDERR $tag,' ',$msg,"\n"; ## syslog($priority, 'childfilter %s', $cmd[0]); } ## todo: support for syslog and log to file # level: emerg|alert|crit|err|warning|notice|info|debug # facility: auth,authpriv,console,cron,daemon,ftp,kern,lpr,mail,news,security,syslog,user,uucp,local0..local7 sub logOpen { my $logfile = shift || 'unix'; my $facility = shift || 'user'; if($logfile eq 'unix') { # use Sys::Syslog qw(:DEFAULT setlogsock); ## eval { require Sys::Syslog; import Sys::Syslog; }; # setlogsock($logfile); ## setlogmask $mask_priority # openlog($CFG{project}, 'pid', $facility) or die("FATAL: syslog $logfile $facility: $OS_ERROR"); } else { die("FATAL: logging to '$logfile' not implemented"); } } sub logClose { my $logfile = shift || 'unix'; if($logfile eq 'unix') { closelog(); } } #################################################### ## DB_File handlers # return db type guessed on file extension # supports cdb, gdbm and db, defaults to db sub dbType { my $file = shift || 'NULL'; my $type; if($file eq 'NULL') { $type = 'db'; } elsif($file =~ /\.cdb(\.tmp)?$/) { $type = 'cdb'; } elsif($file =~ /\.gdbm(\.tmp)?$/) { $type = 'gdbm'; } elsif($file =~ /\.db(\.tmp)?$/) { $type = 'db'; } else { die("FATAL: unsupported DB-type for $file"); } return($type); } sub dbOpen { my ($hashptr, $file) = @ARG; my ($type,$package,$handle); # &msg(3, 'DEBUG', "dbOpen $file"); if(!$file || $file eq 'NULL') # use in-memory db { $type = 'db'; # fill something in $package = 'DB_File'; eval { require DB_File; import DB_File; }; die("FATAL: $type:\n$EVAL_ERROR\n") if($EVAL_ERROR); $handle = tie %{$hashptr}, $package; } else { $type = &dbType($file); if(-e $file) # todo: atomic creation { &msg(3, 'UNLINK', $file); &unlinkFile($file); } # else { &msg(3, 'DEBUG', "$file did not exist"); } # debugging if($type eq 'cdb') # CDB saves up to ~50% { $package = 'CDB_File'; eval { require CDB_File; import CDB_File; }; die("FATAL: $type:\n$EVAL_ERROR\n") if($EVAL_ERROR); # &msg(3, 'DEBUG', "create CDB $file"); $handle = new CDB_File ($file, $file.'.tmp') or die("$package new $type: $OS_ERROR"); } elsif($type eq 'gdbm') # gnu dbm, default { $package = 'GDBM_File'; eval { require GDBM_File; import GDBM_File; }; die("FATAL: $type:\n$EVAL_ERROR\n") if($EVAL_ERROR); $handle = tie(%{$hashptr}, 'GDBM_File', $file, &GDBM_WRCREAT, 0644) or die("$package tie $type: $OS_ERROR"); } elsif($type eq 'db') # bsd default, last resort { $package = 'DB_File'; eval { require DB_File; import DB_File; }; die("FATAL: $type:\n$EVAL_ERROR\n") if($EVAL_ERROR); $handle = tie(%{$hashptr}, 'DB_File', $file) or die("$package tie $type: $OS_ERROR"); # $handle = tie %{$hashptr}, 'DB_File', $file, O_CREAT|O_RDWR, 0644, $DB_HASH; # $DB_HASH->{'cachesize'} = 102400 ; } else { die("FATAL: unimplemented db-type: $type \($file\)"); } } &msg(1, 'dbOpen', $file, "($type, $package)"); die("FATAL: cant tie t=$type p=$package f=$file: $OS_ERROR") unless($handle); return($handle); } ########################################### ## helper functions related to filesystem ## check if executable exists in exec_path ## $pathfileargs = &whichFile($name,$path) sub whichFile { my @cmd = split(/[\s]+/, shift); my $path = shift || $CFG{exec_path}; my ($base,$name); if($cmd[0] =~ /^([-+a-zA-Z0-9._\/\\]+)\/([-+a-zA-Z0-9._]]+)$/) # extract basename { ($base,$name) = ($1,$2); } else { $name = $cmd[0] or die('missing filename'); } die("FATAL: illegal char in filename '$name'") unless($name =~ /^[-+a-zA-Z0-9._]+$/); my @path; if($OSNAME eq 'MSWin32') # ActiveState { (@path) = split(/[;,\s]+/, $path); # unsift(@path, '.') unless($path[0] eq '.'); # prepend cwd ## todo: use this function also for xml $name .= '.exe' unless($name =~ /\.(exe|xml|gpg)/); # smart ass } else { (@path) = split(/[:;,\s]+/, $path); } die("FATAL: missing exec_path for '$cmd[0]'") unless($path[0]); my $dir; foreach $dir (@path) { if(-f $dir.'/'.$name) { if(-s $dir.'/'.$name) { die('suid bit set: '.$dir.'/'.$name) if(-u $dir.'/'.$name); die("not executable: $dir/$name ($OSNAME)") if($name !~ /\.(xml|gpg)/ && $OSNAME ne 'cygwin' && ! -x $dir.'/'.$name); if($OSNAME eq 'MSWin32') { $cmd[0] = $dir.'\\'.$name; } else { $cmd[0] = $dir.'/'.$name; } ## todo: return array instead of string, for exec return(untaint(join(' ', @cmd), 'text')); } &msg(1, 'WARNING', "empty file '$name' (IGNORED)"); return(''); } } &msg(3, 'INFO', 'NONEXISTENT', $name, '('.join(',',@path).')'); return(''); } ## experimental, verify file against enclosed gpg-signature ## exploit: attacker could add lines outside of signed content sub verifyFile { my $file = shift || die('missing filename'); if(!$CFG{exec_gpgv} || !$CFG{keyring} || $CFG{keyring} eq 'NULL') { &msg(2, 'Note', "skip verification of '$file'") if($CFG{keyring} eq 'NULL'); return(0); # graceful } &msg(2, 'Verify', $file, "($CFG{keyring})"); my (@array); &readFile($file, \@array) or die("FATAL: cant read file '$file': $!"); # todo: quarantine/rename file on security violations if($array[2] ne "-----BEGIN PGP SIGNED MESSAGE-----\n") { &msg(0, 'ALERT', "$file line #3: missing start of pgp signature"); return(-1); } if(pop(@array) ne "-----END PGP SIGNATURE-----\n" && pop(@array) ne "-----END PGP SIGNATURE-----\n") { &msg(0, 'ALERT', "$file at #EOF: missing end of pgp signature"); return(-1); } # check unsigned part above and below signature if($file =~ /spfilter[-a-z0-9_]*\.xml(\.[0-9]+)?$/) { if( $array[0] !~ /^\<\?xml [-a-z0-9_.='" ]+\?\>\n$/ || $array[1] !~ /^\<[-a-zA-Z0-9_.="' ]+\>\n$/) { &msg(0, 'ALERT', "$file invalid header for *.xml"); &msg(2, 'ALERT', $array[0], $array[1]); return(-1); } } elsif($file =~ /spfilter[-a-z0-9_]*\.pl(\.[0-9]+)?$/) { if( $array[0] !~ /^#!\/usr\/[-a-zA-Z0-9_\/.]+perl[5]?\s+-[wT]+\n$/ || $array[1] != /^my \$[a-zA-Z_]+=<<'_[-a-zA-Z0-9_.]+_';\n$/) { &msg(0, 'ALERT', "$file invalid header for *.pl"); &msg(2, 'ALERT', $array[0], $array[1]); return(-1); } } else { die("ALERT: file '$file' unsupported/unknown type"); } undef(@array); # free up my $cmdline = $CFG{exec_gpgv} or die('\$$CFG{exec_gpgv} undefined'); if($cmdline =~ /{FILE}/) { $cmdline =~ s/{FILE}/$file/; } else { $cmdline .= ' "'.$file.'"'; } my @cmdout; my $retval = &forkExec($cmdline.' 2>&1', \@cmdout, 1); if($retval) { if($retval==1) { &msg(0, 'ALERT', "INVALID SIGNATURE - please investigate"); } elsif($retval==2) { &msg(0, 'ALERT', "INTERNAL GPG ERROR - please investigate"); } else { &msg(0, 'ALERT', "UNKNOWN GPG ERROR - please investigate"); } &msg(1, 'GPGV', substr("@cmdout"||'UNKNOWN',0,256), "(retval=$retval)") if($cmdout[0]); undef(@cmdout); return(-1); # bad } &msg(3, 'Passed', $file, "($CFG{keyring})"); return(1); # good } ## experimental lock against multiple instances ## todo: record fatal errors on die() sub lockFile { my ($c, @text) = @ARG; my $file = $c->{cachedir}.'/'.$c->{project}.'.lock.'.$c->{yymmdd}; if(!$text[0]) # unlock { return(unlink($file)) if(-f $file); return(1); } return(1) if($CFG{debug}); die("FATAL: lockfile already present: $file \(@text\)") if(-e $file); # &msg(3, 'Lock', $file); return(&writeFile($file, \@text, '', '', 0)); } ## @files = &readDir($dir, $glob); sub readDir { my ($dir, $glob) = @ARG; die("FATAL: nonexistent directory '$dir'") unless(-d $dir); opendir(DIR, $dir) || die("FATAL: cant opendir $dir: $OS_ERROR"); my @files = sort grep { /$glob/ && -f "$dir/$ARG" } readdir(DIR); closedir(DIR); return(@files); } ## $lines = &readFile($filename, \@array); sub readFile { my ($file,$arrayptr) = @ARG; open(FILE,"<$file") or die("FATAL: open $file: $OS_ERROR"); # binmode(FILE) if($OSNAME eq 'MSWin32'); # need me ? (@{$arrayptr}) = (); close(FILE); my $lines = $#{$arrayptr}+1; # &msg(3, 'READ', $file, "($lines lines)"); return($lines); } ## $lines = &writeFile($filename, \@cmdout, "# $cmdline\n", "# EOF\n", 0); sub writeFile { my ($filename, $arrayptr, $header, $footer, $append) = @ARG; my $lines = $#{$arrayptr}+1; my $output = new IO::File; if($append) { &msg(2, 'APPEND', $filename ,"($lines lines)"); $output->open($filename, O_WRONLY|O_APPEND|O_EXCL) or die("FATAL: append $filename: $OS_ERROR"); } else { if(!$append && -e $filename) { &msg(3, 'TRASH', $filename); &unlinkFile($filename); } &msg(3, 'WRITE', $filename, "($lines lines)"); $output->open($filename, O_WRONLY|O_CREAT|O_EXCL) or die("FATAL: create $filename: $OS_ERROR"); } if($header||$footer||$append) { print $output $header||'', @{$arrayptr}, $footer||''; } else # binary safe { binmode($output) if($OSNAME eq 'MSWin32'); print $output @{$arrayptr}; } undef($output); return($lines); } sub copyFile { my ($source, $dest, $append) = @ARG; &msg(2, 'Copy', "$source -> $dest"); die("FATAL: cant copy nonexistent source: $source") unless(-e $source); die("FATAL: cant copy nonregular source: $source") unless(-f $source); # require File::Copy; # File::Copy::copy($source, $dest) or die("FATAL: cant copy $source $dest: $OS_ERROR"); # return(1); # good my (@array); return(0) unless(&readFile($source, \@array)); return(&writeFile($dest, \@array, '', '', $append)); } sub unlinkFile { my $file = shift; return(0) unless(-e $file); # shortcut &msg(3, 'UNLINK', $file); die("FATAL: nonregular file: $file") unless(-f $file); unlink($file) or die("FATAL: cant unlink $file: $OS_ERROR"); return(1); } ## used on argument -pubdir=./publish sub copyPack { my $infile = shift or die('copyPack: no \$infile'); my $outfile = shift or die('copyPack: no \$outfile'); die('FATAL: nonexistent or empty '.$infile) unless(-f $infile && -s $infile); unless($CFG{keyring} ne 'NULL' && $CFG{exec_gpgv}) { &msg(0, 'ALERT', 'refuse to compress without gpgv and keyring'); return(''); } ## save some cpu if(-s $outfile && ! &fileNewer($infile, $outfile)) { &msg(2, 'Pack', "skip newer '$outfile'"); return(''); } &msg(1, 'Bzip2', $outfile); my $cmdline = $CFG{exec_bzip} or die('FATAL: \$CFG{exec_bzip} not defined'); if($cmdline =~ /{FILE}/) { $cmdline =~ s/{FILE}/$infile/; } else { $cmdline .= ' '.$infile; } # legacy $cmdline .= ' >'.$outfile; my @cmdout = (); my $retval = &forkExec($cmdline, \@cmdout, 0); if($retval) # bad { &msg(0, 'BZIP2', substr("@cmdout"||'ERROR',0,256),"(retval=$retval)"); return(''); } die('FATAL: nonexistent or empty '.$outfile) unless(-f $outfile && -s $outfile); &msg(3, 'Bzip2', "success '$outfile'"); return($outfile); } ## decompress file on disk sub unpackFile { my $infile = shift; &msg(2, 'Bunzip', $infile); die('FATAL: nonexistent '.$infile) unless(-e $infile); die('FATAL: empty '.$infile) unless(-s $infile); my ($outfile,$ext); if($infile =~ /^(.+)\.$CFG{pack_ext}$/) { $outfile = $1; $ext = $2; } die('invalid packer extension: '.$infile) unless($outfile && $ext); my $cmdline; if($ext eq 'bz2') { $cmdline = $CFG{exec_bunzip} or die('FATAL: \$CFG{exec_bunzip} not defined'); } elsif($ext eq 'gz') { $cmdline = $CFG{exec_gunzip} or die('FATAL: \$CFG{exec_gunzip} not defined'); } else { die('unknown packer extension: '.$ext); } if($cmdline =~ /{FILE}/) { $cmdline =~ s/{FILE}/$infile/; } else { $cmdline .= ' '.$infile; } # legacy my @cmdout = (); my $retval = &forkExec($cmdline.' 2>&1', \@cmdout, 0); if($retval) # bad { &msg(0, 'BUNZIP', substr("@cmdout"||'ERROR',0,256), "(retval=$retval)"); return(''); } unless(-f $outfile && -s $outfile) { &msg(0, 'WARNING', "nonexistent or empty: '$outfile'"); return(''); } &msg(4, 'Bunzip', "success '$outfile'"); return($outfile); } ## same as `cat *` via shell sub catDir { my ($dir, $dest) = @ARG; die("FATAL: cant read nonexistent dir: $dir") unless(-e $dir); die("FATAL: not a directory: $dir") unless(-d $dir); ## todo: exclude /^$CFG{prefix}/ to prevent accidential loops my (@files) = &readDir($dir,'^[^.]'); return(0) if($#files<0); &msg(3, 'catDir', join(',', @files), "-> $dest"); if(-e $dest) { ## todo: check if any file in dir is newer than destination &unlinkFile($dest); } my @array; my $file; my $lines = 0; foreach $file (@files) { $lines += &writeFile($dest, \@array, "# import $CFG{useragent} $dir/$file\n", '', $lines) if(&readFile($dir.'/'.$file, \@array)); } # return($lines); return(1) if($lines); return(0); } ## check filesize (and also ownership and permission) ## all files handled by spfilter must pass this check ## global: $CFG{exec_user} sub checkFile { my $file = shift or die('missing \$filename'); my $minsize = shift || 1; # $DEFAULT{minsize} my $maxsize = shift || 2000; # $DEFAULT{maxsize} my $user_uid = $UID || 0; my $filesize; unless(-e $file) { &msg(0, 'CHECK', "nonexistent file '$file'"); return(0); } unless(-s $file) { &msg(0, 'CHECK', "empty file '$file'"); return(0); } die("FATAL: nonregular file '$file'") unless(-f $file); # &msg(4, 'CHECK', "checkFile($file, min=$minsize, max=$maxsize)"); my ($dev,$ino,$mode,$nlink,$file_uid,$file_gid,$rdev,$size,$atime,$mtime,$ctime,$blksize,$blocks) = stat($file); unless($filesize=int(($size+1023)/1024)) { die("FATAL: cant stat file $file (size=$size)\n"); } die("FATAL: file '$file' writable by group or world") if($OSNAME ne 'MSWin32' && $mode & 022); die("FATAL: file '$file' not owned by root or current user: user_uid=$user_uid, file_uid=$file_uid,' exec_uid=",($CFG{exec_uid}||'?'),' exec_user=',($CFG{exec_user}||'?')) if($file_uid && $file_uid != $user_uid && (!$CFG{exec_uid} || $file_uid != $CFG{exec_uid})); if($filesize<$minsize || $filesize>$maxsize) { &msg(0, 'BADSIZE', "$file (size=${filesize}k min=${minsize}k max=${maxsize}k)"); rename($file,"$file.BADSIZE") or die("FATAL: rename($file,$file.BADSIZE): $OS_ERROR"); return(0); } if($filesize < int($minsize*1.2)) { &msg(2, 'SizeLow', $file, "size ${filesize}k less than 20% of minsize ${minsize}k"); } elsif($filesize > int($maxsize*0.8)) { &msg(2, 'SizeBig', $file, "size ${filesize}k more than 80% of maxsize ${maxsize}k"); } &msg(3, 'Check', "$file (uid=$file_uid, ${filesize}k)"); return($filesize); } sub fileDate { my $file = shift; my $format = shift || '%a, %d %b %Y %H:%M:%S GMT'; my $mtime = (stat($file))[9] or return(''); my $modified = strftime($format, gmtime($mtime)); &msg(4, 'MTIME', "$modified ($file)"); ## if($modified =~ /$DEFAULT{taint_text}/) { return($1); } ## die("FATAL: cant untaint ifmod '$modified' /$DEFAULT{taint_text}/"); return(&untaint($modified, 'text')); } ## same as `test $new -nt $old` via shell sub fileNewer { my $newfile = shift or die('fileNewer: missing argument'); my $oldfile = shift or return(1); my $newtime = (stat($newfile))[9] or die("FATAL: cant stat '$newfile'"); my $oldtime = (stat($oldfile))[9] or return(1); ## &msg(3, 'NEWER', "$newfile $newtime $oldfile $oldtime"); return(1) if($newtime>$oldtime); return(0); } ## return uid of user given with argument '-u username' or $CFG{user} sub checkUser { my $name = shift; unless($name) { ## todo: try user 'spfilter' and 'nobody' if run as root &msg(2, 'WARNING', "running '$EXECUTABLE_NAME $PROGRAM_NAME' as root (uid=$UID)") unless($UID||$EUID); return(0); } my $uid_to = getpwnam($name) or die("FATAL: failed getpwnam($name): $OS_ERROR"); my $user_from = getpwuid($UID) or die("FATAL: failed getpwuid($UID): $OS_ERROR"); if($UID && $EUID) # not running root { &msg(2, 'User', "trusting files from user '$name' (uid=$UID)") if($name && $name ne $user_from); # ugly } else { &msg(3, 'User', "current=$user_from($UID), exec_user=$name($uid_to)"); } return($uid_to); } ######################################################### ## some stuff included for portability from alien code ## ######################################################### ## experimental: base64 for http basic authorization ## code integrated for portability ####### 64 encoding from url_get.pl ####### # @(#)url_get.pl 1.18 22 Jan 1996 # @(#)url_get.pl 1.18 /home/magenta/cc/dc/zippy/src/perl/url_get/SCCS/s.url_get.pl # # url_get.pl --- get a document given a WWW URL # # Modified by Jack Lund 7/19/94 to add functionality and deal with HTTP # 1.0 headers # # Hacked by Stephane Bortzmeyer to add "ftp" URLs. # 22 Jan 1994 # # Jack Lund 9/3/93 zippy@ccwf.cc.utexas.edu # # from hget by: # Oscar Nierstrasz 26/8/93 oscar@cui.unige.ch sub to64 { ## changed 'local' to 'my' my ($instring) = @ARG; my ($out) = ""; my ($chunk, $i, $index, $len, $bitstring); my ($basis_64) = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; $len = length($instring); $i = 0; while ($i < $len) { $chunk = pack("a3", substr($instring, $i)); $i += 3; $bitstring = unpack("B*", $chunk); $index = ord(pack("B8", "00".substr($bitstring, 0, 6))); $out .= substr($basis_64, $index, 1); $index = ord(pack("B8", "00".substr($bitstring, 6, 6))); $out .= substr($basis_64, $index, 1); if ($i == $len + 2) { $out .= "="; } else { $index = ord(pack("B8", "00".substr($bitstring, 12, 6))); $out .= substr($basis_64, $index, 1); } if ($i >= $len + 1) { $out .= "="; } else { $index = ord(pack("B8", "00".substr($bitstring, 18, 6))); $out .= substr($basis_64, $index, 1); } } return $out; } ########### ## cidr2octets() integrated for portability ## IPv6 related code removed, and made more robust against bad input ## from Net::CIDR, $Revision: 1.8 $, Copyright 2001 Sam Varshavchik. # cidr2octets() takes @cidr_list and returns a list of leading octets # representing those netblocks. Example: # @octet_list=Net::CIDR::cidr2octets("10.0.0.0/14", "192.68.0.0/24"); # The result is the following five-element array: # ("10.0", "10.1", "10.2", "10.3", "192.68.0"). sub cidr2octets { my (@cidr) = @ARG; my @r; while ($#cidr >= 0) { my $cidr=shift @cidr; $cidr =~ s/\s//g; die unless ($cidr =~ /(.*)\/(.*)/); my ($ip, $pfix)=($1, $2); my @ips= split (/\.+/, $ip); grep { die unless $ARG >= 0 && $ARG <= 255 && $ARG =~ /^[0-9]+$/; } @ips; ## die("bad prefix: $pfix") unless $pfix >= 0 && $pfix <= ($#ips+1) * 8 && $pfix =~ /^[0-9]+$/; ## friendly variant ignoring bogus cidr, could try to fix up instead unless($pfix >= 0 && $pfix <= ($#ips+1) * 8 && $pfix =~/^[0-9]+$/) { print STDERR "cidr2octets($ip/$pfix): incomplete notation\n"; next; } my $i; for ($i=0; $i <= $#ips; $i++) { last if $pfix - $i * 8 < 8; } my @msb=splice @ips, 0, $i; my $bitsleft= $pfix - $i * 8; if ($#ips < 0 || $bitsleft == 0) { push @r, join(".", @msb); next; } my @rr=_cidr2range8(($ips[0], $bitsleft)); while ($#rr >= 0) { my $a=shift @rr; my $b=shift @rr; grep { push @r, join(".", (@msb, $ARG)); } ($a .. $b); } } return @r; } sub _cidr2range8 { my (@c) = @ARG; my @r; while ($#c >= 0) { my $a=shift @c; my $b=shift @c; die unless $a >= 0 && $a <= 255 && $a =~ /^[0-9]+$/; die unless $b >= 0 && $b <= 8 && $b =~ /^[0-9]+$/; my $n= 1 << (8-$b); $a &= ($n-1) ^ 255; push @r, $a; push @r, $a + ($n-1); } return @r; } ### end of code - POD, XML and GPG signature appended on publish ### =head1 NAME SP Filter 0.59 (finally considered beta...) =head1 DESCRIPTION 1) fetch multiple public available ip-based access-lists into the local cache-directory, create diffs and clean out old copies. supports either LWP, wget or rsync, *.bz2 transparently handled. 2) read entries from local cache into memory (hash), convert cidr- netmask into octets and dedupe/consolidate in one single pass. sort entries and write file in the format of your preferred mta, optionally preserve/reimport existing lines (magic_update). =head1 OPTIONS ./spfilter.pl -verbose -format=sendmail,postfix,... SOURCE ... ./spfilter.pl [ -verbose ] [ -debug ] [-format=format,... ] [ -cachedir=./cache ] [ -outdir=(./outdir|outfile|STDOUT) ] [ -workdir=workdir] [ -pubdir=./publish ] [ -user=spfilter ] [ -xmlconf=./spfilter-local.xml ] [ -keyring=(NULL|spfilter-keyring.gpg) ] [ -tiehash=(NULL|/tmp/tiehash.gdbm|/tmp/tiehash.db) ] [ -email=responsible#contact.dom ] [ -zone=localhost,127.0.0.1,43200 ] SOURCE [SOURCE2] ... only the first character after '-' is relevant, the single char arguments from previous versions will still work. keep using the short version in scripts, as the handling of long arguments (wording) may change any time. -c, -cachedir=: directory for cached sources defaults './cache', directory must exist old cached sources will be purged after successful fetch -d, -debug: boolean, for testing and linting, may use multiple -e, -email=: string passed in the HTTP_USER_AGENT, max. 48 chars default: list sources as specified by -source or @ARGV -f, -format=: output format(s), as named in the xml-config default: octets (tab-delimited, no quotes) for mta's: courier, exim, postfix, rblsmtpd, qmail_uce, sendmail for dnsbl: rbldnsd, tinydns, bind and generic 'reverse' for benchmarking: queryperf (from bind/contrib) multiple formats may be specified separated with comas use 'cdb', 'gdbm' or 'db' to compile output into DB_File NOTE: contents of -format may be used as part of USER_AGENT -h, -help: boolean, display built-in manpage (this document) -k, -keyring=: use the named keyring to verify spfilter-config.xml default 'spfilter-keyring.gpg', in './' or '/usr/local/etc' the Makefile will generate the keyring on 'make keyring' specify 'NULL' for keyring to disable gpg-functionality -l, -log: log to syslog, named file or email (not implemented) -o, -outdir=: directory or filename with optional path default './outdir' if exist, or the current workdir '.' specify '-outdir=STDOUT' for use in pipes (with singe format) -p, -pubdir=: directory to publish *.bz2 for redistribution three subdirectories required: ./input, ./diff and ./output example1: http://spfilter.openrbl.org/data/ example2: http://mirror.openrbl.org/spfilter/ note: primary sites should run spfilter one time per day, in the time from 00:30 to 01:30 UTC (GMT) -q, -quiet: decrease verbosity (see also -verbose and -debug) -s, -source=: input sources (legacy, just list them on commandline) default: import all from set DEFAULT set DEFAULT equivalent to: -source=SPEWS,SPAMSITE,PDL for relays add DSBL and/or RSL: -source=DEFAULT,RELAYS set RELAYS equivalent to: -source=DSBL,DSBL_MULTIHOP,RSL please refer to spfilter-config.xml, preset_section NOTE: contents of -source may be used as part of USER_AGENT -t, -tiehash=: tie working hash to in-memory or file-db DEFAULT: none, will trade memory against cpu and disk '-tiehash=NULL': use in-memory db, reduce memory usage by 50% '-tiehash=/tmp/spfilter-tiehash.$$.gdbm': reduce memory by ~70% '-tiehash=/tmp/spfilter-tiehash.$$.db': whatever works better -u, -user=: drop root-privilegies for external program ($CFG{user}) default setuid user 'nobody' if run by root (uid 0) WARNING: this setuid is not save, start spfilter with: su nobody "perl ./spfilter -format=sendmail" -v, -verbose: verbose output increase level of verbosity with multiple -vv (see also -d) -w, -workdir=: chdir into this directory on startup default none (no chdir will be done by spfilter) setting simplifies usage from cron and in pipes (with STDOUT) -z, -zone=: dnsbl-in-a-box (with -f bind and/or tinydns) default -zone=localhost,127.0.0.1,43200 should work everywhere =head1 BUGS - timestamps sometimes based on .yymmdd-extensions, and sometimes the file modification-time (for If-Modified-Since). use the tracker at http://sourceforge.net/projects/spfilter/ code still considered alpha, backup your files as always =head1 FILES ./spfilter-config.xml: definition of sources and formats self-updating copy kept in the directory ./cache ./spfilter-pubring.gpg: verify embedded signatures with gpg files are internally signed with gpg, verify with 'make verify' files will be searched in '.' and in /usr/local/etc files must be owned by root or the user running spfilter and cant have any group- or world-writable permissions. same for files reused from the cache-directory (-cachedir) =head1 Author/License spfilter(at)gmx.net. QPL licence apply =head1 Prerequisites Perl5 with LWP::UserAgent (libwww-perl) or wget in $PATH, XML::Simple (included in tarball), bunzip, rsync recommended and optionally diff for primary sites. mta with support for ip-based access-lists or nameserver. =head1 Installation - check if you have all the necessary executables in $ENV{PATH} by running: `which -a perl rsync wget bunzip` - primary (publishing) sites also need gpgv, diff and bzip2 - make shure to have the perl-module XML::Simple installed (available at CPAN) or use the one included in ./XML/Simple.pm - fetch the Makefile into an empty directory, run 'make 'all' this will create the two subdirectories ./cache and ./outdir, fetches the public-key, generate the keyring and finally also fetches and verify both spfilter-config.xml and spfilter.pl. - run `./spfilter.pl -vd TEST_LIST`, or simply 'make test' - ./spfilter-config.xml: review for your own safety, its signed - ./spfilter-local.xml: used only with '-x ./spfilter-local.xml' - enable output-format for your mta or dnsbl with argument -f - its recommented to let spfilter write into the default ./outdir, and set a symlink from the location your application (mta, nameserver etc) expects. optional: - use argument -s to specify your own set of input-sources - magic_update => 1 preserves existing lines even across updates - count the existing lines and run twice to check the 'magic' - !!! new code: use -v and check the daily output from cron !!! =head1 Configure Input input sources have already been defined in spfilter-config.xml: some of them are: [SPEWS|SPEWS2] SPAMSITE PERMBLOCK PDL RSL [KOREA|CHINA|KRCN] (cant list them all here, check out spfilter-config.xml also at http://spfilter.openrbl.org/code/xml-view.php default setting equivalent to: ./spfilter.pl SPEWS SPAMSITE PDL ./spfilter.pl -s SPEWS,SPAMSITE,PDL ./spfilter.pl DEFAULT ./spfilter.pl several sets of sources have been defined in spfilter-config.xml, they will be recursively expanded to the regular sources. see http://spfilter.openrbl.org/code/xml-view.php#PRESET_SECTION IMPORTANT: you also need to check incoming mail against a realtime dnsbl for open relays and proxies. The persistent ones will end up on DSBL and Wirehub's great PERMBLOCK but (unfortunately) not in realtime. If you dont use relays.osirusoft.com, dnsbl.njabl.org etc. enable DSBL and update daily. please always use rsync:// for DSBL and WIREHUB, dont waste bandwidth. For 'complete' protection also use bl.spamcop.org (via dns) and consider enable KOREA TAIWAN HONGKONG - depending on your location. keys for %SRC in spfilter-config.xml: (only 'url' mandatory) type: /^(addr|cidr|range|reverse|axfr|host)/), to be documented interval (number): reuse cached files up to interval days (3) tag (string): prepend this instead of the name, may be set to 'NULL' SBL hack: if the tag ends with = there will be no space after prepend (string): append string after text (default none) ! deprecated, will be removed, legacy support only ! use the tag "tag" instead, config.xml already updated append (string): append (optional) string and ip after $text url (string): http-, ftp- or rsync- or file-resource: - url's ending with *.bz2 will be decompressed transparently - use relative or absolute path for local sources - macro {YYMMDD} expandes to UTC (GMT) datestamp - macro {FILENAME} expands to the contents of that key conflict (string): warn if this source is already defined - only partially implemented, never trust a dumb machine ;) =head1 Configure Output predefined output-formats in spfilter-config.xml: octets, courier, exim, postfix, qmail_uce, rblsmtpd, sendmail reverse, rbldnsd, tinydns, bind default output format equivalent to '-format=octets' specify multiple formats separated with coma or whitespace keys for %FMT in spfilter-config.xml: (all optional) default (boolean): 0=disabled, 1=enabled (default 0) type (string): 'addr', 'cidr/nn', 'range', 'config', 'rbldns', 'axfr/cname', 'axfr/txt' or 'axfr/a' (default 'octet') linestart (string): prepended to the begin of each line (default none) separator (string): inserted between $addr and $text (default "\t") lineend (string): appended after $text (default none) magic_update (boolean): preserve manually inserted lines silently ignored if output sent to '-outdir=STDOUT' or DB =head1 GPG keys and signatures - spfilter will use only the own keyring (spfilter-keyring.gpg) and will accept any good signatures from the keys listed there. dont add other public-keys unless you know what you are doing! - spfilter-config.xml and spfilter.pl contain embedded gpg-signature verify manually with 'gpgv --verify' as usual if you have the public-key in your trusted keyring (which is deprecated!). use 'make verify' instead, no need to mess up existing keyrings. - build the pubkey: 'make pubkey' will fetch the pubkey from keyserver, build the gpg-keyring for spfilter in a (hopefully) save way. 'make verify' additionally checks the embedded gpg-signatures. - public key for spfilter@openrbl.org available at: http://search.keyserver.net:11371/pks/lookup?op=vindex&template=netensearch&search=spfilter http://pgp.mit.edu:11371/pks/lookup?search=spfilter&op=index&fingerprint=on - if you prefer to build the keyring manually: (all in one line) gpg --no-default-keyring --keyring ./spfilter-keyring.gpg \ ./doc/spfilter-pubkey.asc - - - - ERRATA: old instructions told you to import spfilter's public key. this is not needed anymore as the detached signarure of the *.tgz tarball has been discontinued and deprecated for security reasons: $ gpg --import ./spfilter/doc/spfilter-pubkey.asc $ gpg --verify ./spfilter-$VERSION.tgz.asc ./spfilter-$VERSION.tgz =head1 Global Hashes (as reference, not complete, use the code) %CFG: workdir "." # opw_w debug 0 # opt_d verbose 0 # opt_v interval sources "ONE,TWO,THREE,..." # opt_s and/or @ARGV formats "one,two,three,..." # opt_f email $sources # show in HTTP_USER_AGENT tempfile "" # tie temporary hash, opt_t xmlfile "" # opt_x (additional local config.xml) cachedir "./cache" # opt_c outdir "./outdir" # opt_o pubdir "./publish" # opt_p exec_user "nobody" # should use 'spfilter' if available exec_uid -1 # uid from exec_user exec_path "/bin:/usr/bin:/usr/local/bin" # should be safe exec_http "wget ..." # alternative to Perl::LWP exec_rsync "rsync ..." # recommended pack_ext '(bz2|gz)' # gz not tested exec_bunzip "bunzip ..." # strongly recommended exec_gunzip "gunzip ..." # you tell me if it works ;) exec_bzip "bzip2 ..." # for republishing on primary sites exec_diff "diff ..." # used if available exec_patch "patch ..." # not implemented yet exec_gpgv "gpgv ..." # strongly recommended keyring 'spfilter-keyring.gpg' # $opt_k (use NULL to disable) zone_name "localhost" # $opt_z (first field) zone_addr "127.0.0.1" # $opt_z (second field) zone_ttl "43200" # $opt_z (third field) program "spfilter" version "0.00" date "YYMMDD" useragent "$program/0.00" magic yymmdd "YYMMDD" # always uses UTC (~GMT) count_cached 0++ count_notmodi 0++ count_fetched 0++ %SRC: name # auto-generated, dont mess with url_primary # experimental, for use by redistributing sites only url # multiple tried in order, {FILENAME} and {YYMMDD} expanded interval # interval in days between updates type alias # experimental, use the same file in ./cache filename # explixitely set filename in cache, defaults to $name minsize 1 # min kb, reject anything below 513 bytes (rounded) maxsize 2000 # max kb, protect somewhat against dos conflict # only one single value handled regexp_include # perl-regexp, will be enclosed in =~/.../ regexp_exclude # perl-regexp, will be enclosed in !~/.../ option # experimental axfrexpand, notext, html2text tag # prepend to each line of output, defaults to $name prepend # DEPRECATED hack, comes just bevore $append ;) append # construct url, $addr appended to string cache_status # -1: 304 Not Modified, 0: 404 Error, 1: 200 OK cache_fname # name of cached file cache_ifmod # contains If-Modified-Since date for HTTP cache_fetched # name of fetched file %FMT: name # the key itself, dont set or change type "txt" publish 0 magic_update include "" # include content verbatim in output notation "octet" linestart separator "\t" lineend secondline # print additional lines, for bind and tinydns secondlinestart option # [bindhack|tinydnshack|tcpserverhack] =head1 Windows (ActiveState Perl) - spfilter runs with ActiveState which has all modules already included http://aspn.activestate.com/ASPN/Downloads/ActivePerl/Source http://downloads.activestate.com/ActivePerl/Windows/5.6/ActivePerl-5.6.1.633-MSWin32-x86.msi spfilter-bunzip-cmd.zip contains bunzip2.exe and a cmd-sample - spfilter has been reported on recent versions of cygwin, some more documentation welcome. (check out ./docs) http://cygutils.netpedia.net/ http://webmaster.indiana.edu/perl56/pod/perlcygwin.html http://search.cpan.org/author/COOPERCL/XML-Parser-2.31/ (or http://search.cpan.org/author/MSERGEANT/XML-SAX-0.11/) http://search.cpan.org/dist/XML-Simple/ (or ./XML/Simple.pm) Note: there are reports after 'perl Makefile.PL' the variables PERL, FULLPERL and PERL_CORE may all have assigned '0' (zero) and 'make' will fail badly. (W2K, 2002-11-01) - WARNING: console application only, no colors and mouse support ! mailservers with less than a few thousand mails per day are better off using traditional dnsbl-queries. Windows 2000 with a fixed ip and bind9 may still be used as a dnsbl-server for zones generated by spfilter. nameserver, consulting and support available for serious projects =head1 ToDo List - consistent handling of keywords OK (WHITELIST) and FREEMAIL (MXCHECK) - aggregate input, create index hash from textual description - aggregate output, optionally into cidr or range - modularize the code, split into input.pl, output.pl and spfilter.pm - courses-based selection for sources and formats (contribute!) - update from daily diffs, uses only 2..10% (see /data/input/diff) - documentation (may be submitted via sourceforge project home) (suggestions, patches and working code always welcome) =head1 History & ChangeLog =over 4 =item * 0.59++current (updated 2003-10-12) - major new features wont be implemented as already announced, development will concentrate in Bliab, as time permits - support for rbldns-style sources with expanding excpetions, primary for use with Easynet Dynablock (10mb instead of 50mb) - 'DYNABLOCK' renamed to 'EASYNET_DYNA', uses now rbldns-format - 'PERMBLOCK' renamed to 'EASYNET', old legacy names still work - several minor adjustments in spfilter-config.xml as always - format 'cidr', appends /8,/16,/24 or /32 for use in firewalls =item * 0.59++current (updated 2003-02-16) - Wirehub! officially renamed to Easynet, zones adjusted (2002-05-17) - added official support for SBL via http://mirror.bliab.com - added option 'html2text' for sources, for SBL until they fix it - ommit whitespace between tag and text if tag ends with '=' (for SBL) - key 'prepend' deprecated, include text into key 'tag' instead - added option 'notext' for sources, for ISP_ from blackholes.us - changed predefined text for ISP_ to something more polite - bugfix: make default TTL configurable for tinydns =item * 0.59 (updated 2003-02-01) - 2003-05-10: maintenance, recent spfilter-config.xml integrated - no big changes, but its time to bump the version as always, wait a few days if you dont want to discover new bugs - reject source if Content_Length and Response_Length dont match (LWP only) - bugfix: spfilter died at zero-sized files with Status 200 (LWP only) - first experimental bits for hashed strings needs $ENV{DEBUG_STRHASH} and some code still missing - contributions welcome - AXFR-sources: expand parent block of exceptions (void CNAME's) may be used for blacklisting now, but keep an eye for bugs new sources: DYNABLOCK_EXPAND, NOMORE_EXPAND, FIVETEN_EXPAND =item * 0.58++ (2003-01-11) - Makefile: fixed fetching of pubkey, as noted by Bert Driehius - added format: 'rbldnsd' and 'queryperf' /see ./doc/rbldnsd.txt) - bugfix: make -q really quiet (patch submitted by robhardy) - bugfix: empty retrieved file caused spfilter to die() - bugfix: removed 'make clean' because of non-portable 'rm -d' - spfilter-config.xml embedded into spfilter.pl for safe fallback everything is now contained in one single file (spfilter.pl) this solves the 'chicken and egg' problem with live-update. - live-update functional (requires gpgv in $PATH and keyring.asc) spfilter will use the latest cached xml, if signature passes Note: having 'spfilter-config.xml' in . or /usr/local/etc effectively disables this feature. So does $ENV{NOCACHED} - Windows/ActiveState Perl (MSWin32) tested and supported (32mb cap) - fixed bug with tinydns_uce: new key 'option="tcpserverhack"' - add new keys 'option="bindhack"' and 'option="tinydnshack"' - add new format 'rblsmtpd' for qmail, sets variable $RBLSMTPD - add experimental support for DRBL, requires hacked build_drbl - fixed RELAYCLIENT="" for tcpserver, as noted by Marek Les - Target 'clean' in Makefile removed, 'rm -f' not portable on Linux =item * 0.58 (2002-11-01) - predefined set of sources (see spfilter-config.xml) just list the names (SPAM, RELAYS, DYNAMIC, COUNTRY etc, they will be expanded accordingly. XML-viewer available at: http://spfilter.openrbl.org/code/xml-view.php#PRESET exclude sources from set with leading dash: 'SPAM,-PERMBLOCK' WARNING: listing all will cost lots of ram, cpu and bandwidth! - installation and upgrade simplified, you only need the Makefile 'make update' will fetch and verify the most recent files. - renamed 'LIVE_UPDATE' to 'update->spfilter-config.xml' for the forthcoming database integration. Update-feature now fully working, but still considered experimental. - verification of embedded signatures with gpg completed spfilter refuses to '-p ./publish' without gpgv and keyring WARNING: contents outside of signatures not checked yet ! - spfilter now uses LWP::UserAgent (libwww-perl) by default, with fallback to wget. Set NOLWP=1 to disable, and report why. - argument '-e responsible@contact' will be passed in HTTP_USER_AGENT robots should always provide means to contact the operator. - arguments now processed by Getopt::Long, needs some tweaking - all output done via centralized function, log to syslog planned =item * 0.57 (2002-10-20) - spfilter-config.xml and spfilter-pubkey.gpg may be in /usr/local/etc - embedded signature for spfilter.pl, just like spfilter-config.xml - verify gpg-signature of spfilter-config.xml if gpgv exists in $PATH WARNING: gpg-functionality disabled without spfilter-keyring.gpg - key 'grep' renamed to 'regexp_include', key 'regexp_exclude' added - argument -s optional, just list all sources on the commandline - check path for available binaries (wget, bunzip2, gpg, etc) on startup - experimental key 'primary_url' for use by redistributing primary sites - not-modified-since now works with multiple updates per day - cleanup: XML::Simple.tgz and .htaccess removed from sourceball - detached signature discontinued, too much passphrases to type... =item * 0.56 (2002-10-07) - spfilter has been reported to run on W2K with recent Cygwin - LIVE_UPDATE moved into own section, incompatible with 0.55 - spfilter-config.xml: embedded cleartext-signature integrated - couple of internal structural changes, still compatible =item * 0.55 (2002-10-03) - file spfilter-$VERSION.pl renamed to spfilter.pl - file spfilter-remote.xml renamed to spfilter-config.xml - argument '-p ./publish' republish *.bz2 via http - smart prefix for output-files: use the name of the source if possible, or the key 'filename' if defined - auto-updated config in ./cache/spfilter-config.xml experimental, file has to be copied manually into ../ - argument '-t NULL' <64mb or -t /tmp/file.gdbm' <32mb - new key 'tag': prepended to the output instead of the name - support for blackholes.us, currently defined sources: ISP_CIBERLYNX,ISP_ELI,ISP_HE,ISP_INFLOW,ISP_INTERNAP,ISP_VERIO - select all the ISP_*-sources via (pseudo-)wildcard: 'ISP_' =item * 0.54 (2002-09-19) - header If-Modified-Since used for HTTP if applicable - this made a couple of internal changes necessary: only wget supported, bunzip2 used instead of bzcat - license changed from BSD to QPL, but still open-source - DEFAULT now includes SPEWS Level #1 (instead Level #2) - dnsbl-in-a-box: specify zone and addr via argument -z usage: -f bind,tinydns -z dnsbl.example.com:11.22.33.44 - experimental source: FIVETEN and DYNABLOCK (expand $GENERATE) - experimental key 'grep' added for building of meta-blacklists - FLOWGOAWAY,TAIWAN and HONGKONG now mirrored via HTTP/bz2 - source tests with perl 5.005 and 5.8, on FreeBSD and Linux =item * 0.53 (2002-07-15) - support for cdb, db and gdbm even with multiple formats - built-in manpage outsourced (./doc/spfilter.pod[.html]) =item * 0.52 (2002-06-30) - check, ignore and warn on invalid cidr-mask (SPEWS did it again) - pseudo-source 'DEFAULT' added, for use with 'DEFAULT,...' - send $SRC{name}:$SRC{name} via HTTP_BASIC_AUTH (experimental) - create diff on each successful fetched source - support to import all files from local dir://, see BADFROM - support for name-based lists added, see SPAMLIST_EXTENDED - argument -o also takes filename (if only one format specified) =item * 0.51 (2002-06-18) - support for rsync via fork; setuid and reaper implemented - wirehub's PERMBLOCK now available via rsync, saves 90% - DSBL[_whatever] available via rsync, xml-config updated - multiple url's will be tried in order of listing - set env NORSYNC=1 if you dont have rsync (lame excuse) or dest-port 6666 has been firewalled (as on sourceforge.net) - format sqldump (INSERT INTO ...) added to xml-config =item * 0.50 (2002-06-16) - configuration split up in two different files (local and remote) - config-format changed to xml, spfilter now requires XML::Simple - fallback to older cached copy (max 30 days) on failures from fetch =item * 0.4x (2002-06-11, consolidated) - read sources from local filesystem, use 'file://...' or ./path. - sent output to dup of STDOUT with argument '-o STDOUT' or '-o -' - all textual/informational output will be sent to STDERR only - Support for PDL Dynamic Dialup List (enabled by default) - Support for RSL Visi Relay Stop List (disabled by default) - Support for KOREA and CHINA (http://www.okean.com/asianspamblocks.html) - Support for Wirehub's PERMBLOCK (great list, now enabled by default) - bugfix on getgrnam and chuid suggested by andrew@***.au - tinydns dnsbl output-format added - argument -u: drop privilegies on fork/exec (see $CFG{user}) =item * 0.3x (2002-04-01, consolidated) - magic_update=>1 preserves alien lines in existing datafiles - bug with postfix reported by Ian Vaudrey - quotes removed - major code cleanup & rearangement - initial release, based on fetchspews.pl =back =head1 SEE ALSO homepage: http://spfilter.openrbl.org/ mirror: http://mirror.openrbl.org/spfilter/code/ =cut __DATA__ # this file in the current directory . or /usr/local/etc prevents # spfilter from using the cached 'live' copy at all. # its safe to delete this file, spfilter.pl has all the xml embedded # the cleartext-signature will be verified if gpgv available in path. # - section 'preset' describes aliases for sources # - section 'source' describes input-sources (url) # - section 'format' describes output-formats # changes should be done exclusively in ./spfilter-local.xml # which # may be included with argument '-x ./spfilter-local.xml' # the trusted local config allows to override sources, formats # and most config-variables (not complete, work in progress) # use html-entities for this chars: '<', '>', '&' and '"' # XML-Viewer at http://spfilter.sourceforge.net/code/xml-view.php # Aliases listed here will be expanded recursively # the *_ALL variants are not meant for automated blocking # the *_SAFE variants should be safe for most mailservers # more aliases and sources may be added on request # update local copy below ./cache, verify gpg-signature # file still must be copied manually to the destination spfilter xml-configuration http://spfilter.sourceforge.net/code/xml-view.php 26BDCEF3 984C 6100 1C0E 5813 4077 6C48 051F C28D 26BD CEF3