#!/usr/bin/perl
#
# Id     : qq-rbl.pl
# Author : Leonhard Fellermayr <leosmail@mac.com>
# Purpose: Checks mails for being spam by checking IP's against real
#	   time mail blacklisting databases and looks for standard mail
#	   headers.
# History: Sep 22, 2001

# ----- used modules

use strict;
use Mail::RBL;
use Net::DNS;
use Time::localtime;

# ----- some global vars

# the qmail queue binary/script to pipe data to
my $qmail_queue = "/var/qmail/bin/qmail-scanner-queue.pl";

# file containing nasty words to be searched for
my $spammer_file = "/usr/share/spammers.lst";

my $num_ip_max = 20;			# max number of IPs to be checked

my $logfile = "/tmp/blacklisted.log";	# our logfile
my $logging = 1;			# log blacklisted mails to $logfile;

my $debug = 0;				# when set, read message from STDIN

my $VERSION = '092201-2300';		# current version of this script

# list of rbldns hosts to be used
my @rbl_list = qw(
	Relays.OsiruSoft.com
	dialups.mail-abuse.org
	blackholes.mail-abuse.org
	relays.mail-abuse.org
        );

# ----- qmail failure handler

sub fail {
  my ($exitval, $msg) = @_;
  warn $msg,"\n";
  exit($exitval);
}

# ----- word check - check for nasty words in given array

sub word_check (@) {
  my @spammers = read_spammers ($spammer_file);
  my $pattern = join '|', @spammers;
  $pattern =~ s/\n//g;
  foreach my $header (@_) {
    return 1 if ($header =~ /($pattern)/io);
  }
  return 0;
}

# ----- sub read_spammers ($) - read nasty words from a file

sub read_spammers ($) {
  my @list = ();
  my $sf = shift;
  open (SF, "<$sf");
  foreach (<SF>) {
    push @list, $_;
  }
  close (SF);
  return @list;
}

# ----- sub lookup ($) - perform some DNS lookup on given host names

sub lookup (@) {
  my $exists = 0;
  my $res = new Net::DNS::Resolver;
  foreach my $host (@_) {
    my @mx = mx ($res, $host);
    if ($#{@mx} + 1) {
      $exists++;
    } else {		# additionally check for an A record if no MX present
      my $query = $res->search ($host);
      if ($query) {
        foreach my $rr ($query->answer) {
          if ($rr->type eq "A") {
	    $exists++;
	    last;
	  }
        }
      }
    }
  }
  return 1 if ($exists eq $#{@_} + 1);
  return 0; # (else)
}

# ----- sub get_ips (@) - fetch $num_ip_max IPs from the message header

sub get_ips (@) {
  my @field = ();
  my ($ip_tmp, $ip, $num_ip);
  foreach (@_) {

	("$_" eq "\n") and last;

	if( $_ =~ /^Received:\s+from\s+.*(\[|\()(\d+\.\d+\.\d+\.\d+)(\]|\))/i ) {
		$ip_tmp = $2;
		$ip = "";		
		is_bad_ip("$ip_tmp") and next;

		$ip =~ /\S/ and $ip .= " ";
		$ip .= $ip_tmp;
		$num_ip++;

		last unless ($num_ip < $num_ip_max);
		$ip =~ /\S/ and push @field, $ip;
	}
  }
  return @field;
}

# ----- sub is_bad_ip() - check for RFC1918 addresses and other stuff

sub is_bad_ip {
	my($ip) = $_[0];
	# check for non-IP constructs
        my @parts = split /\./, $ip;
	foreach (@parts) {
	  return 1 if ((length($_) gt 3) or (length($_) lt 1));
	  return 1 if ((length($_) eq 3) and ($_ gt 255));
        }
	my($i);
	my(@invalid_ip) = (
		"10.",
		"172.16.",
		"192.168.",
		"0.",
		"255.",
		"127."
		);
	foreach $i (@invalid_ip) {
		($ip =~ /^\Q$i/ ) and return 1;
	}
	
	return 0;
}

# ----- spam_warn ($) - return spam warning text

sub spam_warn ($) {
  my $txt = shift;
  return ("X-Spam-Warning: $txt\n");
}

# ----- sub process_header (@) - process message header

sub process_header ($) {

  my $header = shift;
  my @header = split /\n(?!\s)/, $header;	# get logical lines

  # extract IPs

  my @ips = get_ips (@header);

  my ($hdrs_ok, $upper, $fnumber, $strange_chars, $authwarn, $recdns_failed, $surespam) = 0;
  my ($from, $subject);
  my (@wordcheck_headers, @dnscheck_headers, @ret) = ();

  # set up RBLs

  my %rbls;
  foreach my $rbl (@rbl_list) {
    $rbls{$rbl} = new Mail::RBL($rbl);
  }

  # parse message header

  foreach (@header) {

	if (/.*\n.*/) {				# put logical lines together
	  $_ =~ s/\n\s+/ /g;			# to one long line
	}

        # 1) look for the presence of standard mail header lines

        $hdrs_ok++ if (/^(From|To|Subject|Message-ID|Date):.*$/i);

	# 2) set up array with headers to be checked against word-blacklist

	if (/^Received: from.*$/) {		# all "Received: from" headers
	  my $this = $_;
	  chomp ($this);
	  $this =~ s/^Received: from //g;	# extract host name
	  $this =~ s/ .*//g;
	  if ($this =~ /.*\@.*/) {		# the return path could be looked up
	    $this =~ s/.*\@//g;
	    push @dnscheck_headers, $this;
	    push @wordcheck_headers, $this;
	  } elsif (!($this =~ /^((\[\d+\.\d+\.\d+\.\d+\])|localhost|unknown)$/i)) {
	    push @wordcheck_headers, $this;
	  }
	}
						# more headers for word_check()
	if (/^(From|Sender|Subject|Reply-To|X-.*):.*$/i) {
	  push @wordcheck_headers, $_;
	}

	# 3) check for suspicious formatting in subject (much uppercase etc.)

        if (/^Subject:.*$/i) {
          $subject = $_;
	  chomp ($subject);
	  my $subject_tmp = $subject;
	  $strange_chars++ if (/.*([\!\$\%]).*/);
	  $subject_tmp =~ s/[^A-Z][A-Z]|[A-Z][^A-Z]//g;
	  $subject_tmp =~ s/[^A-Z]//g;
	  $upper++ if (length($subject_tmp) > 7);
	}

        if (/^(From|Sender):.*$/i) {

	  # 4) check for too many digits within the sender address

	  $fnumber++ if ($_ =~ /.*([0-9]{3,}).*$/);

	  # 5) extract sender domain and put it into @dnscheck_headers

          $from = $_;
	  chomp ($from);
	  my $from_tmp = $from;
	  $from_tmp =~ s/^(.*)([A-Za-z0-9_.-]+@)(([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4})(.*)$/$3/g;
	  push @dnscheck_headers, $from_tmp;
        }

	# 6) check for some well-known and/or suspicious spam headers

	$authwarn++ if (/^(X-Authentication-Warning|Date-warning):.*$/i);

	$surespam++ if (/^(X-Adverti[sz](e)?ment|X-PMFLAGS|X-Other-References):.*$/i);
	$surespam++ if (/^X-Info:.*(Zenith Bulk|EmailShark|Bulk Email).*$/i);

	# 7) note if qmail failed to DNS-lookup some "Received: ..." hosts

	$recdns_failed++ if (/^Received:.*(unknown|unverified).*$/i);

  }

  # ... now parsing of the header is complete.

  # 8) run DNS lookup check on the created array

  if ($debug) { foreach my $entry (@dnscheck_headers) { print "DNS -> $entry\n"; } }

  push @ret, spam_warn ("Could not reverse lookup sender address via DNS.") if (!lookup (@dnscheck_headers));

  # 9) run word_check on the created array

  if ($debug) { foreach my $entry (@wordcheck_headers) { print "WORDCHECK -> $entry\n"; } }

  push @ret, spam_warn ("Message contains nasty words.") if (word_check (@wordcheck_headers) );

  # 10) check extracted IPs against black list

  foreach my $rbl (sort keys %rbls) {
    foreach my $ip (@ips) {
      print "IP-RBL-CHECK: $ip over $rbl\n" if ($debug);
      push @ret, spam_warn ("$ip is listed by $rbl!") if $rbls{$rbl}->check($ip);
    }
  }

  # put the other results all together

  push @ret, spam_warn ("There are some standard mail headers missing.") if ($hdrs_ok ne 5);
  push @ret, spam_warn ("Too much following uppercase letters in subject.") if ($upper);
  push @ret, spam_warn ("Sender address contains many digits - suspicious.") if ($fnumber);
  push @ret, spam_warn ("Strange characters within subject.") if ($strange_chars);
  push @ret, spam_warn ("There already are X-Auth-Warnings.") if ($authwarn);
  push @ret, spam_warn ("Message has passed non-resolvable servers.") if ($recdns_failed);
  push @ret, spam_warn ("There are some official spam headers.") if ($surespam);

  my $infostr = "X-Information: qq-rbl v. $VERSION filtering (c) 2001 by lf was passed.\n";
  $debug ? print $infostr : print QQEOUT $infostr;

  # if message is black and logging is enabled, write essential information to logfile

  if ( ($#{@ret} + 1) and ($logging) ) {
    open (LOGFILE, ">>$logfile");
    my $tm = localtime (time ());
    print LOGFILE $from . " / " . sprintf ("%02d.%02d.%04d %02d.%02d", $tm->mday, $tm->mon + 1, $tm->year + 1900, $tm->hour, $tm->min) . "\n";
    print LOGFILE $subject . "\n";
    foreach my $warning (@ret) {
      print LOGFILE $warning;
    }
    print LOGFILE "------\n";
    close (LOGFILE);
  }

  # now finally hand the added warning headers back

  return (@ret);

} # sub

###########################################################################
#
# main program
#
###########################################################################

my $qq_pid;

if (!$debug) {

  # we get our message contents on fd0 from qmail-smtpd or ofmipd.

  open(SMTPEIN, "<&=0") or fail(54, "dup(fd0) failed (#4.3.0) - $!");

  # Create a pipe so we can wedge ourselves between qmail-smtpd/ofmipd and
  # qmail-queue. We pass fd1 (the message envelope) straight through since
  # we don't care about it for purposes of rblchecks.

  pipe (QQEIN, QQEOUT)  or fail(51, "pipe() failed (#4.3.0) - $!");

  $qq_pid = fork;
  fail(51, "fork() failed (#4.3.0) - $!") unless defined $qq_pid;

}

if ( ($qq_pid == 0) and (!$debug) ) {

  # unset QMAILQUEUE so that we don't get executed again by accident,
  # causing an infinite loop.

  delete $ENV{QMAILQUEUE};
  close QQEOUT;	# don't need this half of the pipe

  # wedge between stdin and the pipe

  open (STDIN, "<&QQEIN") or fail(54, "dup(pipe) failed (#4.3.0) - $!");
  exec $qmail_queue       or fail(51, "exec($qmail_queue) failed (#4.3.0) - $!");

} else {

  if (!$debug) {
    $SIG{'PIPE'} = 'IGNORE';
    close QQEIN;	# don't need this half of the pipe.
  }

  local $/ = "";
  my @results = ();
  my $header = "";

  # get message header
  $debug ? ($header = <STDIN>) : ($header = <SMTPEIN>);

  chomp $header;

  # pass message header along
  $debug ? print "$header\n" : print QQEOUT "$header\n";

  # process header
  @results = process_header ($header);

  # add warnings after original message header
  foreach my $warning (@results) {
    $debug ? print $warning : print QQEOUT $warning;
  }
  undef $/;

  $debug ? print "\n" : print QQEOUT "\n";

  # pass message body along
  if (!$debug) {
    print QQEOUT $_ while (<SMTPEIN>);
  } else {
    print $_ while (<STDIN>);
  }

  if (!$debug) {
    # close everything properly
    close SMTPEIN or fail (54,"close(smtp in) failed (#4.3.0) - $!");
    close QQEOUT  or fail (53,"close(qq out) failed (#4.3.0) - $!");

    # make sure qmail-queue exits okay
    waitpid ($qq_pid,0);
    my ($status) = ($? >> 8);
    fail ($status, "qmail-queue failed ($status). (#4.3.0)") unless $status == 0;
  }

}

exit (0);
