#! /usr/bin/perl -w
#
# $Header: /cvs/scripts/newsreader-count,v 1.5 2002/06/24 09:43:58 richard Exp $
#
# Copyright (C) 2002 Richard Kettlewell
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
#


use strict;
use integer;

my @newsreaderlist = (
		      "knews",
		      "SNews",
		      "trn",
		      "Microsoft Outlook Express",
		      "Pan",
		      "PMINews",
		      "skim",
		      "kexpress",
		      "News Xpress",
		      "MT-NewsWatcher",
		      "xrn",
		      "WinVN",
		      "NewsWatcher-X",
		      "Ameol",
		      "MacSOUP",
		      "SmofList Digest",
		      "Yet Another NewsWatcher",
		      "News Rover",
		      "Nographer",
		      "NotePad Pluggin",
		      "THOR",
		      "CrossPoint",
		     );

my $newsreaderlistpattern = join("|", @newsreaderlist);
my $spool = "/var/spool/news/articles";
my $byfrom = 0;

while(@ARGV > 0 && $ARGV[0] =~ /^-/) {
  local $_ = shift;
  last if /^--$/;
  if(/^--by-from$/ || /^-f$/) {
    $byfrom = 1;
  } elsif(/^--help$/ || /^-h$/) {
    help();
    exit 0;
  } else {
    die "$0: unknown option '$_'\n";
  }
}

my $newsgroup = shift;
$newsgroup =~ s/\./\//g;
my $dir = "$spool/$newsgroup";

my %readers = ();
my %from = ();
my %variants = ();
chdir($dir) or die "$0: chdir $dir: $!\n";
opendir(DIR, ".") or die "$0: opendir $dir: $!\n";
while(defined(my $file = readdir DIR)) {
  next if $file =~ /\D/;
  next unless -f $file;
  open(FILE, "<$file") or do {
    print STDERR "$0: open $file: $!\n";
    next;
  };
  my ($ua, $xn, $m, $mid, $from);
  while(defined($_ = <FILE>)) {
    chomp;
    if(/^User-Agent:\s+(.*)/i) {
      $ua = $1;
    } elsif(/^X-Newsreader:\s+(.*)/i) {
      $xn = $1;
    } elsif(/^X-News-Software:\s+(.*)/i) {
      $xn = $1;
    } elsif(/.*mailer:\s+(.*)/i) {
      $m = $1;
    } elsif(/^Message-ID:\s+<Pine\.[^\.]+\.(\d+\.\d+)/i) {
      $mid = "Pine/$1";
    }
    if(/^From:\s+(.*)/) {
      $from = $1;
    }
    last if /^$/;
  }
  if($byfrom) {
    next if exists $from{$from};
    $from{$from} = 1;
  }
  my $reader;
  if(defined $ua) {
    $reader = $ua;
  } elsif(defined $xn) {
    $reader = $xn;
  } elsif(defined $m) {
    $reader = $m;
  } elsif(defined $mid) {
    $reader = $mid;
  } else {
    $reader = "unknown";
  }
  account($reader);
}
report();
(close STDOUT)
  or die "$0: writing to stdout: $!\n";

sub account {
  local $_ = shift;
  my ($reader, $variant);

  if(/^Mozilla.*compatible; ([^;]+);/) {
    my $rv = $1;
    if($rv =~ /^(\S+)\s+(.*)$/) {
      $reader = $1;
      $variant = $2;
    } else {
      $reader = $rv;
      $variant = "unknown";
    }
  } elsif(/^Mozilla/) {
    if(/Netscape6\/([\d\.]+)/) {
      $reader = "Mozilla";
      $variant = "Netscape6 $1";
      if(/rv:[\d\.]+/) {
	$variant .= " ($&)";
      }
    } elsif(/rv:([\d\.]+)/) {
      $reader = "Mozilla";
      $variant = $1;
    } elsif(/^Mozilla ([34][\d\.\-A-Z]+)/) {
      $reader = "Netscape";
      $variant = $1;
    } else {
      $reader = "Mozilla";
      $variant = "unknown";
    }
  } elsif(/(Gnus) v([\d\.]+)/) {
    $reader = $1;
    $variant = $2;
  } elsif(/(gnus) ([\d\.]+)/) {
    # annoying variant spelling
    $reader = "Gnus";
    $variant = $2;
  } elsif(/^ANT .* Marcel \[ver ([\d\.]+)\]/) {
    $reader = "Marcel";
    $variant = $1;
  } elsif(/^(TIN) \[\S+ (.*)\]/) {
    # XXX are "TIN" and "tin" the same thing?
    $reader = $1;
    $variant = $2;
  } elsif(/Forte (Free )?Agent (\S+)/) {
    $reader = "Forte Agent";
    $variant = (defined $1 ? "Free Agent $2" : $2);
  } elsif(/^(\S+ Gravity|Demon Internet Simple News|CrossPoint\/OpenXP) v(\S+)/) {
    $reader = $1;
    $variant = $2;
  } elsif(/^(MR\/2).* v(\S+)/) {
    $reader = $1;
    $variant = $2;
  } elsif(/^(slrn|knews) \((\S+)/) {
    $reader = $1;
    $variant = $2;
  } elsif(/^(KRN) http:/) {
    $reader = $1;
    $variant = "unknown";
  } elsif(/^Virtual Access by ([^,]+)/) {
    $reader = $1;
    $variant = "unknown";
  } elsif(/^(Telnet) \d+/) {
    $reader = $1;
    $variant = "unknown";
  } elsif(/^(Turnpike|ZIMACS|NN|Sylpheed).*[vV]ersion (\S+)/) {
    $reader = $1;
    $variant = $2;
  } elsif(/^Ameol2$/) {
    $reader = "Ameol";
    $variant = "2.?";
  } elsif(/^($newsreaderlistpattern) +(\S+)/) {
    $reader = $1;
    $variant = $2;
  } elsif(/^([^\/]+)\/(\S+)/) {
    $reader = $1;
    $variant = $2;
  } else {
    $reader = $_;
    $variant = "unknown";
  }
  ++$readers{$reader};
  $variants{$reader} = {} if not exists $variants{$reader};
  ++$variants{$reader}->{$variant};
}

sub report {
  my @readers = sort { $readers{$b} <=> $readers{$a} } keys %readers;

  for my $r (@readers) {
    my $variants = $variants{$r};
    my @variants = sort { $variants->{$b} <=> $variants->{$a}} keys %$variants;
    if(@variants == 1) {
      if($variants[0] eq 'unknown') {
	output(sprintf("%5d %s\n",
		       $readers{$r}, $r));
      } else {
	output(sprintf("%5d %s (%s)\n",
		       $readers{$r}, $r, $variants[0]));
      }
    } else {
      output(sprintf("%5d %s\n", $readers{$r}, $r));
      for my $v (@variants) {
	output(sprintf("     %5d %s\n", $variants->{$v}, $v));
      }
    }
  }
}

sub output {
  (print @_) || die "$0: writing to stdout: $!\n";
}

sub help {
  output("Usage:\n",
	 "  newsreader-count [OPTIONS] newsgroup\n",
	 "\n",
	 "Options:\n",
	 "  -f, --by-from     Count only one newsreader for each distinct\n",
	 "                    from line\n",
	 "  -h, --help        Output a usage summary and terminate\n");
}
