#!/usr/bin/perl # Copyright © 1996, 2000 Jamie Zawinski # # Permission to use, copy, modify, distribute, and sell this software and its # documentation for any purpose is hereby granted without fee, provided that # the above copyright notice appear in all copies and that both that # copyright notice and this permission notice appear in supporting # documentation. No representations are made about the suitability of this # software for any purpose. It is provided "as is" without express or # implied warranty. # # Created: 21-Nov-96. sub main { # read and check the first line. $_ = <>; if ( ! m/^From / ) { die "Not a mailbox file.\n"; } local $headers_done = 0; local $from = ""; local $to_cc = ""; local $ctype = ""; local $resent_from = ""; local $ret_path = ""; local $id = ""; local $moz = ""; local $lines = 0; local $new_lines = 0; local $was_to_cc_p = 0; local $cont_p = 0; local $boundary_count = 0; MSG: while (<>) { if ( m/^From / ) { swallow($from, $lines, $new_lines, $to_cc, $resent_from, $ret_path, $moz, $id); $headers_done = 0; $from = ""; $to_cc = ""; $ctype = ""; $id = ""; $moz = ""; $lines = 0; $new_lines = 0; $was_to_cc_p = 0; $resent_from = 0; $ret_path = 0; $cont_p = 0; $boundary_count = 0; next MSG; } # only count the first part; not attachments. if ($boundary_count > 1) { next; } if ( m/^--------------|^--=======|^-- $/ ) { $boundary_count++; } $cont_p = 0; if ( $was_to_cc_p && m/^[ \t]/ ) { $cont_p = 1; } $was_to_cc_p = 0; if ( $headers_done ) { $lines++; if ( ! m/^[ \t]*$|^[ \t]*>/ ) { $new_lines++; } } elsif ( m/^$/ ) { $headers_done = 1; } elsif ( m/^From:/i ) { ( $from ) = m/^[^:]+:[ \t]*(.*)/; } elsif ( $cont_p || m/^(To|CC):/i ) { local $a, $b; ( $a, $b ) = m/^([^:]+:)?[ \t]*(.*)/; $was_to_cc_p = 1; $to_cc .= " " . $b; } elsif ( m/^Resent-From:/i ) { ( $resent_from ) = m/^[^:]+:[ \t]*(.*)/; } elsif ( m/^Return-Path:/i ) { ( $ret_path ) = m/^[^:]+:[ \t]*(.*)/; } elsif ( m/^X-Mozilla-Flags:/i ) { ( $moz ) = m/^[^:]+:[ \t]*(.*)/; } elsif ( m/^Message-ID:/i ) { ( $id ) = m/^[^:]+:[ \t]*(.*)/; } elsif ( m/^Content-Type:/i ) { ( $ctype )= m/^[^:]+:[ \t]*(.*)/; if ( ! ( $ctype =~ m@text/@i || $ctype =~ m@multipart/@i )) { # if it's a non-text message, consider it all attachment. $boundary_count += 2; } elsif ( $ctype =~ m@name=@i ) { # if the Content-Type header has a name parameter, then # consider it an attachment. $boundary_count += 2; } } } swallow($from, $lines, $new_lines, $to_cc, $resent_from, $ret_path, $moz, $id); # foreach $key (%addr_to_name) { # $foo{$addr_to_name{$key}} = $key; # } # foreach $key (sort(keys %foo)) { # printf "\"%s\"\t\t\"%s\"\n", $foo{$key}, $key; # } # exit; spew(); } $total_msgs = 0; $total_lines = 0; $total_new_lines = 0; $msgn = 0; sub swallow { local ($from, $lines, $new_lines, $to_cc, $resent_from, $ret_path, $moz, $id) = @_; $msgn++; if (hex($moz) & 8) { # message is deleted. return; } #### hack # $_ = $to_cc; # if ( ! m/really-bad-attitude/i ) { # $_ = "$resent_from $ret_path $to_cc"; # if ( ! m/really-bad-attitude/i ) { # print "skipping: $msgn $id ($to_cc)\n"; # return; # } $total_msgs++; $total_lines += $lines; $total_new_lines += $new_lines; local $mbox1 = "^[ \t]*([^ \t()]+)[ \t]*[(][ \t]*([^)]+)[ \t]*[)][ \t]*\$"; local $mbox2 = "^[ \t]*([^<]+)[ \t]*<[ \t]*([^ \t>]+)[ \t]*>[ \t]*\$"; local $name, $addr; $_ = $from; ( $addr, $name ) = m/$mbox1/i; if ( $name eq "" ) { ( $name, $addr ) = m/$mbox2/i; } if ( $name eq "" ) { $addr = $from; $name = ""; } $name =~ s/^[ \t\"]*(.*)/\1/; $name =~ s/[ \t\"]*$//; $addr =~ s/^[ \t\"]*(.*)/\1/; $addr =~ s/[ \t\"]*$//; $addr =~ tr#A-Z#a-z#; if ( $addr =~ m/root\@ronk/ ) { $addr = "jeph"; } # loser. $addr =~ s/@.*$//; if ( $addr eq "dev.null" ) { $addr = "roeber"; } # loser. if ( $addr eq "pmarca" ) { $addr = "marca"; } if ( $addr eq "christine" ) { $addr = "cbegle"; } if ( $name eq "Super-User" ) { $name = ""; } if ( $name eq "root" ) { $name = ""; } $name =~ s/=\?iso-8859-1\?Q\?Jos=E9\?=/Jose/; $name =~ s/=\?iso-8859-1\?Q\?Sch=F6neberger\?=/Schoneberger/; $name =~ s/Private //; if ( $name ne "" ) { $addr_to_name{$addr} = $name; } $total_msgs{$addr} ++; $total_sizes{$addr} += $lines; $total_new_sizes{$addr} += $new_lines; } sub spew { blurb(); print "\n"; print " Total Messages: $total_msgs\n"; print " Total Original Body Lines: $total_new_lines\n"; print " Total Body Lines: $total_lines"; printf " (%d%% noise)\n", ($total_lines ? 100 - int(100 * ($total_new_lines / $total_lines)) : 100); local $key; local $i = 1; sub print_header { print "\n"; print " Total Total Total\n"; print " Messages Original Lines:\n"; print " Posted: Lines:\n\n"; $i = 1; } sub print_line { local ($key) = @_; my $name = $addr_to_name{$key}; if ($name eq "") { $name = $key; } my $s = $total_sizes{$key}; printf " %3d: %-30s %5d %5d %5d (%.2d%% noise)\n", $i++, $name, $total_msgs{$key}, $total_new_sizes{$key}, $total_sizes{$key}, ($s ? (100 - int(100 * ($total_new_sizes{$key} / $s))) : 100); } print "\nSorted by messages posted:"; print_header(); foreach $key (sort( { $total_msgs{$b} <=> $total_msgs{$a} } keys %total_msgs)) { print_line($key); } print "\nSorted by original lines posted:"; print_header(); foreach $key (sort( { $total_new_sizes{$b} <=> $total_new_sizes{$a} } keys %total_new_sizes)) { print_line($key); } print "\nSorted by total lines posted:"; print_header(); foreach $key (sort( { $total_sizes{$b} <=> $total_sizes{$a} } keys %total_sizes)) { print_line($key); } print "\n"; } sub blurb { print "\ \"Total Lines\" does not count attachments, only the main body of the message. Messages which contain only attachments count as 0 lines. \"Original lines\" are the subset of \"Total Lines\" that are not preceeded by \">\". \"Noise\" is the other part -- the quoted material. This script doesn't do anything clever with HTML, so the stats will be off for HTML messages (no detection of quoted material, and bloated line counts.) "; } main(); exit 0;