#!/usr/bin/perl -w # Copyright © 2005-2019 Jamie Zawinski # # Permission to use, copy, modify, distribute, and sell this software and its # documentation for any purpose is hereby granted without fee, provided that # the above copyright notice appear in all copies and that both that # copyright notice and this permission notice appear in supporting # documentation. No representations are made about the suitability of this # software for any purpose. It is provided "as is" without express or # implied warranty. # # This code parses the .emlx files used by Mail.app in MacOS 10.4 - 10.14. # Mail.app stores folders as directories with one file per message. # Prior to 10.7, all the messages were in a single folder; in 10.7 and # later, they are distributed into subdirectories. # # Each file contains three sections: # # - the first line is the length in bytes of the message itself # - the raw message follows # - then comes an XML "plist" which contains various parsed attributes # of the message, and any flags that were subsequently set. # # The most interesting parameter in the plist is "flags", which is a 32 bit # quantity indicating read/unread, replied, and other message status. # Those bits are not officially documented, but we do what we can. # # Usage: # # - print a one line summary of all messages in a folder: # # emlx.pl ~/Library/Mail/*/*/FOLDER.mbox/ # # - print a one line summary of unread messages in a folder: # # emlx.pl --unread DIR # # - print the full text of the unread messages, with most uninteresting # header fields stripped: # # emlx.pl --unread --show DIR # # - print a summary of unread messages in your default Inboxes: # # emlx.pl --unread \ # `find ~/Library/Mail/*/INBOX.mbox/Messages -name '*.emlx'` # # - convert a folder to a standard BSD "mbox" file, sorted by date: # # emlx.pl --mbox --sort DIR > folder.mbox # # - list only the Message-IDs in the folder: # # emlx.pl --ids DIR # # - extract From, To and CC from the message: # # emlx.pl --addrs DIR # # Created: 4-Jul-2005. # Discussion: https://jwz.org/b/B7dv require 5; use diagnostics; use strict; use POSIX; use Date::Parse; my $progname = $0; $progname =~ s@.*/@@g; my ($version) = ('$Revision: 1.10 $' =~ m/\s(\d[.\d]+)\s/s); my $verbose = 0; # returns a hash reference with various symbolic keys set # sub parse_flags($) { my ($bits) = @_; my %result = (); $result{'read'} = ($bits & (1 << 0)) >> 0; $result{'deleted'} = ($bits & (1 << 1)) >> 1; $result{'answered'} = ($bits & (1 << 2)) >> 2; $result{'encrypted'} = ($bits & (1 << 3)) >> 3; $result{'flagged'} = ($bits & (1 << 4)) >> 4; $result{'recent'} = ($bits & (1 << 5)) >> 5; $result{'draft'} = ($bits & (1 << 6)) >> 6; # $result{'initial'} = ($bits & (1 << 7)) >> 7; $result{'forwarded'} = ($bits & (1 << 8)) >> 8; $result{'redirected'} = ($bits & (1 << 9)) >> 9; $result{'attach_count'} = ($bits & (0x3F << 10)) >> 10; # 6 bits $result{'priority'} = ($bits & (0x7F << 16)) >> 16; # 5 bits $result{'signed'} = ($bits & (1 << 23)) >> 23; $result{'is_junk'} = ($bits & (1 << 24)) >> 24; $result{'is_not_junk'} = ($bits & (1 << 25)) >> 25; $result{'font_size'} = ($bits & (0x07 << 26)) >> 26; # 3 bits $result{'junk_recorded'} = ($bits & (1 << 29)) >> 29; $result{'highlight'} = ($bits & (1 << 30)) >> 30; # wtf? $result{'attach_count'} = 0 if $result{'attach_count'} == 63; # delete null values foreach my $key (keys %result) { delete $result{$key} if (! $result{$key}); } # print sprintf("## %b: ", $bits) . join(' | ', keys(%result)) . "\n"; return \%result; } sub showfile($$$$) { my ($file, $unread_p, $list_p, $mbox_p) = @_; local *IN; open (IN, "<$file") || error ("$file: $!"); local $/ = undef; # read entire file my $body = ; close IN; my ($length, $xml); $body =~ m/^(\d+)\s*\n(.*)$/s || error ("$file: unparsable length"); ($length, $body) = ($1, $2); $xml = substr ($body, $length) || error ("$file: unparsable body"); $body = substr ($body, 0, $length); $xml =~ m/^<\?xml version/ || error ("$file: misparsed body"); my %props; my @chunks = split (//i, $xml); shift @chunks; foreach (@chunks) { my ($key, $val) = m@^(.*?)\s*(.*)$@s; $val =~ s@\.*$@@s; $val =~ s@^\s*<([^<>]+)>\s*(.*)\s*\s*$@$2@s; $props{$key} = $val; } my $flags = parse_flags($props{'flags'}); return [0, ''] if ($flags->{'deleted'}); # skip deleted messages return [0, ''] if ($unread_p && $flags->{'read'}); # skip read messages my $attach = $flags->{'attach_count'}; # don't care about these delete $flags->{'priority'}; delete $flags->{'is_not_junk'}; delete $flags->{'junk_recorded'}; delete $flags->{'attach_count'}; my $date = undef; my $out = ''; if ($list_p) { $file =~ s@^.*/@@; my $from = $props{'sender'}; my $subj = $props{'subject'}; $date = $props{'date-sent'}; # These are not always present in props, as of 10.7. ($date) = ($body =~ m/^Date\s*:\s*(.+?)\n/mi) unless $date; ($from) = ($body =~ m/^From\s*:\s*(.+?)\n/mi) unless $from; ($subj) = ($body =~ m/^Subject\s*:\s*(.*?)\n/mi) unless $subj; $subj = '' unless defined($subj); $date = str2time ($date) unless ($date =~ m/^\d+$/s); my $d2 = strftime ("%a %b %d %l:%M %p", localtime ($date)) if $date; $from =~ s@<@<@g; $from =~ s@>@>@g; $from =~ s@&@&@g; $subj =~ s@<@<@g; $subj =~ s@>@>@g; $subj =~ s@&@&@g; my $flags_str = join (', ', keys (%$flags)); $flags_str .= " [$attach]" if ($attach); $from = substr ($from, 0, 20); $subj = substr ($subj, 0, 30); my $line = sprintf ("%-20s %-20s %-30s %s", $d2, $from, $subj, $flags_str); $line =~ s/\s+$//s; $out .= "$line\n"; } else { my $headers; $body =~ m/^(.*?\n)\n(.*)$/s || error ("$file: unparsable headers"); ($headers, $body) = ($1, $2); $body =~ s/\s+$//s; $date = $props{'date-sent'}; if (! $date) { ($date) = ($headers =~ m/^Date\s*:\s*(.+?)\r?\n/mi); ($date) = ($body =~ m/^Date\s*:\s*(.+?)\r?\n/mi) unless $date; $date = str2time ($date); } if ($mbox_p == 1) { if ($headers !~ m/^From /s) { my $d2 = strftime ("%a %b %d %T %Y %z", localtime ($date)); $headers = "From - $d2\n$headers"; } $body =~ s/^(From )/>$1/gm; # mangle $out .= "$headers\n$body\n\n"; } elsif ($mbox_p == 2) { $headers =~ s/\n\s+/ /gs; my $headers2 = ''; foreach (split (/\n/, $headers)) { if (m/^Message-ID:\s*([^\s]+)/si) { $out .= "$1\n"; last; } } } elsif ($mbox_p == 3) { $headers =~ s/\n\s+/ /gs; my $headers2 = ''; foreach (split (/\n/, $headers)) { if (m/^(From|To|CC):\s*(.+)$/si) { my $s = $2; foreach my $ss (split(/\s*,+\s+/, $s)) { $ss =~ s/^\s+|\s+$//gs; $out .= "$ss\n"; } } } } else { $headers =~ s/\n\s+/ /gs; my $headers2 = ''; foreach (split (/\n/, $headers)) { $headers2 .= "$_\n" if (m/^(From|To|CC|Subject|Date):/si); } $body =~ s/^(-{10})/- $1/gm; # quoting for digests my $hr = '-' x 72; $out .= "$headers2\n$body\n\n$hr\n"; } } return [ $date, $out ]; } sub dirfiles($); sub dirfiles($) { my ($dir) = @_; my @files = (); $dir =~ s@/+$@@s; opendir (my $dh, $dir) || error ("$dir: $!"); foreach my $f (sort readdir ($dh)) { next if ($f =~ m/^\./si); $f = "$dir/$f"; if (-d $f) { push @files, dirfiles($f); } elsif ($f =~ m/\.emlx$/si) { push @files, $f; } } return @files; } sub error($) { my ($err) = @_; print STDERR "$progname: $err\n"; exit 1; } sub usage() { print STDERR "usage: $progname [--verbose] " . "[--unread|--all] [--list|--sort|--show|--mbox|--ids] file.emlx ...\n"; exit 1; } sub main() { my @files = (); my $unread_p = 0; my $list_p = 1; my $mbox_p = 0; my $sort_p = 0; while ($#ARGV >= 0) { $_ = shift @ARGV; if (m/^--?verbose$/) { $verbose++; } elsif (m/^-v+$/) { $verbose += length($_)-1; } elsif (m/^--?unread$/) { $unread_p = 1; } elsif (m/^--?all$/) { $unread_p = 0; } elsif (m/^--?show$/) { $list_p = 0; } elsif (m/^--?list$/) { $list_p = 1; } elsif (m/^--?sort$/) { $sort_p = 1; } elsif (m/^--?mbox$/) { $mbox_p = 1; $list_p = 0; $unread_p = 0; } elsif (m/^--?ids?$/) { $mbox_p = 2; $list_p = 0; $unread_p = 0; } elsif (m/^--?addrs?$/) { $mbox_p = 3; $list_p = 0; $unread_p = 0; } elsif (m/^-./) { usage; } else { push @files, (-f $_ ? $_ : dirfiles($_)); } } usage unless ($#files >= 0); # Sort files by write date, now that they are scattered in subdirs. my %date; foreach (@files) { $date{$_} = (stat($_))[9] || 9999999999; } @files = sort { $date{$a} <=> $date{$b} } @files; if ($sort_p) { my @msgs = (); foreach (@files) { push @msgs, showfile ($_, $unread_p, $list_p, $mbox_p); } foreach my $p (sort { $a->[0] <=> $b->[0] } @msgs) { print STDOUT $p->[1]; } } else { foreach (@files) { my $p = showfile ($_, $unread_p, $list_p, $mbox_p); print STDOUT $p->[1]; } } print STDERR "$progname: " . scalar(@files) . " messages\n" if ($verbose); } main(); exit 0;