#!/usr/bin/perl -w
# Copyright © 2014 Jamie Zawinski
#
# Permission to use, copy, modify, distribute, and sell this software and its
# documentation for any purpose is hereby granted without fee, provided that
# the above copyright notice appear in all copies and that both that
# copyright notice and this permission notice appear in supporting
# documentation. No representations are made about the suitability of this
# software for any purpose. It is provided "as is" without express or
# implied warranty.
#
# Archives all of your Twitter direct messages.
#
# Creates files like "DIR/TwitterHandle.txt" which contain every direct-message
# conversation you've had with that person. The file will be updated as new
# messages arrive.
#
# Unfortunately, Twitter seems not to let you retrieve anything more than
# about a year old. So for those old messages, you're just screwed.
# That's what you get for trusting your data to someone else!
#
# Usage:
#
# 1) create a Twitter Application. You only need to do this once.
#
# 2) Generate a session key for it, by running this program with
#    "twit-backup.pl --generate-session" and following the instructions.
#    You only need to do this once.
#
# 3) twit-backup.pl --user TWITTER-ID BACKUP-DIRECTORY/
#
# Created: 12-Jan-2014.
require 5;
use diagnostics;
use strict;

use POSIX;
use Date::Parse;
use Net::Twitter;

use open ":encoding(utf8)";

my $progname = $0; $progname =~ s@.*/@@g;
my $version = q{ $Revision: 1.3 $ }; $version =~ s/^[^\d]+([\d.]+).*/$1/;

my $verbose = 0;   # Incremented by each --verbose / -v on the command line.
my $debug_p = 0;   # When set, archive files are not written.


# Reads the four OAuth tokens for $user from "$HOME/.$user-twitter-pass".
#
# The file holds "key: value" or "key = value" lines (keys are consumer,
# consumer_secret, access, access_secret; case-insensitive); "#" starts a
# comment.  Exits via error() on an unparsable line or if any of the four
# tokens is missing.  An unreadable file is reported as "no access tokens".
#
# Returns the list (consumer, consumer_secret, access, access_secret).
#
sub load_keys($) {
  my ($user) = @_;

  my ($consumer, $consumer_secret, $access, $access_secret);

  # Read our twitter tokens
  error ("no \$HOME") unless defined($ENV{HOME});
  my $twitter_pass_file = "$ENV{HOME}/.$user-twitter-pass";
  if (open (my $in, '<', $twitter_pass_file)) {
    print STDERR "$progname: read $twitter_pass_file\n" if ($verbose > 1);
    while (<$in>) {
      s/#.*$//s;
      if (m/^\s*$/s) {
      } elsif (m/^consumer\s*[=:]\s*(.*?)\s*$/si) {
        $consumer = $1;
      } elsif (m/^consumer_secret\s*[=:]\s*(.*?)\s*$/si) {
        $consumer_secret = $1;
      } elsif (m/^access\s*[=:]\s*(.*?)\s*$/si) {
        $access = $1;
      } elsif (m/^access_secret\s*[=:]\s*(.*?)\s*$/si) {
        $access_secret = $1;
      } else {
        error ("$twitter_pass_file: unparsable line: $_");
      }
    }
    close $in;
  }

  error ("no access tokens in $twitter_pass_file\n\n" .
         "\t\trun: $progname --generate-session\n")
    unless ($consumer && $consumer_secret && $access && $access_secret);

  return ($consumer, $consumer_secret, $access, $access_secret);
}


# Reads one line from stdin, trims surrounding whitespace, and insists that
# what is left looks like a token of at least $min_len characters.
# $what is the description used in the error message, article included
# (e.g. "a consumer key").  End-of-file is treated as an empty answer.
#
sub read_token($$) {
  my ($what, $min_len) = @_;
  my $tok = <STDIN>;
  $tok = '' unless defined ($tok);
  $tok =~ s/^\s+|\s+$//gs;      # also takes care of the trailing newline
  error ("That's not $what: \"$tok\"")
    unless ($tok =~ m/^[-_a-zA-Z0-9]{${min_len},}$/s);
  return $tok;
}


# Interactively asks for the four OAuth tokens and stores them in
# "$HOME/.$user-twitter-pass", replacing the values of existing keys in that
# file and appending the ones that are missing.  Other lines are preserved.
#
sub twit_generate_session($) {
  my ($user) = @_;

  # Same precondition as load_keys(): without $HOME there is nowhere to save.
  error ("no \$HOME") unless defined($ENV{HOME});

  print STDOUT ("1) Go here: https://dev.twitter.com/apps\n" .
                " Click on the name of the app that you created.\n" .
                " It should have the same name as your Twitter account (\"$user\").\n" .
                "\n" .
                "2) On the \"Settings\" tab, make sure that the \"Application Type\" is\n" .
                " \"Read, Write and Access direct messages\". If it isn't, change\n" .
                " it, then go to the \"Details\" tab and click \"Recreate my access\n" .
                " token\". Hit reload until you see the change take effect.\n" .
                "\n" .
                "3) Go to the \"Details\" tab. Enter the \"Consumer key\" here: ");
  my $consumer = read_token ("a consumer key", 16);

  print STDOUT "4) Enter the \"Consumer Secret\" here: ";
  my $consumer_secret = read_token ("a consumer secret", 40);

  print STDOUT "5) Enter the \"Access token\" here: ";
  my $access = read_token ("an access token", 40);

  print STDOUT "6) Enter the \"Access token secret\" here: ";
  my $access_secret = read_token ("an access token secret", 40);

  my $fn = $ENV{HOME} . "/.$user-twitter-pass";

  my $body = '';
  if (open (my $in, '<', $fn)) {
    local $/ = undef;  # read entire file
    $body = <$in>;
    $body = '' unless defined ($body);
    close $in;
  }

  # [ key as it appears in the file, line prefix used when appending, value ]
  my @fields = ( [ 'CONSUMER',        "CONSUMER:\t ",      $consumer        ],
                 [ 'CONSUMER_SECRET', "CONSUMER_SECRET: ", $consumer_secret ],
                 [ 'ACCESS',          "ACCESS:\t\t ",      $access          ],
                 [ 'ACCESS_SECRET',   "ACCESS_SECRET:\t ", $access_secret   ] );
  foreach my $f (@fields) {
    my ($key, $prefix, $val) = @$f;
    # Replace the value in place if the key is already there...
    next if ($body =~ s/^(\Q$key\E[ \t]*[=:][ \t]*)[^\n]*/$1$val/mi);
    # ...otherwise append it, never gluing it onto an unterminated last line.
    $body .= "\n" if ($body ne '' && $body !~ m/\n\z/s);
    $body .= "$prefix$val\n";
  }

  open (my $out, '>', $fn) || error ("$fn: $!");

  # Drop group/other read and write bits *before* the secrets are written,
  # so that they are never sitting in a file that others can read.
  my @st = stat ($fn);
  error ("$fn: $!") unless @st;
  chmod (($st[2] & 07777 & ~0066), $fn) || error ("chmod $fn: $!");

  (print $out $body) || error ("$fn: $!");
  close ($out)       || error ("$fn: $!");

  print STDOUT "\nDone! $fn has been updated with your\n" .
               "new access tokens. Keep them secret.\n\n";
}


# Replaces each shortened link in $txt with its expanded form.
# $urls is the "urls" entity list of the message: a reference to an array of
# hashes with "url" and "expanded_url" keys (or undef).  Links with no
# matching entity are left alone.  Returns the new text.
#
sub expand_urls($$) {
  my ($txt, $urls) = @_;
  return '' unless defined ($txt);

  my %long;   # short URL => expanded URL; the first entity for a URL wins.
  foreach my $e (@{$urls || []}) {
    my ($short, $full) = ($e->{url}, $e->{expanded_url});
    next unless (defined ($short) && defined ($full));
    $long{$short} = $full unless exists ($long{$short});
  }

  my $expand = sub {
    my ($url) = @_;
    my $tail = '';
    # Sentence punctuation glued onto the end of a link is not part of it.
    if (!exists ($long{$url}) &&
        $url =~ m/^(.*?)([.,;:!?\)\]]+)$/s &&
        exists ($long{$1})) {
      ($url, $tail) = ($1, $2);
    }
    if (exists ($long{$url})) {
      print STDERR "$progname: $url -> $long{$url}\n" if ($verbose > 3);
      return $long{$url} . $tail;
    }
    print STDERR "$progname: unhandled: $url\n" if ($verbose > 3);
    return $url . $tail;
  };

  # A link ends at whitespace.  Without \s in the excluded set the match ran
  # to the end of the message and never equalled any entity's "url".
  $txt =~ s{\b(https?://[^\s<>\"\']+)}{$expand->($1)}gei;
  return $txt;
}


# Sleeps until the rate-limit window for $endpoint resets, if no calls remain.
# $nt is the Net::Twitter object; $endpoint is a resource name within the
# "direct_messages" family of the rate-limit status, e.g. "/direct_messages".
#
sub wait_for_rate_limit($$) {
  my ($nt, $endpoint) = @_;

  my $status = $nt->rate_limit_status();
  my $family = ($status && $status->{resources}
                ? $status->{resources}->{direct_messages}
                : undef);
  # Prefer the bucket of the endpoint that is about to be called; fall back
  # to the inbox bucket if the response does not list it.
  my $bucket = ($family
                ? ($family->{$endpoint} || $family->{'/direct_messages'})
                : undef);
  my $lim = ($bucket ? $bucket->{remaining} : undef);
  error ("unable to read rate-limit status") unless defined($lim);
  return if ($lim > 0);

  my $until = $bucket->{reset} || time();
  my $secs = $until - time() + 1;
  $secs = 1 if ($secs < 1);     # the reset time may already have passed
  print STDERR "$progname: rate-limited until " .
               strftime ("%I:%M:%S %p", localtime ($until)) .
               ": sleeping for $secs secs...\n";
  sleep ($secs);
}


# Merges new messages into the text of an archive file.
# $old is the current file contents ('' if none); $msgs is a reference to an
# array of formatted entries, oldest first.  An entry is one line, plus any
# continuation lines, each of which begins with a tab.
#
# Returns (new contents, number of entries added, number of prior entries).
#
sub merge_messages($$) {
  my ($old, $msgs) = @_;
  $old = '' unless defined ($old);

  # Archives written by earlier versions had "\n\t" turned into a bare CR.
  # Undo that, so that the file holds only one representation.
  $old =~ s/\r/\n\t/gs;

  # To split into entries, temporarily hide the continuation lines -- in a
  # copy, so that what gets written back is untouched.
  (my $flat = $old) =~ s/\n\t/\r/gs;
  my %seen;
  my $old_count = 0;
  foreach my $entry (split (/\n/, $flat)) {
    $entry =~ s/\r/\n\t/gs;
    $seen{$entry} = 1;
    $old_count++;
  }

  $old .= "\n" if ($old ne '' && $old !~ m/\n\z/s);

  my $changed = 0;
  foreach my $txt (@$msgs) {
    next if ($seen{$txt});
    $old .= $txt . "\n";
    print STDERR "$progname: new: " . $txt . "\n" if ($verbose > 2);
    $changed++;
  }

  return ($old, $changed, $old_count);
}


# Writes $text to $outfile by way of "$outfile.tmp" and a rename, so that a
# failure part way through cannot leave a truncated archive behind.
#
sub write_archive($$) {
  my ($outfile, $text) = @_;
  my $tmp = "$outfile.tmp";

  open (my $out, '>:utf8', $tmp) || error ("$tmp: $!");

  # Buffered write errors only show up at close(), so check both; and grab
  # $! before unlink() has a chance to overwrite it.
  my $ok  = (print $out $text);
  my $err = $!;
  if (!close ($out) && $ok) { $ok = 0; $err = $!; }
  if (!$ok) {
    unlink ($tmp);
    error ("$tmp: $err");
  }

  if (!rename ($tmp, $outfile)) {
    $err = $!;
    unlink ($tmp);
    error ("mv $tmp $outfile: $err");
  }
}


# Downloads the received and sent direct messages of $user and appends the
# ones that are not yet archived to "$outdir/SCREENNAME.txt", one file per
# correspondent.
#
# For a correspondent whose file already exists, only messages from the last
# two days are considered; otherwise everything the API returns is taken.
# Paging in a direction stops at the first message that is too old for its
# correspondent.
#
sub twit_backup($$) {
  my ($user, $outdir) = @_;

  # To re-download all of them back to the beginning of time, set this to 0.
  # But that shouldn't be necessary, since if the output file doesn't
  # exist, it always downloads all of them the first time.
  my $since = time() - (60 * 60 * 24 * 2);   # 2 days ago

  $outdir =~ s@/+$@@gs;
  error ("no such directory: $outdir") unless (-d $outdir);

  my ($consumer, $consumer_secret, $access, $access_secret) =
    load_keys($user);

  my $nt = Net::Twitter->new (
    traits              => [qw/OAuth API::RESTv1_1 WrapError/],
    ssl                 => 1,  # Required as of 7-Jan-2014
    consumer_key        => $consumer,
    consumer_secret     => $consumer_secret,
    access_token        => $access,
    access_token_secret => $access_secret,
  );

  # screen name => { since => time_t, msgs => { id => [ date, text, id ] } }
  my %users;
  my $count = 0;

  for (my $sentp = 0; $sentp <= 1; $sentp++) {
    my $endpoint = ($sentp ? '/direct_messages/sent' : '/direct_messages');
    my $last_id = undef;
    my $done = 0;

    while (!$done) {
      my %args;
      $args{count} = 1000;
      $args{max_id} = $last_id if $last_id;

      # It's really easy to get rate-limited... wait it out.
      wait_for_rate_limit ($nt, $endpoint);

      my $ret = ($sentp
                 ? $nt->sent_direct_messages (\%args)
                 : $nt->direct_messages (\%args));
      error ($nt->http_message) unless $ret;

      print STDERR "$progname: loaded " . scalar(@$ret) .
                   " messages since ". ($last_id || '0') . "\n"
        if ($verbose > 1);

      # Nothing at all: an empty mailbox, or nothing left.
      last if (@$ret == 0);

      # max_id is inclusive, so once we are paging, getting back only the
      # message we asked to start from means we're at the end.  (On the
      # first page, a single message is a real message and must be kept.)
      last if (defined ($last_id) && @$ret == 1 &&
               $ret->[0]->{id} eq $last_id);

      foreach my $msg (@$ret) {
        my $id   = $msg->{id};
        my $from = ($sentp ? $msg->{recipient} : $msg->{sender});
        my $uid  = lc ($from->{screen_name});
        my $name = $from->{name};
        my $date = str2time ($msg->{created_at});
        my $d2   = strftime ("%a %d %b %Y %I:%M %p", localtime ($date));

        # Expand the damned URLs.
        my $txt = expand_urls ($msg->{text}, $msg->{entities}->{urls});

        # A CR inside an entry would be taken for a continuation marker
        # when the archive is read back, so normalize line endings first.
        $txt =~ s/\r\n?/\n/gs;
        $txt =~ s/\n/\n\t/gs;
        $txt =~ s/^\s+|\s+$//gs;

        my $io = $sentp ? '>' : '<';
        $txt = "$io $d2\t$name \@$uid\t$txt";

        my $T = ($users{$uid} ||= { msgs => {} });
        if (! defined($T->{since})) {
          # If the file doesn't exist, grab them all.
          $T->{since} = (-f "$outdir/$uid.txt" ? $since : 0);
        }

        if ($date <= $T->{since}) {
          print STDERR "$progname: that's far enough back.\n" if ($verbose);
          $done = 1;
          last;
        }

        $T->{msgs}->{$id} = [ $date, $txt, $id ];

        print STDERR "$progname: $id\t$txt\n" if ($verbose > 3);
        $last_id = $id;
        $count++;
      }
    }
  }

  print STDERR "$progname: $count msgs from " . scalar(keys (%users)) .
               " users\n"
    if ($verbose > 1);

  foreach my $uid (sort keys (%users)) {
    # Oldest first; the id breaks ties so that the order is repeatable.
    my @msgs = sort { $a->[0] <=> $b->[0] || $a->[2] <=> $b->[2] }
                    values (%{$users{$uid}->{msgs}});

    print STDERR "$progname: " . scalar(@msgs) . " msgs from \@$uid\n"
      if ($verbose > 1);

    my $outfile = "$outdir/$uid.txt";
    my $old = '';
    if (open (my $in, '<:utf8', $outfile)) {
      local $/ = undef;  # read entire file
      $old = <$in>;
      $old = '' unless defined ($old);
      close $in;
    }

    my ($new, $changed, $old_count) =
      merge_messages ($old, [ map { $_->[1] } @msgs ]);

    if ($changed) {
      write_archive ($outfile, $new) unless ($debug_p);
      print STDERR "$progname: " .
                   ($debug_p ? "not writing" : "wrote") . " $outfile" .
                   " ($changed new, $old_count old)\n"
        if ($verbose);
    } elsif ($verbose > 1) {
      print STDERR "$progname: $outfile unchanged\n";
    }
  }
}


# Prints $err on stderr, prefixed with the program name, and exits with
# status 1.  Does not return.
#
sub error($) {
  my ($err) = @_;
  print STDERR "$progname: $err\n";
  exit 1;
}

# Prints the command-line synopsis on stderr and exits with status 1.
#
sub usage() {
  print STDERR "usage: $progname [--verbose] [--debug] [--user name] dir\n";
  print STDERR "usage: $progname --generate-session\n";
  exit 1;
}

# Parses the command line, then either runs the interactive token setup
# (--generate-session) or archives into the given directory.
# The account name defaults to $USER.
#
sub main() {
  my $user = $ENV{USER};
  my $dir = undef;
  my $gen_p = undef;

  while ($#ARGV >= 0) {
    $_ = shift @ARGV;
    if (m/^--?verbose$/) { $verbose++; }
    elsif (m/^-v+$/) { $verbose += length($_)-1; }
    elsif (m/^--?debug$/) { $debug_p++; }
    elsif (m/^--?user$/) { $user = shift @ARGV; }
    elsif (m/^--?gen(erate(-session)?)?$/) { $gen_p = 1; }
    elsif (m/^-./) { usage(); }
    elsif (! $dir) { $dir = $_; }
    else { usage(); }
  }

  # The account name ends up in file names: "--user" with nothing after it,
  # or no $USER in the environment, would otherwise go unnoticed.
  usage() unless (defined ($user) && $user ne '');

  if ($gen_p) {
    twit_generate_session($user);
  } else {
    usage() unless $dir;
    twit_backup ($user, $dir);
  }
}

main();
exit 0;