#!/usr/bin/perl -w # Copyright © 2012-2013 Jamie Zawinski # # Permission to use, copy, modify, distribute, and sell this software and its # documentation for any purpose is hereby granted without fee, provided that # the above copyright notice appear in all copies and that both that # copyright notice and this permission notice appear in supporting # documentation. No representations are made about the suitability of this # software for any purpose. It is provided "as is" without express or # implied warranty. # # Converts your Facebook stream to an RSS feed. # # Note that anyone who can see this RSS feed can see your entire Facebook # world, so be careful to protect it. # # Usage: # # 1) create a Facebook Application. You only need to do this once. # # 2) Generate a session key for it, by running this program with # "facebook-rss.pl --generate-session" and following the instructions. # You only need to do this once. # # 3) To generate an RSS feed of your entire Facebook stream: # facebook-rss.pl your-app-name output-file.rss # # If you want only your friends (real people) and not pages that you # have liked, then use --friends-only. # # If you want only pages and not people, use --pages-only. # # (This way you can have one RSS feed of each, if you want.) # # There's also a killfile! # # $HOME/.APPNAME-facebook-kill can contain a list of user names. # posts by those users are excluded from the RSS feed. (You could also # just unfriend those people, but ignoring them might be kinder.) # # Killfile entries can be: # # - User Name # - Application Name (to kill all posts via the given application) # - User Name Photos (to kill only photo-posts by "User Name") # - User Name App Name (to kill all posts by a user using a particular app, # e.g. ("John Smith Twitter") to kill only a user's Twitter crossposts) # # # BUGS: # # - Finding friends tagged in non-friends photos is slow, since that # info doesn't show up on "/me/home". # # - It detects checkins made by your friends, but does not detect # them being checked in by others, e.g., "X is with Y and N others # at Z". "/USER/checkins" and "/USER/locations" seems to always # return nothing, and these 3rd-party checkins don't show up on # "/USER/feed" or "/USER/tagged" either. Even using FQL, the # "checkin" table only lets you query by "author_uid" or "page_id", # not by "tagged_uids". # # - No events. Even "/me/events" is always null. FQL "event_member" # queries are always null. # # Created: 4-Jul-2012. require 5; use diagnostics; use strict; use POSIX; use Time::Local; use LWP::Simple; use JSON::Any; use Data::Dumper; no warnings 'utf8'; my $progname = $0; $progname =~ s@.*/@@g; my $version = q{ $Revision: 1.61 $ }; $version =~ s/^[^\d]+([\d.]+).*/$1/; my $verbose = 0; sub url_quote($) { my ($u) = @_; # utf8::encode($u); # Split wide chars into multi-byte sequences. $u =~ s|([^- a-zA-Z0-9.\@_])|sprintf("%%%02X", ord($1))|gse; $u =~ s| |+|gs; return $u; } sub url_unquote($) { my ($url) = @_; $url =~ s/[+]/ /g; $url =~ s/%([a-z0-9]{2})/chr(hex($1))/ige; return $url; } sub html_quote($) { my ($s) = @_; return undef unless defined ($s); $s =~ s/&/&/gs; $s =~ s//>/gs; return $s; } # Read $HOME/.APPNAME-facebook-kill, return a hashref of users to ignore. # sub load_killfile($) { my ($user) = @_; my $file = $ENV{HOME} . "/.$user-facebook-kill"; my %users; if (open (my $in, '<', $file)) { while (<$in>) { s/^\s+|\s+$//g; s/\s*#.*$//s; next unless $_; $users{lc($_)} = 1; } close $in; } return \%users; } sub killp($$) { my ($kill, $string) = @_; return 0 unless $string; my $s = lc($string); my $s2 = $s; $s2 =~ s/[^a-z\d ]//gsi; # nuke non-ASCII and try again $s =~ s/^\s+|\s+$//gsi; $s2 =~ s/^\s+|\s+$//gsi; if (($s && $kill->{$s}) || ($s2 && $kill->{$s2})) { print STDERR "$progname: killfile: $string\n" if ($verbose > 1); return 1; } return 0; } sub load_access_token($) { my ($app) = @_; my $file = $ENV{HOME} . "/.$app-facebook-pass"; my $token = undef; open (my $in, '<', $file) || error ("$file: $!"); while (<$in>) { if (m/^(?:OAUTH2|ACCESS_TOKEN):\s*(.*?)\s*$/s) { $token = $1; } } close $in; error ("no access token in $file\n" . "run: $progname --generate-session") unless $token; return $token; } # The Internets have discovered these: # # Facebook's iPhone app key: 3e7c78e35a76a9299309885393b02d97 # Facebook's iPhone app secret: c1e620fa708a1d5696fb991c1bde5662 # # Facebook's iPad app key: f0c9c86c466dc6b5acdf0b35308e83d1 # Facebook's iPad app secret: 7c036d47372dd5f2df27bfe76d4ae0c4 # # But how do I turn those into an OAuth2 access_token? # # Aha! This works: just searching for "Facebook for iPhone" on # Facebook reveals that the "App ID / API Key" is 6628568379, # and for "Facebook for iPad", 173847642670370. # # So this URL: # # https://graph.facebook.com/oauth/authorize # ?type=user_agent&client_id=6628568379 # &redirect_uri= # http://www.facebook.com/connect/login_success.html&scope=read_stream # # gives us an OAuth2 access_token that has the magic read-permissions of # the iPhone app, which means that it can read posts even by people who # have blocked apps from reading their stream (assuming that you can # read them yourself). sub facebook_generate_session() { my ($app) = @_; my @perms = ('read_stream', 'user_likes', 'friends_likes', 'user_status', 'friends_status', 'user_checkins', 'friends_checkins', 'user_photos', 'friends_photos', 'user_birthday', 'friends_birthday', 'user_location', 'friends_location', 'user_photo_video_tags', 'friends_photo_video_tags', ); print STDOUT ("1) Go to the page of your Facebook app: " . "Click on \"Apps / More\",\n" . " then \"Developer\" then the app that you created.\n" . " Enter its \"App ID / API Key\" here: "); my $app_id = <>; chomp ($app_id); error ("That's not an app ID: \"$app_id\"") unless ($app_id =~ m/^\d{8}\d+$/s); my $data = fb_load ("app id", 0, $app_id); my $name = $data ? $data->{name} : 0; error ("Unknown app ID: $app_id") unless $name; my $url = ('https://graph.facebook.com/oauth/authorize' . '?type=user_agent' . '&client_id=' . $app_id . '&redirect_uri=' . 'http://www.facebook.com/connect/login_success.html' . '&scope=' . join(',', @perms)); print STDOUT "\n2) Go to this URL:\n\n $url\n\n"; print STDOUT "3) Say yes if there are any questions.\n"; print STDOUT "4) You should be redirected to a page saying 'Success'.\n"; print STDOUT " Enter the URL of that page here: "; $url = <>; chomp ($url); my ($token) = ($url =~ m@access_token=([^?&<>]+)@si); error ("no access_token= in that!") unless $token; my $fn = $ENV{HOME} . "/.$name-facebook-pass"; my $body = ''; if (open (my $in, '<', $fn)) { local $/ = undef; # read entire file $body = <$in>; close $in; } if (! ($body =~ s/^((ACCESS_TOKEN|OAUTH2):[ \t]*)([^\n]*)/$1$token/si)) { $body .= "OAUTH2:\t$token\n"; } open (my $out, '>', $fn) || error ("$fn: $!"); print $out $body; close $out; print STDOUT "\nDone! $fn has been updated with your\n" . "new access token.\n\n"; } # Load a URL from Facebook, convert JSON to hashrefs. # Retry a few times if it fails. # sub fb_load($$$) { my ($which, $ignore_errors, $args) = @_; my $url; if ($args =~ m/^http/s) { $url = $args; } else { $url = 'https://graph.facebook.com/'; $args =~ s/\?/&/g; $args =~ s/&/?/; $url .= $args; } my $obj = undef; my $retries = 10; for (my $i = 0; $i < $retries; $i++) { eval { my $json = LWP::Simple::get ($url); if ($json) { my $j = JSON::Any->new; $obj = $j->jsonToObj ($json); } }; last if defined ($obj); print STDERR "$progname: $which failed, retrying...\n" if ($verbose); sleep 5 + $i; } if (! $obj) { error ("$which failed after $retries tries -- $url") unless ($ignore_errors); return undef; } return $obj; } # Run an FQL query and return the result. # sub fql($$$) { my ($which, $token, $query) = @_; $query =~ s/\s+/ /gs; $query = url_quote ($query); my $url = ('https://graph.facebook.com/fql?q=' . $query . '&access_token=' . $token); my $result = fb_load ($which, 0, $url); return $result; } sub truncate_sentence($) { my ($s) = @_; $s =~ s/\n\n.*$//s; # If there is a sentence-end between 30 and 60 chars, stop there. $s =~ s/(.{30}[.?!]\s).*$/$1/s; # Else if there is a word-end between 40 and 60 chars, stop there. $s =~ s/(.{40}[^\s]+\.*).+$/$1 .../s; # Else truncate at 60 chars regardless. $s =~ s/^(.{60}).+/$1 .../s; $s =~ s/\. \.+$/./s; return $s; } # Hack the HTML bodies to make them more readable. # sub rewrite_html($) { my ($body) = @_; # Instead of going through the app proxy, go to the URL directly # so that we get larger inline images. # $body =~ s@\b( https?:// ( www\.facebook\.com | [^/<>\'\"]*?\.fbcdn\.net | fbexternal[^/<>\'\"]+ ) / ( www/ )? ( app_full_proxy\.php | safe_image\.php ) [^<>\"\']+ ) @{ my $url = $1; my ($redir) = ($url =~ m/(?:src|url)=(http[^&]+)/s); if ($redir) { $redir = url_unquote($redir); print STDERR "$progname: $url\n\t-> $redir\n" if ($verbose > 3); $url = $redir; } $url; }@gsexi; # Use larger Tumblr images. $body =~ s@\b(https?://[^/<>\"\']*?\.tumblr\.com[^<>\"\']*?)_250\.jpg @${1}_500.jpg@gsix; # Use larger FB images. $body =~ s@\b(https?://fbcdn-photos-.*?)_s\.jpg\b@${1}_o.jpg@gsix; # Fuckin' Youtube noise. $body =~ s@ ( & | [?&] ) feature= ( player_embedded | youtu\.be | share | youtube_gdata | relmfu ) \b ( & | [?&] )? @$1@gsix; return $body; } # Creates an HTML blob of a userpic and the user's name, floating left, # sub userpic_html($$$$$) { my ($name, $id, $smaller_p, $app, $app_id) = @_; $id = 'https://www.facebook.com/' . $id if ($id && $id !~ m/^http/s); my $size = ($smaller_p ? 50 : 100); my $photo = $id . '/picture?type=' . ($smaller_p ? 'small' : 'normal'); $photo =~ s@^(https?://)www(\.facebook\.com)@$1graph$2@si; my $html = html_quote ($name) || '???'; $html = "" . "
$html" if $photo; $html = "$html" if $id; $app = undef if ($app && $app =~ m/^(Links | Likes | Photos | Status | Facebook \s for \s (iPhone | Android )) $/sx); if ($app) { $app =~ s/_/ /gs; # "Share_bookmarklet" $html .= "
via $app" if $app; } $html = "
" . "$html
"; return $html; } sub parse_date($) { my ($date) = @_; return undef unless $date; my ($yyyy, $mm, $dd, $h, $m, $s, $tz) = # 2012-07-05T19:24:02+0000 ($date =~ m@^(\d{4})-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)([-+]?\d{4})?$@s); if ($yyyy) { error ("unparsable date: $date") unless $yyyy; error ("unparsable date: $date") if ($tz && $tz != 0); $date = timegm ($s, $m, $h, $dd, $mm-1, $yyyy-1900); } else { # Fri, 31 Aug 2012 09:13:42 PDT ($dd, $mm, $yyyy, $h, $m, $s) = ($date =~ m@^[A-Z][a-z][a-z] ,? \s (\d\d) \s ([A-Z][a-z][a-z]) \s (\d{4}) \s (\d\d) : (\d\d) : (\d\d) \s @sx); error ("unparsable date: $date") unless $yyyy; my %monthvals = ('Jan' => 1, 'Feb' => 2, 'Mar' => 3, 'Apr' => 4, 'May' => 5, 'Jun' => 6, 'Jul' => 7, 'Aug' => 8, 'Sep' => 9, 'Oct' => 10, 'Nov' => 11, 'Dec' => 12 ); $mm = $monthvals{$mm} if $mm; $date = timelocal ($s, $m, $h, $dd, $mm-1, $yyyy-1900); } return $date; } # Returns [time_t, permalink, rss_item]. # sub make_rss_entry($$$$$$$) { my ($entry, $friends, $likes, $kill, $mode, $token, $photop) = @_; my $id = $entry->{id}; my $subj = ($entry->{message} || $entry->{story} || $entry->{name} || ''); my $date = $entry->{updated_time} || $entry->{created_time}; my $from = $entry->{from}; my $name = $from->{name}; # Figure out whether this entry is from a friend or a page. # It's kind of tricky because they are not clearly identified. # { my $f2; my $friend_p = 0; my $page_p = 0; if (($f2 = $friends->{$from->{id}})) { # If it's in the friends array, it's definitely a friend. $friend_p = 1; } elsif (($f2 = $likes->{$from->{id}})) { # If it's in the likes array, it's definitely a like. $page_p = 1; } elsif ($from->{category}) { # People don't have categories. But not all pages have categories. $page_p = 1; } else { # Could be either... # If we have a list of friends, and it's not there, assume page, # and vice versa. If it's not in either list, assume page. if ($friends) { $page_p = 1; } elsif ($likes) { $friend_p = 1; } else { $page_p = 1; } } if ($friend_p) { return undef if ($mode eq 'pages'); } elsif ($page_p) { return undef if ($mode eq 'friends'); } } $id =~ s/^.*_//s; # "xxxx_yyyy" => "yyyy" my $app = $entry->{application}; my $app_name = $app ? $app->{name} || $app->{namespace} : undef; my $app_id = $app ? $app->{id} : undef; return undef if (killp ($kill, $name) || ($entry->{picture} && killp ($kill, $name. ' photos')) || ($app->{name} ? (killp ($kill, $app->{name}) || killp ($kill, $name . " " . $app->{name})) : 0) || ($app->{namespace} ? (killp ($kill, $app->{namespace}) || killp ($kill, $name . " " . $app->{namespace})) : 0)); my $post_url = (($entry->{link} && $entry->{link} =~ m@^https?:www\.facebook\.com/@si) ? $entry->{link} : ($photop ? ('https://www.facebook.com/photo.php?fbid=' . $id) : ('https://www.facebook.com/' . $from->{id} . '/posts/' . $id))); my $perm = ($post_url =~ m@^http@s ? "true" : "false"); my $time_t = parse_date ($date); $date = POSIX::strftime ("%a, %d %b %Y %H:%M:%S %Z", localtime ($time_t)); $subj =~ s/^\s+//gs; $subj =~ s/\s+$//gs; if (! $subj && $entry->{picture}) { $subj = 'Photo'; } my $obody = $subj; my $html = html_quote ($subj); $subj = html_quote (truncate_sentence ($subj)); $html =~ s/\n/
/gs; $html =~ s@\b(https?://[^\s]+?[a-z\d/]) ( \s | $ ) @$1$2@gsix; if ($entry->{place}) { my $place = $entry->{place}; my $name = $place->{name} || $place->{title} || '???'; $subj = "Checked in at $name" unless $subj; $html .= ('

Checked in at ' . html_quote ($name) . ''); } if ($entry->{to}) { my @tto = (); my @hto = (); foreach my $tt (@{$entry->{to}->{data}}) { my $name = $tt->{name}; next if ($subj =~ m/^Via \Q$name/si); push @tto, $name; push @hto, ('" . $name . ''); } my $tto = join (', ', @tto); my $hto = join (', ', @hto); if (@tto == 1) { # A single "to" means "writing on wall". # A bunch of "to"s means tagging, I guess. Not in subject. $subj = "--> $tto: $subj"; } $html .= "
→ $hto." if $hto; } # Sometimes "message" is one thing and "story" is "XYZ shared ABC's Event". # However there is no link to said event to be found anywhere. Nice. # my $s2 = $subj || ''; $s2 =~ s/[\s.]+$//s; if ($subj && $entry->{story} && $entry->{story} !~ m/\Q$s2/si) { $html .= "

" . html_quote ($entry->{story}); } # Sometimes posted events have a link but no title or picture. # Sometimes they have no info at all. # Facebook's API is some bullshit. # if (!$entry->{picture} && (($entry->{status_type} && $entry->{status_type} eq 'created_event') || # Gaah, not all "shared_story" contain a shared event. ($entry->{story} && $entry->{story} =~ m/shared .*\'s event|shared an event/si))) { find_event ($entry); } if ($entry->{link} || $entry->{source}) { my $link_url = $entry->{link} || $entry->{source}; my $link_title = $entry->{name} || $entry->{caption} || ''; my $link_cap = ($entry->{caption} && $entry->{name} ? ($entry->{caption} =~ m@^www\.|\.com$@i ? $entry->{name} : $entry->{caption}) : ($entry->{caption} || $entry->{name} || '')); my $link_pic = $entry->{picture}; my $link_desc = $entry->{description}; # Apparently the API will only give us 1 picture even if there # were more in this entry. It's not just "me/home/" -- even # loading just this entry only gives us one picture. $link_cap = '' if ($link_cap eq $link_title); $html .= '

'; $html .= ''; $html .= ('') if ($link_pic); $html .= html_quote ($link_title) if ($link_title); $html .= '
' . html_quote ($link_cap) if ($link_cap); $html .= '
'; $html .= '
' . html_quote ($link_desc) if ($link_desc); $html .= '
' if $link_pic; # Inline Youtube and Vimeo videos. # if ($link_url =~ m@\b(youtube)\.com/.*v[?=/]([^<>?&]+)@si || $link_url =~ m@\b(vimeo)\.com/(\d+)@si || $link_url =~ m@\b(vimeo)\.com/.*/videos/(\d+)@si) { my $id = $2; my $url = ($1 eq 'youtube' ? "http://www.youtube.com/embed/$id?version=3" : "http://player.vimeo.com/video/$id"); my $w = 853; my $h = int ($w / (16 / 9)); $html .= ("

"); } } if ($entry->{tags}) { my @tags = (); foreach my $tag (@{$entry->{tags}->{data}}) { push @tags, ($tag->{id} ? ('' . html_quote ($tag->{name}) . '') : html_quote ($tag->{name})); } my $lcount = $#tags + 1; my $tags = '

With ' . (($#tags > 0 ? join(", ", @tags[0..($#tags-1)]) . " and " : "") . $tags[$#tags] . '.'); $html .= "

$tags"; } if ($entry->{likes}) { my @likes = (); my $max = 10; my $got = $entry->{likes}->{data} ? @{$entry->{likes}->{data}} : 0; my $total = $entry->{likes}->{count}; if ($entry->{likes}->{data}) { my $i = 0; foreach my $like (@{$entry->{likes}->{data}}) { next unless $like->{id}; next if (killp ($kill, $like->{name})); push @likes, ('' . $like->{name} . ''); last if (++$i > $max); } } $total = @likes unless defined($total); # WTF my $likes; if ($total == 0) { $likes = ''; } elsif (@likes == 0) { $likes = "$total like this."; } elsif (@likes == $total) { if ($total == 1) { $likes = $likes[0] . " likes this."; } else { $likes = (join(", ", @likes[0..($#likes-1)]) . " and " . $likes[$#likes] . " " . ($#likes == 0 ? "likes" : "like") . " this."); } } else { $likes = (join(", ", @likes) . " and " . ($total - @likes) . " others like this."); } $html .= "

$likes" if $likes; } if ($entry->{comments} && $entry->{comments}->{count}) { # Apparently "me/home/" will only give us 2 comments on certain entries. # If there are more comments there than we have, re-load the comment # data to get more. my $got = $entry->{comments}->{data} ? @{$entry->{comments}->{data}} : 0; my $more = $entry->{comments}->{count} - $got; my $omore = $more; if ($more) { print STDERR "$progname: " . $entry->{id} . ": loading $more more comments\n" if ($verbose > 1); my $c2 = fb_load ("entry comments", 1, $entry->{id} . "&fields=comments&access_token=$token"); if ($c2 && $c2->{comments} && $c2->{comments}->{data}) { $entry->{comments} = $c2->{comments}; $got = $entry->{comments}->{data} ? @{$entry->{comments}->{data}} : 0; $more = $entry->{comments}->{count} - $got; # Doesn't work at all for "updated their cover photo". # Don't complain if we only got the first 50 comments. print STDERR "$progname: " . $entry->{id} . ": only got $got of " . $entry->{comments}->{count} . " comments!\n" if ($more && $verbose > 1 && $entry->{comments}->{count} <= 50); } else { print STDERR "$progname: " . $entry->{id} . ": failed to load more comments!\n" if ($verbose > 1); } } $html .= "


"; if ($entry->{comments}->{data}) { foreach my $comm (sort { ($a->{created_time} || 0) cmp ($b->{created_time} || 0) } @{$entry->{comments}->{data}}) { my $cfrom = $comm->{from}; next unless defined($cfrom); my $cdate = $comm->{created_time}; my $cid = $comm->{id}; my $cbody = $comm->{message}; my $clike = $comm->{like_count}; $cdate = parse_date ($cdate); $cdate = POSIX::strftime ("%I:%M %p", localtime ($cdate)); my $cname = $cfrom->{name}; my $ccurl = "https://www.facebook.com/" . $cid; my $cfurl = "https://www.facebook.com/" . $cfrom->{id}; if ($cid =~ m@^\d+_\d+_(\d+)$@si) { $ccurl = ($post_url . ($post_url =~ m/[?]/s ? '&' : '?') . 'comment_id=' . $1); } $cbody = html_quote ($cbody); $cbody =~ s@\b(https?://[^\s]+?[a-z\d/])(\s|$) @$1$2@gsix; $cbody = ('

' . '

' . userpic_html ($cname, $cfurl, 1, undef, undef) . "$cdate: " . $cbody . ($clike ? " ($clike like)" : '') . '
'); $html .= $cbody; } } # If we don't have all the comments, say so. $html .= "

($more more comment" . ($more == 1 ? "" : "s") . ".)

" if ($more); } $html = "[blank]" unless $html; $html = userpic_html ($name, $from->{id}, 0, $app_name, $app_id) . $html; $html =~ s/\n\n+/

/gsi; $html = rewrite_html ($html); my $rss = (" \n" . " $post_url\n" . " $name\n" . " $subj\n" . " $obody\n" . " $html]]>\n" . " $date\n" . " \n"); print STDERR "$progname: $name: $date: $subj $post_url\n" if ($verbose > 2); if (0) { my $x = Dumper($entry); if ($x =~ m@shared[^<>]*?event@si) { $entry->{actions} = ''; $entry->{comments} = ''; $entry->{likes} = ''; $x = Dumper($entry); $html =~ s/{id}; my ($a, $b) = ($id =~ m/^(.*)_(.*)/si); if (! $b) { print STDERR "$progname: unparsable ID: $id\n"; return; } my $url = "https://www.facebook.com/$a/posts/$b"; print STDERR "$progname: " . $entry->{id} . ": loading event info\n" if ($verbose > 1); # We get an "Upgrade your browser" error message without this. $LWP::Simple::ua->agent("$progname/$version"); my $body = LWP::Simple::get ($url) || ''; my ($event, $img) = ($body =~ m@]*? HREF= ["'] ( /events [^"']+ ) [^<>]* > \s* ]*? SRC= ["'] ( [^"']+ ) [^<>]* > @six); if (! $event) { if ($body =~ m/You must log in to see this page/si) { print STDERR "$progname: $id: page is protected: $url\n" if ($verbose); } else { # print STDERR "$progname: $id: no event link in $url\n"; #print "######\n" . Dumper($entry) . "\n\n######\n$body\n"; } $entry->{story} .= ' [permission denied]'; return; } my ($title) = ($body =~ m@]*? HREF= ["'] \Q${event}\E [^<>]* > \s* ( [^<>]+ ) \s* @six); $title = "Error: couldn't find title" unless $title; $entry->{link} = $event; $entry->{caption} = $title; $entry->{picture} = $img if $img; } sub make_photo_rss_entry($$$$$$$) { my ($id, $tag, $friends, $likes, $kill, $mode, $token) = @_; my $query = ($id . '?access_token=' . $token); my $entry = fb_load ("photo", 0, $query); if (!$entry || !$entry->{id}) { print STDERR "$progname: no info for photo $id\n" if ($verbose > 1); return undef; } $entry->{message} = ("$tag was tagged in " . $entry->{from}->{name} . "'s photo" . ($entry->{name} ? ", \"" . $entry->{name} . "\"." : "")); return make_rss_entry ($entry, $friends, $likes, $kill, $mode, $token, 1); } # Returns a list of the entries in the output RSS file. # This is necessary because Facebook sometimes leaves shit out of the # stream: you load the stream twice, and some old entries might vanish. # So we don't ever let them vanish, and hopefully after a few runs, # we'll actually have the whole thing. # sub load_rss($) { my ($file) = @_; my $old = ''; if (open (my $in, '<:utf8', $file)) { local $/ = undef; # read entire file $old = <$in>; close $in; } $old =~ s/().*?$@$1@si if (@items); return @items; } sub generate_rss($$$$) { my ($app, $outfile, $mode, $since) = @_; my $token = load_access_token ($app); my $kill = load_killfile ($app); my $result = fb_load ("name", 0, "me?fields=name&access_token=$token"); my $my_name = $result->{name}; my %friends; my %likes; if (1||$mode ne 'pages') { print STDERR "$progname: listing friends...\n" if ($verbose); my @friends = (); my $query = 'me/friends?access_token=' . $token; while ($query) { $result = fb_load ("friends", 0, $query); my $f = $result->{data}; push @friends, @$f; $query = $result->{paging}; $query = $query->{next} if ($query); $query .= "&access_token=$token" if ($query); } foreach my $f (@friends) { $friends{$f->{id}} = $f; } } if (1||$mode ne 'friends') { print STDERR "$progname: listing pages...\n" if ($verbose); my @likes = (); my $query = 'me/likes?access_token=' . $token; while ($query) { $result = fb_load ("likes", 0, $query); my $f = $result->{data}; push @likes, @$f; $query = $result->{paging}; $query = $query->{next} if ($query); $query .= "&access_token=$token" if ($query); } foreach my $f (@likes) { $likes{$f->{id}} = $f; } } # Feed validator demands a . my $rss_url = 'http://www.facebook.com/'; my $rss_title = "${my_name}'s Facebook Feed"; print STDERR "$progname: loading stream...\n" if ($verbose); my $query = ('me/home' . '?limit=' . $since . '&access_token=' . $token); $result = fb_load ("stream", 0, $query); $result = $result->{data}; my @new_entries = (); foreach my $hash ($result ? @$result : ()) { my $entry = make_rss_entry ($hash, \%friends, \%likes, $kill, $mode, $token, 0); next unless $entry; push @new_entries, $entry; } # Checkins don't show up on "me/home" so we have to read them separately. # if ($mode ne 'pages') { print STDERR "$progname: loading checkins...\n" if ($verbose); # Not sure limit= is working here. I'm seeing days-old checkins. $query = ('search?type=checkin' . '&limit=' . $since . '&access_token=' . $token); $result = fb_load ("checkins", 0, $query); $result = $result->{data}; foreach my $hash ($result ? @$result : ()) { my $photop = defined ($hash->{picture}); # yeah, never happens my $entry = make_rss_entry ($hash, \%friends, \%likes, $kill, $mode, $token, 0); next unless $entry; push @new_entries, $entry; } # Likewise, photo tags. Need to use FQL for these! # print STDERR "$progname: loading photo tags...\n" if ($verbose); $result = fql ("photo tags", $token, "SELECT object_id,text,created FROM photo_tag WHERE subject IN (SELECT uid2 FROM friend WHERE uid1 = me()) AND created > $since"); $result = $result->{data}; foreach my $hash ($result ? @$result : ()) { my $id = $hash->{object_id}; my $tag = $hash->{text}; my $entry = make_photo_rss_entry ($id, $tag, \%friends, \%likes, $kill, $mode, $token); next unless $entry; push @new_entries, $entry; } } my @rss = (); my %dups; foreach my $entry (sort { $b->[0] <=> $a->[0] } @new_entries) { my ($tt, $url, $item) = @$entry; if ($dups{$url}) { print STDERR "$progname: skipping dup entry for $url\n" if ($verbose > 1); next; } $dups{$url} = $item; push @rss, $item; } my @old_entries = load_rss ($outfile); foreach my $item (@old_entries) { my ($date) = ($item =~ m@(.*?)<@si); my ($url) = ($item =~ m@]*>(.*?)<@si); next unless $url; my $old = $dups{$url} || ''; $old =~ s/^\s+//s; $old =~ s/\s+$//s; $item =~ s/^\s+//s; $item =~ s/\s+$//s; if (! $old) { print STDERR "$progname: preserved vanished old entry for $url\n" if ($verbose > 2); } else { if ($old eq $item) { print STDERR "$progname: unchanged entry for $url\n" if ($verbose > 2); } else { print STDERR "$progname: updating entry for $url\n" if ($verbose > 2); } next; } $date = parse_date ($date); if ($date && $date < $since) { print STDERR "$progname: expiring old entry for $url\n" if ($verbose > 1); next; } push @rss, $item; } my $count = @rss; my $desc = "$count entries"; my $oentries = join ('', @old_entries); my $nentries = join ('', @rss); if ($oentries eq $nentries) { print STDERR "$outfile: unchanged$desc\n" if ($verbose); } else { my $rss = ("\n" . "\n" . " \n" . " $rss_url\n" . " $rss_title\n" . " $rss_title\n" . " en\n" . $nentries . " \n" . "\n"); $desc .= ", " . keys(%friends) . " friends" if ($mode ne 'pages'); $desc .= ", " . keys(%likes) . " pages" if ($mode ne 'friends'); $desc = " ($desc)"; my $file_tmp = "$outfile.tmp"; open (my $out, '>', $file_tmp) || error ("$file_tmp: $!"); (print $out $rss) || error ("$file_tmp: $!"); close $out; if (!rename ("$file_tmp", "$outfile")) { unlink "$file_tmp"; error ("mv $file_tmp $outfile: $!"); } print STDERR "$progname: wrote $outfile$desc\n" if ($verbose); } } sub error($) { my ($err) = @_; print STDERR "$progname: $err\n"; exit 1; } sub usage() { print STDERR "usage: $progname [--verbose] " . "[ --pages-only | --friends-only]\n" . "\t\t\tappname outfile\n"; print STDERR "usage: $progname [--verbose] --generate-session\n"; exit 1; } sub main() { my ($app, $mode, $gen_p, $file, $since); $mode = 'both'; $since = time() - (60 * 60 * 24); # 1 day ago while ($#ARGV >= 0) { $_ = shift @ARGV; if (m/^--?verbose$/) { $verbose++; } elsif (m/^-v+$/) { $verbose += length($_)-1; } elsif (m/^--?pages?(-only)?$/) { $mode = 'pages'; } elsif (m/^--?friends?(-only)?$/) { $mode = 'friends'; } elsif (m/^--?gen(erate(-session)?)?$/) { $gen_p = 1; } elsif (m/^-./) { usage; } elsif (!$app) { $app = $_; } elsif (!$file) { $file = $_; } else { usage; } } $Data::Dumper::Indent = 1; $Data::Dumper::Terse = 1; $Data::Dumper::Quotekeys = 0; $Data::Dumper::Pair = "\t=> "; $Data::Dumper::Pad = " "; if ($gen_p) { facebook_generate_session(); } else { usage unless $app; usage unless $file; generate_rss ($app, $file, $mode, $since); } } main(); exit 0;