#!/opt/local/bin/perl -w # Copyright © 2012-2021 Jamie Zawinski # # Permission to use, copy, modify, distribute, and sell this software and its # documentation for any purpose is hereby granted without fee, provided that # the above copyright notice appear in all copies and that both that # copyright notice and this permission notice appear in supporting # documentation. No representations are made about the suitability of this # software for any purpose. It is provided "as is" without express or # implied warranty. # # Converts your Facebook stream to an RSS feed. # # Note that anyone who can see this RSS feed can see your entire Facebook # world, so be careful to protect it. # # Usage: # # Before using this, you need to set up your authentication keys, once. # It's a pain in the ass. Do it by running this: # # facebook-rss.pl --generate-session # # and following the instructions. # # To generate an RSS feed of your entire Facebook stream: # facebook-rss.pl your-app-name output-file.rss # # If you want only your friends (real people) and not pages that you # have liked, then use --friends-only. # # If you want only pages and not people, use --pages-only. # # (This way you can have one RSS feed of each, if you want.) # # There's also a killfile! # # $HOME/.APPNAME-facebook-kill can contain a list of user names. # posts by those users are excluded from the RSS feed. (You could also # just unfriend those people, but ignoring them might be kinder.) # # Killfile entries can be: # # - User Name # - Application Name (to kill all posts via the given application) # - Company Name (to kill all app posts by a given creator, e.g. "Zynga") # - User Name Photos (to kill only photo-posts by "User Name") # - User Name Events (to kill only event "going" posts by "User Name") # - User Name App Name (to kill all posts by a user using a particular app, # e.g. ("John Smith Twitter") to kill only a user's Twitter crossposts) # - "Happy Birthday", to kill every post beginning with those two words. # (This one is a special case, not a general-purpose filter). # # # # This script can also archive all of your direct messages: # # facebook-rss.pl your-app-name output-directory # # When used in this form, it will create files like "DIR/User Name.txt" # which contain every private-message conversation you've had with that # person. The file will be updated as new messages arrive. It also # saves any private-message images in an Attachments/ subdirectory. # (This isn't related to RSS, but it was easier to put this feature # here than to have it be a standalone program.) # # # BUGS: # # - As of September 2016, this doesn't work very well any more. # It still works fine for pages, but not for the feed of your personal # friends. Facebook changed the API so that apps can't access that # crap any more -- only their own privileged apps can. So to get your # own feed, that means it needs to scrape HTML, which means it needs # an HTTP login cookie. This code can grab your Facebook cookies out # of Safari. If you don't use Safari... code needs to be written. # (However, it does cache those cookies in the auth file, so you can # copy that auth file to a non-desktop machine that this runs on.) # # - It's REALLY slow. It used to be faster, but Facebook is a moving # target. There used to be a way to just load what's in your news # feed ("/me/home") but that stopped working, so now we have to # construct the feed by getting a list of your friends, and then # loading the most recent things on each of those pages. Presumably # the Facebook mobile apps are doing something more efficient than # that, but I haven't figured out what it is. # # - Finding friends tagged in non-friends photos is slow, since that # info doesn't show up on "/me/home". # # - It detects checkins made by your friends, but does not detect # them being checked in by others, e.g., "X is with Y and N others # at Z". "/USER/checkins" and "/USER/locations" seems to always # return nothing, and these 3rd-party checkins don't show up on # "/USER/feed" or "/USER/tagged" either. Even using FQL, the # "checkin" table only lets you query by "author_uid" or "page_id", # not by "tagged_uids". # # Update: wait, this magically started working? Feb 2015. # # - Events are slow. Even "/me/events" is always null. FQL "event_member" # queries are always null. # # Created: 4-Jul-2012. require 5; use diagnostics; use strict; use POSIX; use Fcntl; use Fcntl ':flock'; # import LOCK_* constants use Time::Local; use Date::Parse; use LWP::UserAgent; use LWP::Simple; use JSON::Any; use Digest::SHA1 (qw{sha1_hex}); use HTTP::Cookies; use Data::Dumper; no warnings 'utf8'; my $progname = $0; $progname =~ s@.*/@@g; my ($version) = ('$Revision: 1.187 $' =~ m/\s(\d[.\d]+)\s/s); my $verbose = 0; my $debug_p = 0; sub url_quote($) { my ($u) = @_; # utf8::encode($u); # Split wide chars into multi-byte sequences. $u =~ s|([^- a-zA-Z0-9.\@_])|sprintf("%%%02X", ord($1))|gse; $u =~ s| |+|gs; return $u; } sub url_unquote($) { my ($url) = @_; $url =~ s/[+]/ /g; $url =~ s/%([a-z0-9]{2})/chr(hex($1))/ige; return $url; } sub html_quote($) { my ($s) = @_; return undef unless defined ($s); $s =~ s/&/&/gs; $s =~ s//>/gs; return $s; } # Convert any HTML entities to Unicode characters. # sub html_unquote($) { my ($s) = @_; return HTML::Entities::decode_entities ($s); } # Read $HOME/.APPNAME-facebook-kill, return a hashref of users to ignore. # sub load_killfile($) { my ($user) = @_; my $file = $ENV{HOME} . "/.$user-facebook-kill"; my %users; if (open (my $in, '<', $file)) { while (<$in>) { s/^\s+|\s+$//g; s/\s*#.*$//s; next unless $_; $users{lc($_)} = 1; } close $in; } return \%users; } sub kill_string_p($$) { my ($kill, $string) = @_; return 0 unless $string; my $s = lc($string); $s =~ s/^\s+|\s+$//gsi; return 1 if ($s && $kill->{$s}); $s =~ s/[^a-z\d ]//gsi; # nuke non-ASCII and try again $s =~ s/^\s+|\s+$//gsi; return 1 if ($s && $kill->{$s}); return 0; } sub kill_entry_p($$) { my ($kill, $entry) = @_; return 0 unless $entry; my $id = $entry->{object_id} || $entry->{id}; my $name = ($entry->{from} ? $entry->{from}->{name} : '???'); if (kill_string_p ($kill, $name)) { print STDERR "$progname: killfile: $id: name: $name\n" if ($verbose > 1); return 1; } if ($entry->{picture} && kill_string_p ($kill, $name . ' photos')) { print STDERR "$progname: killfile: $id: photos: $name\n" if ($verbose > 1); return 1; } my $txt = $entry->{message} || $entry->{story} || $entry->{name} || ''; my $txt2 = $txt; if ($entry->{htmlp}) { $txt = $entry->{txt}; $txt2 = $txt; $txt2 =~ s/^[^\n]+\n+//s; # omit first line } # There does not seem to be a more reliable way to match these. # Shares tend to look the same as explicitly-posted URLs. if ($txt =~ m/^\Q$name\E shared /s && kill_string_p ($kill, $name . ' shares')) { print STDERR "$progname: killfile: $id: shared link: $name\n" if ($verbose > 1); return 1; } # Special case to allow killfiling of any entries whose bodies begin with # "Happy Birthday" because the all-day flood of those is really annoying. # With scraping: "NAME1 > NAME2 Happy birthday" # if ($txt2 =~ m/^\s*((happy|happiest(\s+of))\s*)+birthday/si && kill_string_p ($kill, 'Happy Birthday')) { print STDERR "$progname: killfile: $id: birthday ($name)\n" if ($verbose > 1); return 1; } my $app = $entry->{application}; if ($app) { foreach my $a ($app->{name}, $app->{company}, $app->{namespace}, app_company ($app->{id})) { next unless $a; if (kill_string_p ($kill, $a)) { print STDERR "$progname: killfile: $id: app: $name ($a)\n" if ($verbose > 1); return 1; } if (kill_string_p ($kill, "$name $a")) { print STDERR "$progname: killfile: $id: app: $name ($a)\n" if ($verbose > 1); return 1; } } } return 0; } sub load_access_token($) { my ($app) = @_; my $file = $ENV{HOME} . "/.$app-facebook-pass"; my $token = undef; my $secret = undef; my $cookie = undef; if (open (my $in, '<', $file)) { while (<$in>) { if (m/^(?:OAUTH2|ACCESS_TOKEN):\s*(.*?)\s*$/s) { $token = $1; } elsif (m/^(?:SECRET):\s*(.*?)\s*$/s) { $secret = $1; } elsif (m/^(?:COOKIE):\s*(.*?)\s*$/s) { $cookie = $1; } } close $in; } error ("no access token in $file\n\n" . "\t\t run: $progname --generate-session\n") unless $token; print STDERR "$progname: read $file\n" if ($verbose > 1); return ($token, $secret, $cookie); } my $cookies_loaded_p; sub load_safari_cookies($$) { my ($ua, $match) = @_; my $count = 0; my $jar = HTTP::Cookies->new(); # You gotta be fucking kidding me. # http://www.securitylearn.net/2012/10/27/cookies-binarycookies-reader/ my $file = $ENV{HOME} . "/Library/Cookies/Cookies.binarycookies"; open (my $in, '<:raw', $file) || error ("$file does not exist"); my $buf = ''; my $out; read ($in, $buf, 4); $out = unpack ("A*", $buf); # big-endian $out =~ m/^cook$/s || error ("$file: give me a cookie"); read ($in, $buf, 4); my $npages = unpack ("N*", $buf); # big-endian # Read page sizes my @page_sizes = (); for (my $i = 0; $i < $npages; $i++) { read ($in, $buf, 4); my $size = unpack ("N*", $buf); # big-endian $page_sizes[$i] = $size; } # Read pages my @pages = (); for (my $i = 0; $i < $npages; $i++) { read ($in, $buf, $page_sizes[$i]); $pages[$i] = $buf; } close ($in); # Parse pages for (my $i = 0; $i < $npages; $i++) { my $page = $pages[$i]; my $ptr = 0; $buf = substr ($page, $ptr, 4); $ptr += 4; my $tag = unpack ("N*", $buf); # big-endian error ("unparsable page $i: tag $tag") unless ($tag == 256); $buf = substr ($page, $ptr, 4); $ptr += 4; my $ncookies = unpack ("L*", $buf); # little-endian for (my $j = 0; $j < $ncookies; $j++) { $buf = substr ($page, $ptr, 4); $ptr += 4; my $ptr2 = unpack ("L*", $buf); # little-endian my $start = $ptr2; my @fields = (); for (my $k = 0; $k < 8; $k++) { $buf = substr ($page, $ptr2, 4); $ptr2 += 4; $fields[$k] = unpack ("L*", $buf); # little-endian } my ($csize, undef, $flags, undef, $dom_off, $name_off, $path_off, $val_off) = @fields; foreach ($dom_off, $name_off, $path_off, $val_off) { $_ += $start; # increment each } my $dom = substr ($page, $dom_off, $name_off - $dom_off - 1); my $name = substr ($page, $name_off, $path_off - $name_off - 1); my $path = substr ($page, $path_off, $val_off - $path_off - 1); my $val = substr ($page, $val_off, $csize + $start - $val_off - 1); foreach ($dom, $name, $path, $val) { $_ =~ s/\000.*$//s; # null-terminate each } if ($dom =~ m/$match/si) { print STDERR ("$progname: cookie:" . " domain=$dom" . " name=$name" . " path=$path" . " value=$val" . "\n") if ($verbose > 2); my $secure = 0; my $version = 0; my $port = undef; my $maxage = 100000; $jar->set_cookie ($version, $name, $val, $path, $dom, $port, $path, $secure, $maxage); $count++; } } } $ua->cookie_jar ($jar); $cookies_loaded_p = ($count > 0); print STDERR "$progname: loaded $count Safari cookies\n" if ($verbose > 1 || !$cookies_loaded_p); } sub load_saved_cookies($) { my ($app) = @_; return if $cookies_loaded_p; my (undef, undef, $cookie) = load_access_token($app); my $ua = $LWP::Simple::ua; if ($cookie) { my $jar = $ua->cookie_jar(); if (! $jar) { $jar = HTTP::Cookies->new(); $ua->cookie_jar ($jar); } my $secure = 0; my $version = 0; my $port = undef; my $maxage = 100000; my $path = '/'; my $dom = '.facebook.com'; my $count = 0; foreach my $c (split (/;\s+/s, $cookie)) { my ($name, $val) = ($c =~ m/^([^=]+)=(.*)$/s); error ("unparsable saved cookie: $c") unless $name; $jar->set_cookie ($version, $name, $val, $path, $dom, $port, $path, $secure, $maxage); $count++; } $cookies_loaded_p = ($count > 0); print STDERR "$progname: loaded $count saved cookies\n" if ($verbose > 1 || !$cookies_loaded_p); } if (! $cookies_loaded_p) { load_safari_cookies ($ua, '\bfacebook\.com$'); save_cookies ($app, $cookie); } } sub save_cookies($;$) { my ($name, $ocookie) = @_; return unless $cookies_loaded_p; my @cookies = (); my $ua = $LWP::Simple::ua; my $jar = $ua->cookie_jar(); $jar->scan (sub { my ($version, $key, $val, $path, $domain, $port, $path_spec, $secure, $maxage, $discard) = @_; push @cookies, "$key=$val" if ($domain =~ m/\bfacebook\.com$/si); }); return unless @cookies; my $c = join ('; ', @cookies); if ($ocookie && $ocookie eq $c) { print STDERR "$progname: cookies unchanged\n" if ($verbose > 1); return; } my $fn = $ENV{HOME} . "/.$name-facebook-pass"; my $body = ''; open (my $in, '<', $fn) || error ("$fn: $!"); { local $/ = undef; # read entire file $body = <$in>; close $in; } if (! ($body =~ s/^((COOKIE):[ \t]*)([^\n]*)/$1$c/mi)) { $body .= "COOKIE:\t$c\n"; } open (my $out, '>', $fn) || error ("$fn: $!"); print $out $body; close $out; system ("chmod", "og-rw", $fn); print STDERR "$progname: wrote $fn: saved cookie\n" if ($verbose > 1); } # The Internets have discovered these: # # Facebook's iPhone app key: 3e7c78e35a76a9299309885393b02d97 # Facebook's iPhone app secret: c1e620fa708a1d5696fb991c1bde5662 # # Facebook's iPad app key: f0c9c86c466dc6b5acdf0b35308e83d1 # Facebook's iPad app secret: 7c036d47372dd5f2df27bfe76d4ae0c4 # # But how do I turn those into an OAuth2 access_token? # # Aha! This works: just searching for "Facebook for iPhone" on # Facebook reveals that the "App ID / API Key" is 6628568379, # and for "Facebook for iPad", 173847642670370. # # So this URL: # # https://graph.facebook.com/oauth/authorize # ?type=user_agent&client_id=6628568379 # &redirect_uri= # http://www.facebook.com/connect/login_success.html&scope=read_stream # # gives us an OAuth2 access_token that has the magic read-permissions of # the iPhone app, which means that it can read posts even by people who # have blocked apps from reading their stream (assuming that you can # read them yourself). # # Update: No longer works as of Sep 2016. # https://www.facebook.com/iphone now says "content isn't available right # now". It still has the same ID, which you can see with: # https://graph.facebook.com/iphone?access_token=... # but trying to authorize it says "does not look like a valid app ID." sub facebook_generate_session($) { my ($app) = @_; my @perms = (#'read_stream', #'read_mailbox', # deprecated #'user_posts', #'user_events', # The commented-out ones are no longer valid: #'rsvp_event', #'user_likes', #'friends_likes', #'user_status', #'friends_status', #'user_checkins', #'friends_checkins', #'user_photos', #'friends_photos', #'user_birthday', #'friends_birthday', #'user_location', #'friends_location', #'user_photo_video_tags', #'friends_photo_video_tags', #'user_videos', #'friends_videos', #'publish_stream', #'user_groups', #'user_managed_groups', 'manage_pages', 'publish_pages', #'publish_actions', 'ads_management', 'ads_read', # Removed Feb 2021 #'read_audience_network_insights', #'read_insights', #'pages_manage_cta', #'pages_manage_instant_articles', 'pages_show_list', #'read_page_mailboxes', # Added Sep 2020, removed Feb 2021. Needed again, June 2021? 'pages_manage_posts', 'publish_to_groups', # This too? ); print STDOUT (' 1) Go to https://developers.facebook.com/apps and create an "App". You only need to do this once. 2) Click on the name of the app that you created. Enter its "App ID" here: '); my $app_id = <>; chomp ($app_id); error ("That's not an App ID: \"$app_id\"") unless ($app_id =~ m/^\d{8,}$/s); my $data = fb_load ("app id", $app_id); my $name = $data ? $data->{namespace} || $data->{name} : 0; error ("Unknown app ID: $app_id") unless $name; $name = lc($name); $name =~ s/\s+//s; print STDOUT ("3) Enter its \"App Secret\" here: "); my $app_secret = <>; chomp ($app_secret); error ("That's not an app secret: \"$app_secret\"") unless ($app_secret =~ m/^[\da-f]{24,}$/si); print STDOUT ("4) Select the \"Facebook Login / Settings\" tab and " . "enter the URL in the\n" . "\"Valid OAuth Redirect URIs field\"," . " a URL on your web site: "); my $site = <>; chomp ($site); error ("That's not a URL: $site") unless ($site =~ m@^https?://@s); error ("That's not an app secret: \"$app_secret\"") unless ($app_secret =~ m/^[\da-f]{24,}$/si); my $url = ('https://graph.facebook.com/oauth/authorize' . '?type=user_agent' . '&client_id=' . $app_id . '&redirect_uri=' . url_quote($site) . '&scope=' . join(',', @perms)); print STDOUT "\n"; print STDOUT "5) Go to this URL in a browser that is logged in to Facebook:". "\n\n $url\n\n"; print STDOUT "6) Say yes if there are any questions.\n"; print STDOUT "7) You should be redirected to a page saying 'Success'.\n"; print STDOUT " or just '0'.\n"; print STDOUT " If it complains about invalid scopes, remove them\n"; print STDOUT " and try again.\n"; print STDOUT " Enter the URL of that page here: "; $url = <>; chomp ($url); my ($token) = ($url =~ m@access_token=([^?&<>]+)@si); error ("There's no \"access_token=\" in that URL!\n" . "\t\t Maybe try it again with JavaScript disabled?") unless $token; $token = extend_token ($token, $app_id, $app_secret); my $fn = $ENV{HOME} . "/.$name-facebook-pass"; my $body = ''; if (open (my $in, '<', $fn)) { local $/ = undef; # read entire file $body = <$in>; close $in; } if (! ($body =~ s/^((ACCESS_TOKEN|OAUTH2):[ \t]*)([^\n]*)/$1$token/mi)) { $body .= "OAUTH2:\t$token\n"; } if (! ($body =~ s/^((SECRET):[ \t]*)([^\n]*)/$1$app_secret/mi)) { $body .= "SECRET:\t$app_secret\n"; } open (my $out, '>', $fn) || error ("$fn: $!"); print $out $body; close $out; system ("chmod", "og-rw", $fn); print STDOUT "\nDone! $fn has been updated with your\n" . "new access token. Keep it secret.\n\n"; } # Load a URL from Facebook, convert JSON to hashrefs. # Retry a few times if it fails. # Duplicated in dna/utils/fbmirror.pl # Duplicated in dna/facebook/cover-sync.pl # # #### This should not be being called any more. We only scrape HTML. # my $fb_first_time_p = 1; sub fb_load($$;$$) { my ($description, $args, $post_args, $ignore_errors) = @_; $ignore_errors = 0 unless defined ($ignore_errors); #use Devel::StackTrace; print STDERR "\n\n#####\n" . Devel::StackTrace->new->as_string; die "shitfuck"; my $url; if ($args =~ m/^http/s) { $url = $args; } else { $url = 'https://graph.facebook.com/'; $args =~ s/\?/&/g; $args =~ s/&/?/; $url .= $args; } if ($post_args) { foreach my $k (keys %$post_args) { # Leave image data alone next if ($k eq 'source' || $k eq 'video_file_chunk'); my $v = $post_args->{$k}; utf8::encode ($v); # Unpack wide chars to multi-byte UTF-8. $post_args->{$k} = $v; } } my $obj = undef; my $err = undef; my $start = time(); my $delay = 2; my $retries = ($debug_p ? 1 : 5); $retries = 1 if ($ignore_errors > 1); my $ua = $LWP::Simple::ua; $ua->agent("$progname/$version"); $ua->timeout ($post_args ? 30 : 10); my $i; for ($i = 0; $i < $retries; $i++) { my $res = undef; $err = undef; my $t = strftime("%l:%M:%S %p", localtime); if ($post_args) { print STDERR "$progname: $t: POST $url\n" if ($verbose > 3); $res = $ua->post ($url, Content_Type => 'form-data', Content => $post_args); } else { print STDERR "$progname: $t: GET $url\n" if ($verbose > 3); $res = $ua->get ($url); } my $ret = ($res && $res->code) || 'null'; my $json = ($res && $res->content) || ''; rate_limit_throttle ($url, $res); if ($json && $json =~ m/^access_token=/s) { $obj = $json; # You complete asshats! Non-JSON response! } elsif ($json) { eval { my $j = JSON::Any->new; $obj = $j->jsonToObj ($json); }; } if ($ret ne '200') { $err = "Error $ret: $description: "; if ($obj && ref($obj) eq 'HASH' && $obj->{error} && $obj->{error}->{message}) { $err .= $obj->{error}->{message}; if ($obj->{error}->{error_user_msg}) { $err .= ". " . $obj->{error}->{error_user_title} . ": " . $obj->{error}->{error_user_msg}; } } elsif ($obj && ref($obj) eq 'HASH' && $obj->{error_msg}) { $err .= $obj->{error_msg}; } else { $err .= $res->message; } $obj = undef; $retries = 0 if ($ret eq '404'); # Believe 404 # Believe this 400 error too. $retries = 0 if ($err && $err =~ m/does not have the capability/si); $retries = 0 if ($err && $err =~ m/does not have permission/si); $retries = 0 if ($err && $err =~ m/due to missing permissions/si); $retries = 0 if ($err && $err =~ m/\bdeprecated\b/si); $retries = 0 if ($err && $err =~ m/Session has expired/si); $retries = 0 if ($err && $err =~ m/you're temporarily blocked/si); $retries = 0 if ($err && $err =~ m/activity related to copyright/si); $retries = 0 if ($err && $err =~ m/App must be on whitelist/si); $retries = 0 if ($err && $err =~ m/API is no longer available/si); # $retries = 0 if ($err && $err =~ m/Application request limit reached/si); # No, it's ok to retry posts, because of the "500" check below. #$retries = 0 if ($post_args); # Never retry when posting } last if defined ($obj); sleep ($delay) if ($retries); $delay++; } if (!$obj || $err) { $err = "$description failed" unless $err; # $ignore_errors = 0 if ($err =~ m/request limit reached/si); if (! $ignore_errors) { if ($fb_first_time_p && $err =~ m/^Error 4\d\d/si && $err !~ m/request limit reached/si) { # A 500 error means something other than bogus token. # So does "request limit". $err .= ("\n" . "\t\t This might mean your access_token is bogus.\n" . "\t\t Check your ~/.APPNAME-facebook-pass file.\n" . "\t\t"); } $err .= " after $i tries in " . (time() - $start) . " secs -- $url"; # Sometimes Facebook returns 500 but posts the thing anyway. # So we have to assume that it posted, I guess. # # But sometimes the 500 error comes from LWP itself, meaning # connection timed out or refused. And in that case, it's safe # for us to retry. # if ($err =~ m/^Error 500/si && $err !~ m/Can't connect to .*facebook\.com/si && $description eq 'posting') { print STDERR ("$progname: Error 500! Praying it posted anyway!\n" . "$err\n\n" . "\"" . ($post_args->{message} || $post_args->{description} || $post_args->{caption} || '') . "\"\n----------------------\n\n"); my %ret = ( 'id' => 'err_500' ); return \%ret; } if ($err =~ m/^Error 400: .*deemed abusive/si) { # This means we tried to upload a video and it was rejected by # content ID. print STDERR "$progname: $err\n"; return undef; } # if ($err =~ m/request limit reached/si) { # print STDERR "$progname: ## $err\n"; # exit 1; # } error ($err); } return undef; } $fb_first_time_p = 0; return $obj; } # Duplicated in dna/utils/fbmirror.pl # Duplicated in dna/facebook/cover-sync.pl # my $printed_rate_limit_p = 0; sub rate_limit_throttle($$) { my ($url, $res) = @_; # https://developers.facebook.com/apps/72575175403/rate-limit-details/app/ my $usage = $res->header('X-App-Usage') || ''; my ($call) = ($usage =~ m/"call_count":(\d+)/si); my ($cpu) = ($usage =~ m/"total_cputime":(\d+)/si); my ($total) = ($usage =~ m/"total_time":(\d+)/si); $call = 0 unless defined($call); $cpu = 0 unless defined($cpu); $total = 0 unless defined($total); my $max = 50; # Sleep if any of the numbers are higher than this my $n = $call > $cpu ? $call : $cpu; $n = $n > $total ? $n : $total; if ($n > $max) { my $sleep = ($n - $max) * 10; my $max2 = 60 * 5; # Sleep no more than this many seconds per URL $sleep = $max2 if ($sleep > $max2); my $t = strftime("%l:%M:%S %p", localtime); print STDERR "$progname: $t: $url\n"; print STDERR "$progname: $t: rate limit: $usage;" . " sleeping for $sleep secs...\n\n"; $printed_rate_limit_p = 1; sleep ($sleep); } elsif ($printed_rate_limit_p) { my $t = strftime("%l:%M:%S %p", localtime); $usage = 'no X-App-Usage header' unless $usage; print STDERR "$progname: $t: rate limit over: $usage\n\n"; $printed_rate_limit_p = 0; } } # Like fb_load, but takes multiple results like: # URL1 => { data:[ ...PAGE1... ], paging:{ next: URL2 } } # URL2 => { data:[ ...PAGE2... ], paging:{ next: URL3 } } # URL3 => { data:[ ...PAGE3... ] } # and returns a list of: # ( PAGE1, PAGE2, PAGE3 ) # sub fb_load_paged($$;$$) { my ($description, $query, $max_pages, $ignore_errors) = @_; my @data = (); my $n = 1; while (defined($query)) { print STDERR "$progname: loading $description...\n" if ($verbose); my $result = fb_load ($description, $query, undef, $ignore_errors); my $d = $result->{data}; my @d = @$d if defined($d); push @data, @d; my $p = $result->{paging}; print STDERR "$progname: got " . scalar(@d) . " items\n" if ($verbose > 2); $p = undef if ($max_pages && $n >= $max_pages); last unless defined ($p); my ($token) = ($query =~ m/\baccess_token=([^?&]+)/s); error ("$description: no token: $query") unless $token; $query = $p->{next}; $query .= "&access_token=$token" if $query; $n++; $description =~ s/, page \d+$//s; $description .= ", page $n"; } return @data; } # Run an FQL query and return the result. # "fql is deprecated for versions v2.1 and higher". # sub fql($$$;$) { my ($description, $token, $query, $ignore_errors) = @_; $query =~ s/\s+/ /gs; $query = url_quote ($query); my $url = ('https://graph.facebook.com/fql?q=' . $query . '&access_token=' . $token); my $result = fb_load ($description, $url, undef, $ignore_errors); return $result; } sub extend_token($$$) { my ($token, $app_id, $secret) = @_; error ("can't extend token: no SECRET! Re-run --generate-session") unless $secret; my $url = ('oauth/access_token' . '?client_id=' . $app_id . '&client_secret=' . $secret . '&grant_type=fb_exchange_token' . '&fb_exchange_token=' . $token); my $ret = fb_load ("extend token", $url); my $extended_token = undef; if (ref($ret)) { # Is it a hash? $extended_token = $ret->{access_token}; } elsif ($ret) { # Or a string? ($extended_token) =~ ($ret =~ m@^.*\baccess_token=([a-z\d]+).*$@s); } error ("unparsable extend_token response: " . Dumper($ret)) unless $extended_token; return $extended_token; } # Check the expiration of the app's token, and extend it if it is about # to expire. As of June 2013 or so, Facebook now refuses to give out # unlimited tokens. They start out with a 60 day expiration, and you # have to trade them in for a new token periodically. In some contexts, # tokens will auto-extend just because you have made use of them, but # apparently this is not one of those contexts. # sub token_expiration($) { my ($token) = @_; my $result = fb_load ('expiry', ('debug_token' . '?input_token=' . $token . '&access_token=' . $token), undef, 2); # When using the Facebook iPhone app secret, we get "You must provide an # app access token or a user access token that is an owner or developer # of the app". So, fuck it, just ignore the error. # if (! $result) { # print STDERR "$progname: unable to get token info\n" if ($verbose > 4); # return undef; # } error ("unable to get token expiration info") unless ($result && $result->{data}); my $expires = $result->{data}->{expires_at}; my $app_id = $result->{data}->{app_id}; error ("unable to get token app id") unless ($app_id); return ($expires, $app_id); } sub check_token($$$) { my ($app, $token, $secret) = @_; my ($expires, $app_id) = token_expiration ($token); # return unless defined($expires); # already warned if (! defined($expires)) { # already warned # print STDERR "$progname: unknown token expiration\n" if ($verbose > 4); } else { my $days = int (($expires - time()) / (60 * 60 * 24)); print STDERR "$progname: token expires in $days days: " . localtime($expires) . "\n" if ($verbose > 4); return if ($days > 2); } error ("no app_id for $app") unless $app_id; my $t2 = extend_token ($token, $app_id, $secret); if ($token eq $t2) { print STDERR "$progname: token unchanged\n" if ($verbose > 4); return; } my ($expires2) = token_expiration ($token); if (defined($expires) && $expires == ($expires2 || 0)) { print STDERR "$progname: token expiration unchanged\n" if ($verbose > 4); return; } if (defined ($expires)) { my $days = int (($expires2 - time()) / (60 * 60 * 24)); print STDERR "$progname: now expires in $days days: " . localtime($expires) . "\n" if ($verbose > 4); } my $fn = $ENV{HOME} . "/.$app-facebook-pass"; open (my $in, '<', $fn) || error ("$fn: $!"); local $/ = undef; # read entire file my $body = <$in>; close $in; ($body =~ s/^((ACCESS_TOKEN|OAUTH2):[ \t]*)([^\n]*)/$1$t2/si) || error ("$fn: unable to update token"); open (my $out, '>', $fn) || error ("$fn: $!"); print $out $body; close $out; print STDERR "$progname: wrote $fn: extended token\n" if ($verbose > 1); } sub truncate_sentence($) { my ($s) = @_; $s =~ s/\n\n.*$//s; # If there is a sentence-end between 30 and 60 chars, stop there. $s =~ s/(.{30}[.?!]\s).*$/$1/s; # Else if there is a word-end between 40 and 60 chars, stop there. $s =~ s/(.{40}[^\s]+\.*).+$/$1 .../s; # Else truncate at 60 chars regardless. $s =~ s/^(.{60}).+/$1 .../s; $s =~ s/\. \.+$/./s; return $s; } # Hack the HTML bodies to make them more readable. # sub rewrite_html($) { my ($body) = @_; # Instead of going through the app proxy, go to the URL directly # so that we get larger inline images. # $body =~ s@\b( https?:// ( [a-z\d]*\.facebook\.com | [^/<>\'\"]*?\.fbcdn\.net | fbexternal[^/<>\'\"]+ ) / ( www/ )? ( app_full_proxy\.php | safe_image\.php | l\.php ) [^<>\"\']+ ) @{ my $url = $1; my $ourl = $url; my ($redir) = ($url =~ m/(?:src|url|u)=(http[^&]+)/s); $url = url_unquote($redir) if $redir; print STDERR "$progname: $ourl\n\t-> $url\n" if ($verbose > 4 && $url ne $ourl); $url; }@gsexi; # Use larger Tumblr images. $body =~ s@\b(https?://[^/<>\"\']*?\.tumblr\.com[^<>\"\']*?)_250\.jpg @${1}_500.jpg@gsix; # Use larger FB images. # This stopped working: now there's hash bullshit on the URLs. $body =~ s@\b ( https?:// (?: fbcdn-photos- | [^/]+\.fbcdn\.net/ ) .*? ) _[sq] \. ( [a-z\d]+ \b (:? [^?&] | $ ) ) @${1}_o.$2@gsix; # Fuckin' Youtube noise. $body =~ s@ ( & | [?&] ) feature= ( player_embedded | youtu\.be | share | youtube_gdata | relmfu ) \b ( & | [?&] )? @$1@gsix; # Facebook uses the syntax @[NNNN:MM:text] for links to FB people/pages. # NNNN is the page ID, and I don't know what MM is. # $body =~ s% \@\[ ( \d+ ) : ( \d+ ) : ( [^\[\]]+ ) \] %{ my $id = $1; my $txt = $3; $id = 'https://www.facebook.com/' . $id; $txt =~ s@\\@@gs; "$txt"; }%gsexi; return $body; } # Get the larger version of the photo via the graph API, since it's no # longer always possible to do it by just hacking the URL extension: # they now sometimes add hash bullshit onto the end. Motherfuckers. # sub enlarge_fb_pic($$$) { my ($entry, $url, $token) = @_; return undef unless $url; # Rewrite it by extension, if we can; also de-proxy it. $url = rewrite_html ($url); # If the linked URL is a Youtube thumbnail, add an embed to that video. # if ($url =~ m@^https?://[^/.]+\.ytimg\.com/vi/([^/]+)@si && !$entry->{embed_html}) { my $id = $1; my $u1 = "http://www.youtube.com/watch?v=$id"; my $u2 = "http://www.youtube.com/embed/$id?version=3"; $entry->{link} = "@si; # close that shit $html .= $link_embed; # Pre-formatted Facebook video. } elsif ($link_url =~ m@\b(youtube)\.com/.*v[?=/]([^<>?&]+)@si || $link_url =~ m@\b(vimeo)\.com/(\d+)@si || $link_url =~ m@\b(vimeo)\.com/.*/videos/(\d+)@si) { # Inline Youtube and Vimeo videos. my $id = $2; my $url = ($1 eq 'youtube' ? "http://www.youtube.com/embed/$id?version=3" : "http://player.vimeo.com/video/$id"); my $w = 853; my $h = int ($w / (16 / 9)); $html .= ("

"); } elsif ($link_pic && $link_pic =~ m@\.mp4(\?|$)@si) { my $w = 640; my $h = 360; # Not sure about this: ($w, $h) = ($1, $2) if ($link_pic =~ m@&rl=(\d\d\d+)&vabr=(\d\d\d+)&@si); $html .= (""); } elsif ($link_pic) { $html .= ('' . '' . ''); } if ($link_title || $link_cap || $link_desc) { $html .= ''; $html .= '
' . html_quote ($link_title) if $link_title; $html .= '
' . html_quote ($link_cap) if $link_cap; $html .= '
' . html_quote ($link_desc) if $link_desc; $html .= '
'; } $html .= '
' if ($link_pic || $link_embed); } if ($entry->{tags} || $entry->{with_tags}) { my %friendp; my @tags = (); foreach my $tag (($entry->{tags} ?@{$entry->{tags}->{data}} : ()), ($entry->{with_tags}?@{$entry->{with_tags}->{data}}:())) { my $link = ($tag->{link} || ($tag->{id} ? 'https://www.facebook.com/' . $tag->{id} : undef)); if ($tag->{id}) { next if ($tagged{$tag->{id}}); $tagged{$tag->{id}} = 1; } my $name = html_quote ($tag->{name}); $name = '' . $name . '' if $link; $friendp{$name} = 1 if ($friends && $tag->{id} && $friends->{$tag->{id}}); push @tags, $name; } if (@tags) { move_friends_to_front (\%friendp, \@tags); my $tags = '

With ' . (($#tags > 0 ? join(", ", @tags[0..($#tags-1)]) . " and " : "") . $tags[$#tags] . '.'); $html .= "

$tags"; } } if ($entry->{likes}) { my @likes = (); my $max = 10; my $got = $entry->{likes}->{data} ? @{$entry->{likes}->{data}} : 0; my $total = $entry->{likes}->{count}; if ($entry->{likes}->{data}) { my $i = 0; foreach my $like (@{$entry->{likes}->{data}}) { next unless $like->{id}; if (kill_string_p ($kill, $like->{name})) { print STDERR "$progname: omitting killfiled liker $name\n" if ($verbose > 2); next; } my $link = ($like->{link} || 'https://www.facebook.com/' . $like->{id}); my $name = $like->{name} || $like->{id}; push @likes, ('' . html_quote ($name) . ''); last if (++$i > $max); } } $total = @likes unless defined($total); # WTF my $likes; if ($total == 0) { $likes = ''; } elsif (@likes == 0) { $likes = "$total like this."; } elsif (@likes == $total) { if ($total == 1) { $likes = $likes[0] . " likes this."; } else { $likes = (join(", ", @likes[0..($#likes-1)]) . " and " . $likes[$#likes] . " " . ($#likes == 0 ? "likes" : "like") . " this."); } } else { $likes = (join(", ", @likes) . " and " . ($total - @likes) . " others like this."); } $html .= "

$likes" if $likes; } my $comments_html = ''; if ($entry->{comments}) { # Apparently "me/home/" will only give us 2 comments on certain entries. # If there are more comments there than we have, re-load the comment # data to get more. my $got = $entry->{comments}->{data} ? @{$entry->{comments}->{data}} : 0; my $count = $entry->{comments}->{count} || 0; my $more = ($count ? $count - $got : 0); my $omore = $more; if ($more) { print STDERR "$progname: " . $entry->{id} . ": loading $more more comments\n" if ($verbose > 2); my $c2 = fb_load ("entry comments", $entry->{id} . "&fields=comments&access_token=$token", undef, 1); if ($c2 && $c2->{comments} && $c2->{comments}->{data}) { $entry->{comments} = $c2->{comments}; $got = $entry->{comments}->{data} ? @{$entry->{comments}->{data}} : 0; $more = $entry->{comments}->{count} - $got; # Doesn't work at all for "updated their cover photo". # Don't complain if we only got the first 50 comments. print STDERR "$progname: " . $entry->{id} . ": only got $got of " . $entry->{comments}->{count} . " comments!\n" if ($more && $verbose > 3 && $entry->{comments}->{count} <= 50); } else { print STDERR "$progname: " . $entry->{id} . ": failed to load more comments!\n" if ($verbose > 2); } } $comments_html .= "


"; if ($entry->{comments}->{data}) { foreach my $comm (sort { ($a->{created_time} || 0) cmp ($b->{created_time} || 0) } @{$entry->{comments}->{data}}) { my $cfrom = $comm->{from}; next unless defined($cfrom); my $cdate = $comm->{created_time}; my $cid = $comm->{id}; my $cbody = $comm->{message}; my $clike = $comm->{like_count}; $cdate = parse_date ($cdate); $cdate = POSIX::strftime ("%I:%M %p", localtime ($cdate)); my $cname = $cfrom->{name}; my $ccurl = 'https://www.facebook.com/' . $cid; my $cfurl = ($cfrom->{link} || 'https://www.facebook.com/' . $cfrom->{id}); my $cfid = $cfrom->{id}; if ($cid =~ m@^\d+_\d+_(\d+)$@si) { my $id2 = $1; $ccurl = ($post_url . ($post_url =~ m/[?]/s ? '&' : '?') . 'comment_id=' . $id2); } $cbody = comment_add_tags ($comm->{message}, $comm->{message_tags}, $friends); if ($comm->{likes}) { $cbody .= ('
(' . $comm->{likes} . ($comm->{likes} == 1 ? ' like' : ' likes') . ')'); } $cbody = ('

' . '

' . userpic_html ($cname, $cfid, 1, undef, undef) . "$cdate: " . $cbody . ($clike ? " ($clike like)" : '') . '
'); $comments_html .= $cbody; } } # If we don't have all the comments, say so. $comments_html .= "

($more more comment" . ($more == 1 ? "" : "s") . ".)

" if ($more); } $html = "[blank]" unless $html; my $app_name = $app ? $app->{name} || $app->{namespace} : undef; my $app_id = $app ? $app->{id} : undef; $html = userpic_html ($name, ($from->{id} || $from->{link}), 0, $app_name, $app_id) . $html; $html =~ s/\n\n+/

/gsi; $name = html_quote ($name); $subj = html_quote ($subj); foreach my $h ($html, $comments_html) { $h = rewrite_html ($h); } # Lose random control characters, WTF. foreach ($subj, $obody, $html, $comments_html) { s/[\000-\010\013-\037]//gs; } my $body_hash = "$name\n$subj\n$html"; utf8::encode ($body_hash); # Unpack wide chars to multi-byte UTF-8. $body_hash = sha1_hex($body_hash); $html .= $comments_html; $html =~ s@\bm\.facebook\.com@www.facebook.com@gs; $html =~ s@(facebook\.com/[^<>\"\'?]+)\?refid[^<>\"\'?]+@$1@gs; $html =~ s@(facebook\.com/[^<>\"\'?]+\?id=\d+)[^<>\"\'?]+@$1@gs; my $rss = (" \n" . " $post_url\n" . " $name\n" . " $subj\n" . " " . html_quote($obody) . "\n" . " $html]]>\n" . " $date\n" . " \n"); print STDERR "$progname: $name: $date: $subj $post_url\n" if ($verbose > 3); if (0) { my $x = Dumper($entry); if ($x =~ m@shared[^<>]*?event@si) { $entry->{actions} = ''; $entry->{comments} = ''; $entry->{likes} = ''; $x = Dumper($entry); $html =~ s/{id}; # # my ($a, $b) = ($id =~ m/^(.*)_(.*)/si); # if (! $b) { # print STDERR "$progname: unparsable ID: $id\n"; # return; # } # my $url = "https://www.facebook.com/$a/posts/$b"; # # print STDERR "$progname: " . $entry->{id} . ": loading event info\n" # if ($verbose > 2); # # # We get an "Upgrade your browser" error message without this. # $LWP::Simple::ua->agent("$progname/$version"); # # my $body = LWP::Simple::get ($url) || ''; # my ($event, $img) = # ($body =~ m@]*? HREF= ["'] ( /events [^"']+ ) [^<>]* > \s* # ]*? SRC= ["'] ( [^"']+ ) [^<>]* > # @six); # if (! $event) { # if ($body =~ m/You must log in to see this page/si) { # print STDERR "$progname: $id: page is protected: $url\n" # if ($verbose > 1); # } else { ## print STDERR "$progname: $id: no event link in $url\n"; # } # $entry->{story} .= ' [permission denied]'; # return; # } # my ($title) = # ($body =~ m@]*? HREF= ["'] \Q${event}\E [^<>]* > \s* # ( [^<>]+ ) \s* # @six); # $title = "Error: couldn't find title" unless $title; # # $entry->{link} = $event; # $entry->{caption} = $title; # $entry->{picture} = $img if $img; #} sub make_photo_rss_entry($$$$$$$$$) { my ($via, $id, $tag, $friends, $likes, $kill, $mode, $token, $pct) = @_; my $query = ($id . '?access_token=' . $token); my $entry = fb_load ("photo", $query); if (!$entry || !$entry->{id}) { print STDERR "$progname: no info for photo $id\n" if ($verbose > 2); return undef; } $entry->{message} = ("$tag was tagged in " . $entry->{from}->{name} . "'s photo" . ($entry->{name} ? ", \"" . $entry->{name} . "\"." : "")); $entry->{checkin_p} = 1; # Kludge return make_rss_entry ($via, $entry, $friends, $likes, $kill, $mode, $token, 1, 0, $pct); } # expands the first URL relative to the second. # sub expand_url($$) { my ($url, $base) = @_; $url =~ s/^\s+//gs; # lose whitespace at front and back $url =~ s/\s+$//gs; my ($proto) = ($base =~ m@^([^:/]+):@); # Protocol-relative $url =~ s@^:?//@$proto://@gs; if ($url =~ m@^[a-z]+:|^//@si) { print STDERR "$progname: absolute URL: $url\n" if ($verbose > 4); } else { $base =~ s@(\#.*)$@@; # strip anchors $base =~ s@(\?.*)$@@; # strip arguments $base =~ s@/[^/]*$@/@; # take off trailing file component my $tail = ''; if ($url =~ s@(\#.*)$@@) { $tail = $1; } # save anchors if ($url =~ s@(\?.*)$@@) { $tail = "$1$tail"; } # save arguments my $base2 = $base; $base2 =~ s@^(([a-z]+:)?//[^/]+)/.*@$1@si # url is an absolute path if ($url =~ m@^/@); my $ourl = $url; $url = $base2 . $url; $url =~ s@/\./@/@g; # expand "." 1 while ($url =~ s@/[^/]+/\.\./@/@s); # expand ".." $url .= $tail; # put anchors/args back print STDERR "$progname: relative URL: $ourl --> $url\n" if ($verbose > 4); } return $url; } # Load an HTML page, with retries. Requires Facebook HTML cookies. # my $reloaded_cookies_p = 0; sub fb_scrape_html($$$;$) { my ($desc, $app, $url, $ignore_errors_p) = @_; load_saved_cookies($app); error ("can't scrape HTML without cookies") unless $cookies_loaded_p; my $ua = $LWP::Simple::ua; my $start = time(); my $delay = 2; my $retries = ($debug_p ? 1 : 20); my $err = undef; my $obj = undef; my $i; for ($i = 0; $i < $retries; $i++) { $err = undef; my $res = $ua->get ($url); my $ret = ($res && $res->code) || 'null'; $obj = ($res && $res->content) || ''; if ($obj =~ m/The page you requested cannot be displayed/s) { #print STDERR "###### FFFFFF $url\n"; #use Devel::StackTrace; print STDERR "\n\n#####\n" . Devel::StackTrace->new->as_string; $obj = ''; $ret = 404; } if ($ret ne '200') { $err = "Error $ret: $desc: "; $err .= $res->message; $obj = undef; # Believe these errors. $retries = 0 if ($ret eq '404'); $retries = 0 if ($ret eq '400'); } if ($obj && $obj =~ m/Facebook - Log In or Sign Up|You must log in first/si) { $obj = undef; if ($reloaded_cookies_p) { $err = "unable to log in even after reloading Safari cookies"; } else { # Read the saved cookie from the .pass file. my (undef, undef, $ocookie) = load_access_token($app); # Re-parse the cookies from Safari and load them into $ua. load_safari_cookies ($ua, '\bfacebook\.com$'); # Write the cookies from $ua to the .pass file, if changed. save_cookies ($app, $ocookie); $reloaded_cookies_p++; $i--; # one more try } } last if defined ($obj); sleep ($delay) if ($retries); #$delay++; } if (!$obj || $err) { $err = "$desc failed" unless $err; $err .= " after $i tries in " . (time() - $start) . " secs -- $url"; return undef if ($ignore_errors_p); error ($err); } elsif ($verbose > 1 && $i) { print STDERR "$progname: $url succeeded after $i tries\n"; } utf8::decode ($obj); # Pack multi-byte UTF-8 to wide chars. return $obj; } sub shitfuckery() { my $delay = 60 * 60 * 6; print STDERR "$progname: $$: blocked, sleeping for $delay\n"; sleep ($delay); error ("$$: shit"); exit (1); } # Screen-scrape HTML to get the list of friends, since the graph API no # longer works for that. # sub fb_scrape_friends($) { my ($app) = @_; my $description = 'scraping friend list HTML'; my $url = 'https://m.facebook.com/me/friends'; my $url0 = $url; my %dups; my @result = (); my $total = 0; my $pages = 0; while ($url) { $pages++; print STDERR "$progname: $description page $pages...\n" if ($verbose); my $obj = fb_scrape_html ($description, $app, $url); if ($obj =~ m/(Temporarily Blocked)/s) { shitfuckery(); } $obj =~ s@(]*href="([^<>\"]+)"@si); my ($name) = ($chunk =~ m@]*>([^<>]+)@si); next unless $name; next unless $url; next unless ($url =~ m@^/([a-z\d.]+)(\?.*)?$@s); next if ($url =~ m/\.php/s); $url =~ s/\?.*$//s; my $id = $url; # ugh... $id =~ s@^/@@s; $url = "https:n//www.facebook.com$url"; $count++; $total++; $name = html_unquote($name); my $u = { link => $url, name => $name, id => $id, }; push @result, $u unless ($dups{$url}); $dups{$url} = 1; } #last if $debug_p; #### last unless $count; error ("looping scraping friends!") if ($count > 50); # No longer works: $url = "$url0?startindex=$total"; $obj =~ m@href=[\"\']([^\"]+/friends\?unit_cursor[^\"\']*)@si || last; $url = expand_url ($1, $url0); } # It's 2020 and I have no friends now #error ("only $total friends?") if ($total < 100); return @result; } # Screen-scrape HTML to get the current user's news feed, since the graph # API no longer works for that. # sub fb_scrape_newsfeed($$$$) { my ($description, $app, $name, $user) = @_; my $ua = $LWP::Simple::ua; load_saved_cookies ($app); error ("can't scrape HTML without cookies") unless $cookies_loaded_p; my $url = $user->{link}; $url =~ s@^(https?://)[^./:]+\.@${1}m.@gsi; print STDERR "$progname: scraping $description...\n" if ($verbose); my $obj = fb_scrape_html ($description, $app, $url); if ($user->{id} =~ m/[a-z]/si) { my ($u2) = ($obj =~ m@href=\"/messages/thread/(\d+)@s); # "Message" link ($u2) = ($obj =~ m@href=\"/mbasic/more/\?owner_id=(\d+)@s) # "More" link unless $u2; ($u2) = ($obj =~ m@href=\"/photo\.php\?[^<>\"]*\bid=(\d+)@s) # first photo unless $u2; ($u2) = ($obj =~ m@href=\"/profile/picture/view/\?profile_id=(\d+)@s) # first profile photo unless $u2; ($u2) = ($obj =~ m@href=\"/story\.php\?[^<>]*&id=(\d+)@s) # first story link unless $u2; ($u2) = ($obj =~ m@href=\"/a/profile\.php\?unfan[^<>]*&id=(\d+)@s) unless $u2; ($u2) = ($obj =~ m@href=\"/page/follow_mutator\?page_id=(\d+)@s) unless $u2; ($u2) = ($obj =~ m@href=\"/pages/more/(\d+)/@s) unless $u2; if ($u2) { print STDERR "$progname: " . $user->{id} . " => $u2\n" if ($verbose > 3); $user->{id} = $u2; } else { # print STDERR "$progname: no UID for " . $user->{id} . "\n$obj\n\n"; print STDERR "$progname: no UID for " . $user->{id} . "\n"; shitfuckery(); } } my $obody = $obj; $obj =~ s/^.*?structured_composer_[^<>]*>//gs; # The class names seem to be generated, and change. # So first, delete the header; # Then extract IDs from the "Full Story" links; # then load each of those independently. my @stories = (); $obj =~ s%(]*)> \s* Full \s+ Story \s* )%{ my ($a, $b) = ($1, $2); my ($href) = ($b =~ m/HREF="(.*?)"/si); $href = '' unless $href; my ($oid) = ($href =~ m/story_fbid=(\d{9}\d+)/si); ($oid) = ($href =~ m/top_level_post_id\.(\d{9}\d+)/si) if (!$oid); ($oid) = ($href =~ m/fbid=(\d{9}\d+)/si) if (!$oid); # This looks like an ID but it is not. # Seen on an "updated their cover photo" post. # ($oid) = ($href =~ m/story_fbid\.(\d{9}\d+)/si) if ($href && !$oid); # Fuck it. # error ("$url: no story in Full Story: $a\n\n$obj") unless $oid; push @stories, $oid if $oid; $a; }%gsexi; my $now = time(); my @result = (); foreach my $oid (@stories) { my $u2 = "https://m.facebook.com/$oid"; print STDERR "$progname: scraping $description $oid...\n" if ($verbose > 1); my $chunk = fb_scrape_html ("$description $oid", $app, $u2, 1); if (! $chunk) { print STDERR "$progname: $description $oid: null response\n" if ($verbose > 1); next; } # Nuke everything before this class: the header $chunk =~ s/(^ .*?

]* \b id=\" [^\"<>]* \b m_story_permalink_view \b [^<>]* > )//sx; # Nuke everything from here on: the Like buttons and comments. $chunk =~ s/(
]* \b id=\"ufi_ .* )$//sx; $chunk =~ s@]*>@

@gsi; my ($date, $app) = # 6 hrs · Twitter ($chunk =~ m@ ]*> ([^<>]+) \s* (?: [^a-z\d<>]* \s* )* ([^<>]*) @sx); $date = '' unless $date; $app =~ s/\s+$//s if $app; if ($date =~ m/^(\d+) days?/) { $date = $now - ($1 * 60 * 60 * 24); } elsif ($date =~ m/^(\d+) hrs?/) { $date = $now - ($1 * 60 * 60); } elsif ($date =~ m/^(\d+) mins?/) { $date = $now - ($1 * 60); } elsif ($date =~ m/^yesterday at ([^<>\"]*)/si) { $date = str2time($1) - (60 * 60 * 24); } elsif ($date) { $date =~ s/ at //gs; $date = str2time($date); } $date = $now unless $date; my ($title, $body) = ($chunk =~ m@]*>(.*?)(.*)$@si); if (! $body) { # next; ($title, $body) = ('', $chunk); } foreach ($title, $body) { s@.*@@gsi; s@]*>@\n@gsi; s@]*>@\n\n@gsi; s@<[^<>]*>@@gs; s/\s+/ /gs; } # Nuke any A tags and their contents point to Like, React, Share, etc. $chunk =~ s@(]*>(.*?))@{ my ($a, $b) = ($1, $2); my ($href) = ($a =~ m/href="(.*?)"/si); $a = '' if ($href && $href =~ m!^\#!s); $href =~ s!^https?://[a-z\d]+\.facebook\.com!!s if $href; if ($b =~ m/^(previous|prev|next)$/si || ($href && $href =~ m!^/( a/like\.php | reactions/ | composer/ | pokes/ | privacy/ | nfx/basic/ | save/ | removefriend | friendship | friendlists | mobile/suggest | a/subscriptions | a/language\. | language\. | [^/?]+\?timecutoff | photos/xtag_faces | # [^/?&]+/albums/ | # pages/\? | help/ | settings/ | bugnub/ | policies/ | safetycheck/ | # browse/ | # keep browse/users/ hashtag/ | edits/ | tokenizer/ | home\.php | logout )!six)) { print STDERR "$progname: $description $oid: nuked link $href\n" if ($verbose > 1); $a = ''; } $a; }@gsexi; # Sanitize HTML with a chainsaw $chunk =~ s@^[^<>]+>@@gsi; # opened body already inside a tag # omit these tags $chunk =~ s///gsi; $chunk =~ s@@@gsi; $chunk =~ s@]*>@@gsi; $chunk =~ s@<((P|BR|IMG|WBR)\b[^<>]+)\s*/>@<$1>@gsi; # no /> $chunk =~ s@@<$1>@gsi; $chunk =~ s@]*>@

@gsi; $chunk =~ s@\s*(

\s*)+@$1@gsi; $chunk =~ s@\s*(
\s*)+@$1@gsi; $chunk =~ s@\s*(]*>\s*)*$@@gsi; $chunk =~ s@\s*]*>\s*$@@gsi; $chunk =~ s@\s*(]*>\s*)*$@@gsi; #$chunk =~ s@\b((HREF|SRC)=\")/@$1https://www.facebook.com/@gsi; $chunk =~ s@]*/rsrc\.php/[^<>]*>@@gsi; $chunk =~ s@.*?@@gsi; $chunk =~ s@>( +[.\267])+@> @gs; # FFS $chunk =~ s@\s+Friends( of Friends)?\s*$@@si; my $txt = $chunk; $txt =~ s/\x{200e}//gs; # Left to right mark, dafuq $txt =~ s/

/\n\n/gsi; $txt =~ s/
/\n/gsi; $txt =~ s/<[^<>]*>//gs; $txt =~ s/[ \t]+/ /gm; $txt =~ s/^[ \t]+|[ \t]+$//gm; $txt =~ s/(\n\n)\n+/$1/gs; $txt =~ s/^\s+|\s+$//gs; $txt = html_unquote($txt); my ($share) = ($chunk =~ m@\bHREF="([^<>\"]*/photos/[^<>\"]+)"@si); ($share) = ($chunk =~ m@\bHREF="([^<>\"]*/photo\.php\?[^<>\"]+)"@si) unless $share; ($share) = ($chunk =~ m@\bHREF="([^<>\"]*/story\.php\?[^<>\"]+)"@si) unless $share; $share = undef if ($share && $share =~ m/\Q$oid\E/s); $share = expand_url ($share, "https://www.facebook.com/") if $share; # If there is a "View Full Size" link, replace an existing IMG tag # with that. It is a link to the JPG. # if ($chunk =~ m@]*? \b HREF = "([^<>\"]*?)" [^<>\"]*? > \s* View \s Full \s Size \s* @six) { my $u3 = $1; $chunk =~ s@]*? \Q$oid\E [^<>]*? > @@six || error ("$description $oid: no IMG to upgrade\n$chunk\n"); print STDERR "$progname: $description $oid: upgraded img to $u3\n" if ($verbose > 1); } #if (!$share && $txt =~ m/^\s*\Q$name\E shared a memory/si) { # $share = "https://m.facebook.com/$oid"; # It's a self-share #} $share = html_unquote($share) if $share; # Expand the URLs, check them in debug mode. my %checked; $chunk =~ s@(]*HREF=\")([^\"<>]+)(\"[^<>]*>)@{ my ($a, $h, $b) = ($1, $2, $3); $h = expand_url ($h, "https://m.facebook.com/"); # Sometimes the "www" link doesn't work but the "m" link does. Fuckers. #$h =~ s!^https?://m\.(facebook\.com)!https://www.$1!si; if (0 && $debug_p && !$checked{$h}) { $checked{$h} = 1; my $h2 = html_unquote($h); if ($h2 =~ m!/l\.php|/video_redirect!s) { print STDERR "$progname: " . $user->{name} . ": skipping: $h2\n"; } else { print STDERR "$progname: " . $user->{name} . ": checking: $h2\n"; my $res = $ua->get ($h2); my $ret = ($res && $res->code) || 'null'; if ($ret ne '200') { $chunk =~ s/{name} . ": URL failed: $ret: $h2\n\n$chunk"); } } } "$a$h$b"; }@gsexi; if (0) { my $chunk2 = $chunk; $chunk2 =~ s@', '/tmp/a.html'); print $out $chunk2; close $out; my $err = `validate.pl /tmp/a.html 2>&1`; error ($err) if ($err =~ m/[^\s]/s); } error ("rate limited: $name") if ($txt =~ m/You Can't Use This Feature Right Now/si || $txt =~ m/We limit how often you can post, comment or do other/si); # if ($txt =~ m/^\s*\d+\s* friends posted on .* timeline/si) { # # We can get a new one of these for *each* timeline post: # # the number is different, but each URL points to a different # # post on the timeline, not to the timeline itself. # print STDERR "$progname: skipping timeline post \"$txt\"\n" # if ($verbose > 1); # next; # } my $entry = { from => $user, updated_time => $date, message => $chunk, txt => $txt, id => $oid, link => "https://www.facebook.com/$oid", application => $app ? { name => $app } : undef, htmlp => 1, link => $share, }; if ($chunk =~ m@]+\?fref=[^\"<>]+)[^<>]*>(.*?)@si) { # First fref is the posting user. my $url = $1; my $name = $2; $name = html_unquote($name); $url = html_unquote($url); $url =~ s/\?.*$//s; $url =~ s@^(https?://)m\.@$1www.@s; if ($name ne $user->{name}) { # $entry->{to} = $user; # $entry->{from} = { link => $url, name => $name }; $entry->{tagged_by} = { link => $url, name => $name }; # Ugh. } } push @result, $entry; } if (! @result) { $obj =~ s/(<(a|br|div)\b)/\n $1/gs; if (@stories) { print STDERR "no items among " . scalar(@stories) . " stories: $url\n" . "############\n$obody\n"; } } return @result; } sub fb_scrape_pic($$$$) { my ($description, $app, $id) = @_; my $url = "https://m.facebook.com/$id"; print STDERR "$progname: scraping $description $id...\n" if ($verbose > 1); my $obj = fb_scrape_html ($description, $app, $url, 1) || ''; my ($title) = ($obj =~ m@\s*(.*?)@si); my ($full) = ($obj =~ m/]*HREF="([^\"<>]*?)">\s*View Full Size/si); # If there is a link to the full size image, that is actually a link # to the jpeg, not to a wrapper page. # if ($full) { $full = html_unquote($full); print STDERR "$progname: found $id full pic $full\n" if ($verbose > 3); return { description => $title, picture => $full }; } my ($src) = ($obj =~ m@ \b SRC=" ( [^\"<>?&]*? fbcdn\.net/ (?: v/ [^\"<>?&]*? \.(jpg|gif|png) \b | safe_image\.php ) [^\"<>]* ) " @six); if ($src) { $title = html_unquote ($title); $src = html_unquote ($src); print STDERR "$progname: found $id medium pic $src\n" if ($verbose > 3); return { description => $title, picture => $src }; } else { print STDERR "$progname: no pic for $url\n" if ($verbose > 2); return undef; } } # Permutes the array (reference) in place. "Fisher Yates Shuffle". # sub shuffle_array($) { my ($array) = @_; return $array unless (scalar(@$array) > 0); for (my $i = @$array; --$i;) { my $j = int rand ($i+1); next if ($i == $j); @$array[$i,$j] = @$array[$j,$i]; } } sub fb_load_newsfeed($$$$$$$$$) { my ($app, $mode, $since, $friends, $pages, $kill, $token, $save_cb, $return_listref) = @_; # Ok, so this crud stopped working on 4-Mar-2015: # # my $query = ('v2.2/me/home' . # #'?limit=1000' . # now throttles to 8 at a time, 35 total? # '?limit=100' . # now "500 please reduce" if more than this. # # but sometimes it just gives 500 and no # # error message. Sometimes limit=8 will work, # # sometimes limit=25 will work, then it changes # # again. Is it overloaded? WTF? # '&since=' . $since . # '&access_token=' . $token); # # my @e = fb_load_paged ("new feed", $query); # # # To debug something specific that has expired out of the stream: # # @e = fb_load_paged ("stream", "USER?fields=posts&access_token=$token"); # # # print STDERR "$progname: got " . scalar(@e) . " items\n" if ($verbose > 1); # Let's do it the hard way. my $total = (($mode ne 'pages' ? scalar(keys %$friends) : 0) + ($mode ne 'friends' ? scalar(keys %$pages) : 0)); my $n = 0; # my @e; my %last_updated; my $total_entries = 0; my $total_skipped = 0; for (my $i = 0; $i < 2; $i++) { if ($mode ne ($i == 0 ? 'pages' : 'friends')) { my $table = ($i == 0 ? $friends : $pages); # Let's do them in random order, in case we repeatedly hit a timeout # and never make it to the end of the alphabet. my @ids = keys %$table; shuffle_array (\@ids); foreach my $id (@ids) { my $user = $table->{$id}; my $name = $user->{name}; #next if ($debug_p && $name !~ m/DNA Lounge/s); #### debugging if ($name && $kill && kill_string_p ($kill, $name)) { print STDERR "$progname: omitting killfiled $name\n" if ($verbose > 1); next; } # This still works for "pages" but stopped working on "friends" in # Sep 2016 when FB stopped accepting the credentials for the iOS # app. "Normal" apps can't read your timeline (the feed of your # friends' posts) and there is no way to give them that permission. # June 2018: hitting rate limiting all the time when scraping pages # via graph API. So let's do them with HTML too, sigh. my $pct = sprintf("%.1f%%", ($n+1) * 100 / $total); my @f; eval { if (1 || $table == $friends) { @f = fb_scrape_newsfeed ("$name posts ($pct)", $app, $name, $user); } else { @f = fb_load_paged ("$name posts ($pct)", # "$id/feed" . # by page, or on page "$id/posts" . # by page only # '?limit=20' . # Doesn't work # '?since=' . $since . # Doesn't work # Necessary as of Feb 2018 '&fields=from,to,link,name,' . 'message,message_tags,object_id,' . 'story,story_tags,with_tags,' . 'picture,description,caption,' . 'created_time,source' . '&access_token=' . $token, 1, # Limit result pages 1); # ignore errors } }; if ($@) { my $e = $@; $e .= "\n" unless ($e =~ m/\n$/s); print STDERR "$progname: ERROR: $e"; next; } # Implement "since" by hand, sigh. # my $skipped = 0; foreach my $entry (@f) { my $date = $entry->{updated_time} || $entry->{created_time}; my $time_t = parse_date ($date, 1) || 0; $last_updated{$name} = $time_t if ($time_t > ($last_updated{$name} || 0)); my $txt = $entry->{txt}; if ($name && $kill && $txt && $table == $friends && # $txt =~ m/^[^\n]+ with \Q$name/i && $entry->{tagged_by} && kill_string_p ($kill, "$name tagged")) { print STDERR "$progname: omitting killfiled tagged $name\n" if ($verbose > 1); next; } # Whyyyyyyy if ($table == $pages && !$entry->{from}) { $entry->{from}->{name} = $name; } # "Tagged in a life event" doesn't have a date on it. if (!$date || $time_t >= $since) { #push @e, $entry; push @$return_listref, $entry; $total_entries++; } else { $skipped++; $total_skipped++; } } print STDERR "$progname: got " . scalar(@f) . " items total" . ($skipped ? "; skipped $skipped old" : "") . "\n" if ($verbose > 1); $n++; $save_cb->(); } } } if ($verbose > 2) { print STDERR "\nLast updated:\n\n"; my $now = time(); foreach my $name (sort { $last_updated{$a} <=> $last_updated{$b}} keys %last_updated) { my $d = $now - $last_updated{$name}; $d = int ($d / (60 * 60 * 24)); print STDERR " $name\t\t$d days ago\n"; } print STDERR "\n"; } print STDERR "$progname: feed has $total items total_entries," . " $total_skipped skipped\n" if ($verbose > 1); # Goes into @$return_listref # return @e; return undef; } # Returns a list of the entries in the output RSS file. # This is necessary because Facebook sometimes leaves shit out of the # stream: you load the stream twice, and some old entries might vanish. # So we don't ever let them vanish, and hopefully after a few runs, # we'll actually have the whole thing. # sub load_rss($) { my ($file) = @_; my $old = ''; if (open (my $in, '<:utf8', $file)) { local $/ = undef; # read entire file $old = <$in>; close $in; } $old =~ s/().*?$@$1@si if (@items); return @items; } my $lockfile = undef; my $lock_fd = undef; # Must be global or it is GC'ed and closed sub obtain_lock($$) { my ($outfile, $mode) = @_; return unless ($mode =~ m/^(both|pages|friends)$/s); $lockfile = "$outfile.LCK"; open ($lock_fd, '+>>', $lockfile) || error ("writing $lockfile: $!"); if (! flock ($lock_fd, LOCK_EX | LOCK_NB)) { my $age = time() - (stat($lock_fd))[9]; $lock_fd = undef; # Indicate that we have no lock # If we haven't been locked that long, exit silently. exit (1) if ($verbose == 0 && $age < 60 * 60 * 3); $age = sprintf("%d:%02d:%02d", $age/60/60, ($age/60)%60, $age%60); if ($debug_p) { print STDERR "already locked for $age: $lockfile\n"; } else { error ("already locked for $age: $lockfile"); } } else { my $now = time(); utime ($now, $now, $lockfile); print STDERR "$progname: locked $lockfile\n" if ($verbose); } } # Delete the lockfile when exit() is called. # This does not run if the process is killed with a signal. # END { if ($lock_fd) { # Not strictly necessary: the lock is released on exit or unlink. flock ($lock_fd, LOCK_UN) || error ("unlocking $lockfile: $!"); print STDERR "$progname: rm $lockfile\n" if ($verbose > 1); unlink $lockfile; $lock_fd = undef; } } sub generate_rss($$$$) { my ($app, $outfile, $mode, $since) = @_; obtain_lock ($outfile, $mode); my $now = time(); my $last_write = $now; my $write_every = 10; # Write the file every N seconds my ($token, $secret, $ocookie) = load_access_token ($app); # check_token ($app, $token, $secret); my $kill = load_killfile ($app); my $result; my %friends; my %likes; if (1||$mode ne 'pages') { if (0) { foreach my $endpoint ('friends', 'taggable_friends') { foreach my $f (fb_load_paged ($endpoint, "me/$endpoint?fields=id,link,name" . '&access_token=' . $token)) { $friends{$f->{id}} = $f; } } } else { foreach my $f (fb_scrape_friends($app)) { $friends{$f->{id}} = $f; } } print STDERR "$progname: " . scalar(keys %friends) . " friends\n" if ($verbose); } # if (1||$mode ne 'friends') { if ($mode ne 'friends') {#### # my @likes = fb_load_paged ("pages", # 'me/likes?fields=id,name,link' . # '&access_token=' . $token); # foreach my $f (@likes) { $likes{$f->{id}} = $f; } # print STDERR "$progname: " . scalar(@likes) . " pages\n" # if ($verbose); my $offset = 0; my $n = 1; while (1) { # my $url = ('https://www.facebook.com/ajax/pagelet/generic.php/' . # 'PagesBrowserLikedPagelet?dpr=0&__a=0&data={"offset":' . # $offset . '}'); my $url = ('https://m.facebook.com/pages/launchpoint/pages_request/' . '?category=liked_pages&offset=' . $offset); # print STDERR "$progname: loading $url\n" if ($verbose > 1); print STDERR "$progname: scraping page list html page $n...\n" if ($verbose); my $ua = $LWP::Simple::ua; my $res = $ua->get ($url); my $body = ($res && $res->content) || ''; if ($body =~ m/"errorSummary":"(.*?)"/si) { error ("scrape liked pages: $n: $url: $1"); } my $c2 = 0; # foreach my $chunk (split (/\}/, $body)) { foreach my $chunk (split (/\\u003Ca/, $body)) { # my ($pid) = ($chunk =~ m@"pageID":\s*(\d+)@si); # my ($pn) = ($chunk =~ m@"pageName":\s*"(.*?)"@si); my ($pid) = ($chunk =~ m@href=\\\"\\/([^<>\\\"]+)\\@si); my ($pn) = ($chunk =~ m@>([a-z]+[^<>\\\"\']+)@si); $pn = undef if ($pn && $pn =~ m/Show More/si); next unless ($pid && $pn); $pid = $1 if ($pid =~ m/\?id=(\d+)/si); $likes{$pid} = { id => $pid, name => $pn, link => 'https://www.facebook.com/' . $pid, }; $offset++; $c2++; } last unless $c2; $n++; } print STDERR "$progname: $offset pages\n" if ($verbose); } my @new_entries = (); my @e = (); # Callback to periodically dump out the .rss file. If we get an error # (such as a rate limit) at least dump out what we have. # my $save_cb = sub() { $now = time(); if ($now > $last_write + $write_every) { my @new_entries2 = (); my $total = @e; my $n = 0; foreach my $hash (@e) { my $pct = sprintf("%.1f%%", ($n+1) * 100 / $total); $n++; my $entry = make_rss_entry ('friends', $hash, \%friends, \%likes, $kill, $mode, $token, 0, 0, $pct); next unless $entry; push @new_entries2, $entry; } generate_rss_save ($app, $outfile, $mode, $since, $token, \%friends, \%likes, \@new_entries2); save_cookies ($app, $ocookie); $last_write = $now; } }; fb_load_newsfeed ($app, $mode, $since, \%friends, \%likes, $kill, $token, $save_cb, \@e); my $total = @e; my $n = 0; foreach my $hash (@e) { my $pct = sprintf("%.1f%%", ($n+1) * 100 / $total); $n++; my $entry = make_rss_entry ('friends', $hash, \%friends, \%likes, $kill, $mode, $token, 0, 0, $pct); next unless $entry; push @new_entries, $entry; } # Checkins don't show up on "me/home" so we have to read them separately. # But sometimes they do? # And sometimes the "stream" entry and the "checkin" entry don't have # the same URLs, so we get dups. But it's a choice between getting dups, # and missing things entirely. # my $checkins_p = 0; ##### apparently this doesn't work any more if ($mode ne 'pages' && $checkins_p) { # Not sure limit= is working here. I'm seeing days-old checkins. my $query = ('search?type=checkin' . # '&limit=' . $since . '&since=' . $since . '&access_token=' . $token); my @d = fb_load_paged ("checkins", $query); $result = \@d; foreach my $hash ($result ? @$result : ()) { # The $hash->{id} in a checkin doesn't work as /USER/posts/ID # and doesn't have a photo in it, so we can't identify it as # a photo post either. So instead let's get the graph info # about it (which does have a photo) and use that. # # Except it doesn't always have the photo. WTF. # my $oid = $hash->{id}; $hash = fb_load ("checkin", "$oid?&access_token=$token"); my $photop = defined ($hash->{picture}); $hash->{checkin_p} = 1; # Kludge my $entry = make_rss_entry ('checkins', $hash, \%friends, \%likes, $kill, $mode, $token, $photop, 0, undef); next unless $entry; push @new_entries, $entry; } } # Likewise, being tagged on photos. Need to use FQL for these! # How will I accomplish this when they finally turn FQL off? # my $photos_p = 0; # No longer works as of Sep 2016 if ($mode ne 'pages' && $photos_p) { print STDERR "$progname: loading photo tags...\n" if ($verbose); $result = fql ("photo tags", $token, "SELECT object_id,text,created FROM photo_tag WHERE subject IN (SELECT uid2 FROM friend WHERE uid1 = me()) AND created > $since", 1); # ignore errors $result = $result->{data}; foreach my $hash ($result ? @$result : ()) { my $id = $hash->{object_id}; my $tag = $hash->{text}; my $entry = make_photo_rss_entry ('photos', $id, $tag, \%friends, \%likes, $kill, $mode, $token, undef); next unless $entry; push @new_entries, $entry; } } $total = scalar(keys %friends); # Let's also list events that our friends are attending. # There doesn't seem to be a timestamp for *when* they clicked # "attending" on the event, so let's just leave them in the # feed for all future events. # my $events_p = 1; #### This no longer works with non-FB apps, Sep 2016. if (0 && $mode ne 'pages' && $events_p) { print STDERR "$progname: loading events...\n" if ($verbose); my $now = time(); my $n = 0; foreach my $id (keys %friends) { my $entry = $friends{$id}; my $name = $entry->{name}; my $fid = $entry->{id}; if (kill_string_p ($kill, $name) || kill_string_p ($kill, "$name events")) { print STDERR "$progname: omitting events for killfiled $name\n" if ($verbose > 2); next; } my $pct = sprintf("%.1f%%", ($n+1) * 100 / $total); foreach my $event (fb_load_paged ("$name events ($pct)", "$id/events" . '?access_token=' . $token, 1, # Limit result pages 1)) { # ignore errors my $title = $event->{name}; my $eid = $event->{id}; my $loc = $event->{location}; my $time = $event->{start_time}; my $going = $event->{rsvp_status}; my $time_t = parse_date ($time); my $timestr = POSIX::strftime ("%a, %b %d, %I:%M %p", localtime ($time_t)) if ($time_t); if (kill_string_p ($kill, $title)) { print STDERR "$progname: omitting killfiled event $name\n" if ($verbose > 2); next; } $going = 'maybe attending' if ($going eq 'unsure'); $going =~ s/^(.)/\U$1/s; my $desc = "$going: $title"; my $desc2 = $desc; $desc2 .= "\n At: $loc" if $loc; $desc2 .= "\n On: $timestr" if $timestr; if ($time_t < $now) { print STDERR "$progname: omitting past event: $desc\n" if ($verbose > 2); next; } if ($time_t > $now + (60 * 60 * 24 * 30 * 3)) { print STDERR "$progname: omitting far future event: $desc\n" if ($verbose > 2); next; } # Fake up a newsfeed entry for this. my %EE; my $E = \%EE; $E->{id} = "${fid}_${eid}"; # unique enough? $E->{from} = $entry; $E->{updated_time} = $time; $E->{message} = $desc2; $E->{caption} = $title; $E->{link} = 'https://www.facebook.com/events/' . $eid; $E = make_rss_entry ('events', $E, \%friends, \%likes, $kill, $mode, $token, 0, 0, undef); push @new_entries, $E; } $n++; } } # Let's lump posts to "groups" in with posts to "pages". #### This one still uses the API, not mobile scraping. # my $groups_p = 1; $groups_p = 0; #### Need to rewrite this to scrape, if I still give a shit if ($mode ne 'friends' && $groups_p) { # Save first generate_rss_save ($app, $outfile, $mode, $since, $token, \%friends, \%likes, \@new_entries); my @groups = fb_load_paged ("groups", 'me/groups?fields=id,name' . '&access_token=' . $token); #error ("no groups!") unless @groups; foreach my $g (@groups) { my $name = $g->{name}; my $id = $g->{id}; foreach my $hash (fb_load_paged ("group \"$name\"", "$id/feed" . '?since=' . $since . '&access_token=' . $token,, 1, # Limit result pages 0)) { # ignore errors my $entry = make_rss_entry ('groups', $hash, \%friends, \%likes, $kill, $mode, $token, 0, $name, undef); next unless $entry; push @new_entries, $entry; } } } generate_rss_save ($app, $outfile, $mode, $since, $token, \%friends, \%likes, \@new_entries); save_cookies ($app, $ocookie); } sub generate_rss_save($$$$$$) { my ($app, $outfile, $mode, $since, $token, $friends, $pages, $new_entries) = @_; # my $result = fb_load ("name", "me?fields=name&access_token=$token"); # my $my_name = $result->{name}; my $my_name = $app; my @rss = (); my %dups; foreach my $entry (sort { $b->[0] <=> $a->[0] } @$new_entries) { my ($tt, $url, $ehash, $item) = @$entry; if ($dups{$url}) { print STDERR "$progname: skipping dup entry for $url\n" if ($verbose > 1); next; } if ($dups{$ehash}) { print STDERR "$progname: skipping dup entry for $ehash ($url)\n" if ($verbose > 1); next; } $item =~ s/^\s+//s; $item =~ s/\s+$//s; $dups{$url} = $item; $dups{$ehash} = $item; push @rss, $item; if ($item =~ m/Malachi/si) { my $o = $item; $o =~ s/\n/\n\n/gs; #### print STDERR "$progname: #### $url $o ADD " . localtime() . "\n"; } } my $old_count = 0; my $updated_count = 0; my $unchanged_count = 0; my $expired_count = 0; my @old_entries = load_rss ($outfile); foreach my $item (@old_entries) { my ($date) = ($item =~ m@(.*?)<@si); my ($url) = ($item =~ m@]*>(.*?)<@si); next unless $url; my $dup = $dups{$url} || ''; $dup =~ s/^\s+//s; $dup =~ s/\s+$//s; $item =~ s/^\s+//s; $item =~ s/\s+$//s; # If the old and new entries differ only in their date, keep the old one. # Otherwise Feedly sometimes shows it to you again, even though the guid # is the same. # my ($odate) = ($dup =~ m@([^<>]+)@si); if ($odate && $date && $date ne $odate) { $item =~ s@()([^<>]+)@$1$odate@si; $date = $odate; } $date = parse_date ($date, 1); if ($date && $date < $since) { $expired_count++; print STDERR "$progname: expired old entry for $url\n" if ($verbose > 1); } elsif (! $dup) { $old_count++; push @rss, $item; if ($item =~ m/Malachi/si) { my $o = $item; $o =~ s/\n/\\n/gs; #### print STDERR "$progname: #### $url $o SAVED " . localtime() . "\n"; } print STDERR "$progname: preserved vanished old entry for $url\n" if ($verbose > 2); } elsif ($dup eq $item) { $unchanged_count++; if ($item =~ m/Malachi/si) { my $o = $item; $o =~ s/\n/\\n/gs; #### print STDERR "$progname: #### $url $o UNCHANGED " . localtime() . "\n"; } print STDERR "$progname: unchanged entry for $url\n" if ($verbose > 2); } elsif (!$date) { $expired_count++; if ($item =~ m/Malachi/si) { my $o = $item; $o =~ s/\n/\\n/gs; #### print STDERR "$progname: #### $url $o EXP ND " . localtime() . "\n"; } print STDERR "$progname: expired dateless entry for $url\n" if ($verbose > 1); } else { $updated_count++; if ($item =~ m/Malachi/si) { my $o = $item; $o =~ s/\n/\\n/gs; #### print STDERR "$progname: #### $url $o UPDATED " . localtime() . "\n"; } print STDERR "$progname: updated entry for $url\n" if ($verbose > 2); } } my $total_count = @rss; my $new_count = ($total_count - $old_count - $updated_count - $unchanged_count); my $desc = "$total_count entries"; $desc .= "; $new_count new"; $desc .= "; $updated_count updated"; $desc .= "; $unchanged_count unchanged"; $desc .= "; $old_count preserved"; $desc .= "; $expired_count expired"; my $oentries = join ('', @old_entries); my $nentries = join ('', @rss); print STDERR "$progname: WARNING: no entries: $outfile\n$desc\n" unless $total_count; if ($oentries eq $nentries) { print STDERR "$outfile: unchanged ($desc)\n" if ($verbose); } else { # Feed validator demands a . my $rss_url = 'http://www.facebook.com/'; my $rss_title = "${my_name}'s Facebook Feed"; # Validator wants this but it's useless. # my $self = $base_url . $file; my $rss = ("\n" . "\n" . " \n" . " $rss_url\n" . " $rss_title\n" . " $rss_title\n" . " en\n" . # " \n" . $nentries . " \n" . "\n"); $desc .= "; " . keys(%$friends) . " friends" if ($mode ne 'pages'); $desc .= "; " . keys(%$pages) . " pages" if ($mode ne 'friends'); $desc = " ($desc)"; my $file_tmp = sprintf("%s.%08x", $outfile, rand(0xFFFFFFFF)); open (my $out, '>', $file_tmp) || error ("$file_tmp: $!"); (print $out $rss) || error ("$file_tmp: $!"); close $out; if (!rename ("$file_tmp", "$outfile")) { unlink "$file_tmp"; error ("mv $file_tmp $outfile: $!"); } print STDERR "$progname: wrote $outfile$desc\n" if ($verbose); } } # Just generate a list of all friends and liked pages. For diffing. # sub list_friends($$) { my ($app, $outfile) = @_; my ($token, $secret) = load_access_token ($app); check_token ($app, $token, $secret); my $result = fb_load ("name", "me?fields=name&access_token=$token"); my $my_name = $result->{name}; my @friends = (); my @pages = (); my %dups; if (0) { foreach my $endpoint ('friends', 'taggable_friends') { foreach my $f (fb_load_paged ($endpoint, "me/$endpoint?fields=link,name" . '&access_token=' . $token)) { my $name = $f->{name}; my $url = $f->{link} || '?'; $url =~ s@app_scoped_user_id/@@s; push @friends, "$name\t$url" unless ($dups{$name}); $dups{$name} = 1; } } } else { my @ff = fb_scrape_friends($app); # It's 2020 and I have no friends now # error ("no friends!") unless @ff; foreach my $f (@ff) { push @friends, $f->{name} . "\t" . $f->{link}; } } my @pp = fb_load_paged ("pages", 'me/likes?fields=link,name' . '&access_token=' . $token); error ("no pages!") unless @pp; foreach my $f (@pp) { push @pages, $f->{name} . "\t" . $f->{link}; } @friends = sort { lc($a) cmp lc($b) } @friends; @pages = sort { lc($a) cmp lc($b) } @pages; my $body = join("\n", @friends) . "\n\n" . join("\n", @pages) . "\n"; if ($outfile) { my $tmp = "$outfile.tmp"; open (my $out, '>:utf8', $tmp) || error ("$outfile: $!"); print $out $body; if (!rename ($tmp, $outfile)) { unlink "$tmp"; error ("mv $tmp $outfile: $!"); } } else { print STDOUT $body; } } # If the URL is a known URL-shortener, open it to return the real URL. # If 'force_p', then chase all redirects even if unrecognized. # sub unpack_tinyurl($;$); sub unpack_tinyurl($;$) { my ($url, $force_p) = @_; my $re = '^https?://( tinyurl\.com | bit\.ly | j\.mp | t\.co | youtu\.be | goo\.gl | fb\.me | ow\.ly | glob\.ly | lnk\.to | apple\.co | amzn\.eu | amzn\.to | geni\.us | hyperurl\.co | po\.st | radi\.al | smarturl\.it | spoti\.fi | sptfy\.com | smarturl\.it | umgt\.de | itun\.es | jwz\.org/b | dnalounge\.com/b )/'; # We can rewrite this one without hitting the network. return $url if ($url =~ s@^\Qhttp://youtu.be/\E@http://www.youtube.com/watch?v=@si); if (!$force_p) { return $url unless ($url =~ m@$re@sx); } my $ua = LWP::UserAgent->new (max_redirect => 0); $ua->agent ("$progname/$version"); $ua->timeout (5); # Don't try too hard on "t.co". my $res = $ua->get ($url); my $ret = ($res && $res->code) || 'null'; if ($ret eq '301' || $ret eq '302') { my $loc = $res->header('Location'); print STDERR "$progname: unpacked: $url => $loc\n" if ($verbose > 4); if ($loc) { $url = $loc; return $url unless ($url =~ m@$re@sx); return unpack_tinyurl($url); } } return $url; } # Generate a list of URLs linked to by the "about" section of all liked pages: # Twitter, Instagram, etc. # sub list_urls($) { my ($app) = @_; my ($token, $secret) = load_access_token ($app); check_token ($app, $token, $secret); foreach my $f (fb_load_paged ("pages", 'me/likes?fields=link,name' . '&access_token=' . $token)) { my $name = $f->{name}; my $url = $f->{link}; next unless $url; $url =~ s@/+$@@s; $url .= '/about/'; print STDERR "$progname: scraping $url...\n" if ($verbose); my %urls; my $body = fb_scrape_html('about', $app, $url, 1); next unless $body; foreach my $a (($body =~ m@]+)>@gsi)) { my ($href) = ($a =~ m/\bhref=[\"\'](.*?)[\"\']/si); next unless $href; $href = html_unquote($href); ($href) = ($href =~ m@\bl\.php\?u=([^?&]+)@si); next unless $href; $href = url_unquote($href); my ($domain) = ($href =~ m@https?://([^/]+)@s); next if ($domain && $domain =~ m/\b( here\.com | smarturl\.it )\b/six); $href = unpack_tinyurl($href); $href =~ s@\?hl=.*$@@s; ($domain) = ($href =~ m@https?://([^/]+)@s); next if ($domain && $domain =~ m/\b( apple\.com | google\.com | amazon.com )\b/six); $urls{$href} = 1; } if (scalar(keys %urls)) { print STDOUT "$name\t" . join("\t", sort (keys %urls)) . "\n"; } } } sub load_one($$$) { my ($app, $url, $outfile) = @_; my ($token, $secret) = load_access_token ($app); my $kill = load_killfile ($app); check_token ($app, $token, $secret); my $result = fb_load ("name", "me?fields=name&access_token=$token"); my $my_name = $result->{name}; my %friends; # just ignore these for debugging my %likes; if (1) { my @friends = fb_load_paged ("friends", 'me/friends?fields=id,name,link' . '&access_token=' . $token); foreach my $f (@friends) { $friends{$f->{id}} = $f; } print STDERR "$progname: " . scalar(@friends) . " friends\n" if ($verbose); } my $mode = 'all'; my $id; $id = "${1}_${2}" if (!$id && $url =~ m@/(\d+)/posts/(\d+)@si); ($id) = ($url =~ m@fbid=(\d+)@si) unless $id; ($id) = ($url =~ m@php\?v=(\d+)@si) unless $id; ($id) = ($url =~ m@posts/(\d+)@si) unless $id; ($id) = ($url =~ m@set=[a-z]\.(\d+)@si) unless $id; ($id) = ($url =~ m@/([\d_]+)/?$@si) unless $id; error ("no ID in $url") unless $id; my $hash = fb_load ("debug", "$id?access_token=$token"); # $kill = undef; my $entry = make_rss_entry ('one', $hash, \%friends, \%likes, $kill, $mode, $token, 0, 0, undef); error ("no entry: $url") unless $entry; my ($time_t, $post_url, $ehash, $rss) = @$entry; error ("no entry: $url") unless $rss; my ($html) = ($rss =~ m@@s); error ("no HTML: $url") unless $html; $html =~ s@(<(P|BR|DIV|IMG|A)\b)@\n$1@gs; $html =~ s@(

)@\n$1\n@gs; $html =~ s@(]*>)@$1\n@gs; $html =~ s@( STYLE=)@\n$1@gs; $html =~ s/^/ /gm; $html = "" . localtime($time_t) . "
$url

\n\n$html\n"; if ($outfile) { my $tmp = "$outfile.tmp"; open (my $out, '>:utf8', $tmp) || error ("$outfile: $!"); print $out $html; if (!rename ($tmp, $outfile)) { unlink "$tmp"; error ("mv $tmp $outfile: $!"); } } else { print STDOUT $html; } } # Save any images that were parts of a Facebook conversation thread. # Returns the rewritten message to have an HREF to the local file name. # sub download_message_attachments($$$) { my ($outdir, $date, $msg) = @_; my ($date2, $name, $msg2) = ($msg =~ m/^(.*?)\t(.*?)\t(.*)$/s); my @lt = localtime($date); my $year = strftime ("%Y", @lt); my $time = strftime ("%m-%d-%H:%M:%S", @lt); $outdir .= '/Attachments'; my $ua = $LWP::Simple::ua; $ua->agent("$progname/$version"); $ua->timeout (10); my $count = 0; $msg2 =~ s@\b(https://[^\[\]\s\"\'(){}]+)@{ my ($url) = $1; if ($url =~ m! https:// (?: fbcdn- [^/]+ | [^/]+ \.fbcdn\.net | [^/]+ \.fbsbx\.com ) / [^?&]+? \.( p?jpe?g | gif | png | mov | mpe?g | m4[av] | mp[34] ) \b !six || # None of these seem to work, though. They require a cookie, and # I don't know how to get at them via the graph API. $url =~ m! https:// [^/]+ \.facebook\.com/ .*? /messaging/attachment.php\? !six ) { my $suf = $1 || 'mp4'; # assume "attachment.php" are only video I guess # since we don't have the content-type yet. my $fn0 = "$year/$time-$name" . ($count ? "-$count" : "") . ".$suf"; my $fn = "$outdir/$fn0"; my $href = ""; if (-f $fn) { print STDERR "$progname: $fn exists\n" if ($verbose > 1); $url = $href; } else { my $start = time(); my $retries = ($debug_p ? 1 : 20); my $delay = 2; my $err = undef; my $res = undef; my $body = undef; my $i; for ($i = 0; $i < $retries; $i++) { $res = $ua->get ($url); my $ret = ($res && $res->code) || 'null'; $err = ($ret eq '200' ? undef : "Error $ret"); $body = $res ? $res->content : undef; $err = "null body" if (!$err && !$body); last unless defined($err); sleep ($delay); $delay += 2; } my $len = length($body); if ($err) { print STDERR "$progname: $err after $i tries in " . (time() - $start) . " secs -- $url\n"; } elsif ($debug_p) { print STDERR "$progname: not writing: $fn ($len bytes)\n"; $url = $href; } else { mkdir ("$outdir") unless (-d "$outdir"); mkdir ("$outdir/$year") unless (-d "$outdir/$year"); open (my $out, '>:raw', $fn) || error ("$fn: $!"); print $out ($body); close $out; utime ($date, $date, $fn); print STDERR "$progname: wrote $fn ($len bytes)\n" if ($verbose); $url = $href; } } $count++; } else { print STDERR "$progname: not downloading URL $url\n" if ($verbose > 1); } $url; }@gsexi; return "$date2\t$name\t$msg2"; } sub load_messages($$$) { my ($app, $outdir, $since) = @_; $outdir =~ s@/+$@@gs; error ("no such directory: $outdir") unless (-d $outdir); my ($token, $secret) = load_access_token ($app); check_token ($app, $token, $secret); my $result = fb_load ("name", "me?fields=name&access_token=$token"); my $my_name = $result->{name}; # To re-download all of them back to the beginning of time, do this: # $since = undef; # But that shouldn't be necessary, since if the output file doesn't # exist, it always downloads all of them the first time. $since -= (60 * 60 * 24 * 30); # do a whole month # Can't ask for more than 100 per page, but it seems to blow up at # 225 items total, so use a smaller quantum to get as many as possible. my $maxlim = 10; my $args = ("me/threads" . "?limit=$maxlim" . "&messages.limit=$maxlim" . "&fields=id,link,updated_time,participants,messages" . "&access_token=$token"); # This does nothing: $args .= "&since=$since" if $since; # This does nothing: $args .= "&messages.since=$since" if $since; my @threads = fb_load_paged ("threads", $args, 0, # Don't limit result pages 1); # ignore errors foreach my $thread (@threads) { my $id = $thread->{id}; my $url = $thread->{link}; my $date = $thread->{updated_time}; my @names = (); my $pp = $thread->{participants}; $pp = $pp ? $pp->{data} : undef; foreach my $p ($pp ? @$pp : ()) { my $n = $p->{name} || $p->{id} || '???'; next if ($n eq $my_name); push @names, $n; } if (@names > 2) { @names = @names[0 .. 2]; push @names, "etc"; } my $title = join(', ', @names); $title =~ s/^\s+|\s+$//gs; $title = '?' unless $title; my $outfile; { my $n2 = $title; $n2 =~ s@[/:\\*?~]@ @gs; # illegal in file names $n2 =~ s/^\s+|\s+$//gs; $n2 = '?' unless $n2; $outfile = "$outdir/$n2.txt"; } my $since2 = $since; $since2 = undef if (! -f $outfile); # If this thread was last updated earlier than $since, we don't # need to process it at all. # if ($since2) { my $updated = parse_date ($thread->{updated_time}); if ($updated < $since2) { my $d = time() - $updated; $d = int ($d / (60 * 60 * 24)) . " days"; print STDERR "$progname: skipping $title: updated $d ago\n" if ($verbose > 1); next; } } my @mout = (); my $msgs = $thread->{messages}; my $mcount = 0; while (1) { my $next = $msgs->{paging} ? $msgs->{paging}->{next} : undef; $msgs = $msgs ? $msgs->{data} : undef; last unless ($msgs && @$msgs); $mcount += scalar(@$msgs); print STDERR "$progname: thread: $title ($mcount, $id)\n" if ($verbose > 1); my $last_date = time(); foreach my $msg ($msgs ? @$msgs : ()) { my $mid = $msg->{id}; my $mfrom = $msg->{from}->{name} || $msg->{from}->{id} || '?'; my $mdate = $msg->{created_time}; my $mbody = $msg->{message} || ''; if ($msg->{attachments}) { foreach my $d (@{$msg->{attachments}->{data}}) { my $url = $d->{image_data} ? $d->{image_data}->{url} : undef; if (!$url && $d->{id}) { my $id2 = $mid; $id2 =~ s/^m_//gs; $url = ("https://www.facebook.com" . "/ajax/messaging/attachment.php" . "?attach_id=" . $d->{id} . "&mid=" . $id2); # "&hash=..." but it seems unnecessary } if (! $url) { print STDERR "$progname: unparsable attachment: " . Dumper($msg); } else { $url =~ s@^.*\.facebook\.com/l\.php\?.*\bu=([^&]+).*$@{ url_unquote ($1); }@sexi; $mbody .= "\n$url"; } } } if ($msg->{shares}) { foreach my $d (@{$msg->{shares}->{data}}) { my $title = $d->{name} || ''; my $url = $d->{link} || ''; $url =~ s@^.*\.facebook\.com/l\.php\?.*\bu=([^&]+).*$@{ url_unquote ($1); }@sexi; $mbody .= "\nShare: \"$title\" $url"; } } $mdate = parse_date ($mdate); $last_date = $mdate if ($mdate < $last_date); my $d2 = POSIX::strftime ("%a %d %b %Y %I:%M %p", localtime ($mdate)); $mbody =~ s/\n/\n\t/gs; $mbody =~ s/^\s+|\s+$//gs; $mbody = "$d2\t$mfrom\t$mbody"; push @mout, [ $mdate, $mbody ]; } # If we have paged back through history far enough to reach # messages older than $since, we can stop. # if ($since2 && $last_date < $since2) { my $d = time() - $last_date; $d = int ($d / (60 * 60 * 24)) . " days"; print STDERR "$progname: bailing on $title: $d ago\n" if ($verbose > 1); last; } last unless $next; $next =~ s/\b(limit=)\d+/${1}$maxlim/gs; # bigger chunks $msgs = fb_load ("messages", $next, undef, 1); # This is terrible. Some threads have IDs like "t_01234567890" # but some have base64 blobs like "t_ulTK711pxxp4LpKp1f/fOQ". # Problem is, that slash means you can't use it as a graph # component. And if you encode it as %2F, it's decoded anyway # and you get the same 400 error. So there's simply no way to # load anything beyond the first page of a thread that happens # to have a literal "/" in its ID. # # But if there are less than $maxlim messages total then we # didn't need to go into paging anyway, so suppress the warning. # print STDERR "$progname: unloadable paging: $next\n" if (!defined($msgs) && $mcount >= $maxlim); } @mout = sort { $a->[0] <=> $b->[0] } @mout; my $old = ''; if (open (my $in, '<:utf8', $outfile)) { local $/ = undef; # read entire file $old = <$in>; close $in; } $old =~ s/\n\t/\r/gs; my @old = split(/\n/, $old); my %old; my $old_count = 0; foreach my $o (@old) { $o =~ s/\r/\n\t/gs; $old{$o} = 1; $old_count++; } my $changed = 0; foreach my $msg (@mout) { my $date = $msg->[0]; $msg = $msg->[1]; next if ($old{$msg}); $msg = download_message_attachments ($outdir, $date, $msg); next if ($old{$msg}); $old .= $msg . "\n"; print STDERR "$progname: new: $msg\n" if ($verbose > 2); $changed++; } if ($changed) { if (! $debug_p) { my $tmp = "$outfile.tmp"; open (my $out, '>:utf8', $tmp) || error ("$outfile: $!"); print $out $old; if (!rename ($tmp, $outfile)) { unlink "$tmp"; error ("mv $tmp $outfile: $!"); } } print STDERR "$progname: " . ($debug_p ? "not writing" : "wrote") . " $outfile" . " ($changed new, $old_count old)\n" if ($verbose); } elsif ($verbose > 1) { print STDERR "$progname: $outfile unchanged\n"; } } } sub delete_old_posts($) { my ($app) = @_; my $since = time() - (60 * 60 * 24 * 30 * 6); # N months my ($token, $secret) = load_access_token ($app); # check_token ($app, $token, $secret); my $result = fb_load ("id", "me?fields=id&access_token=$token"); my $uid = $result->{id}; my @posts = fb_load_paged ("posts", "$uid/posts" . '?fields=' . 'story,message,description,' . 'created_time,updated_time' . "&access_token=$token", 0, # Don't limit result pages 0); foreach my $p (@posts) { my $id = $p->{id}; my $date = str2time($p->{updated_time} || $p->{created_time}); my $txt = $p->{message} || $p->{description} || $p->{story} || '?'; $txt =~ s/^(.{40}).+$/$1.../s; my $ds = $date ? strftime("%d-%b-%Y", localtime($date)) : 'ERROR'; if (! $date) { error ("unparsable: " . Dumper($p)); } elsif ($date >= $since) { print STDERR "$progname: keeping: $id: $ds: $txt\n" if ($verbose); } elsif ($debug_p) { print STDERR "$progname: would have deleted: $id: $ds: $txt\n" if ($verbose); } else { my $ret = fb_load ("delete", "$id?access_token=$token", { method => 'delete' }, 2); if ($ret) { print STDERR "$progname: deleted: $id: $ds: $txt\n" if ($verbose); } else { print STDERR "$progname: deleting $id FAILED: $ds: $txt\n"; } } } } sub error($) { my ($err) = @_; print STDERR "$progname: $err\n"; exit 1; #use Devel::StackTrace; print STDERR "\n\n#####\n" . Devel::StackTrace->new->as_string; die $err; } sub usage() { print STDERR "usage: $progname [--verbose]\n" . "\t\t\t[ --pages-only | --friends-only | --messages | --list | --delete ]\n" . "\t\t\t[ --debug [ URL ]]\n" . "\t\t\tappname outfile\n"; print STDERR "usage: $progname --generate-session\n"; exit 1; } sub main() { my ($app, $mode, $gen_p, $file, $since, $debug_url); binmode (STDOUT, ':utf8'); binmode (STDERR, ':utf8'); $LWP::Simple::ua->agent("$progname/$version"); $mode = 'both'; $since = time() - (60 * 60 * 24 * 2); # 2 days ago while ($#ARGV >= 0) { $_ = shift @ARGV; if (m/^--?verbose$/) { $verbose++; } elsif (m/^-v+$/) { $verbose += length($_)-1; } elsif (m/^--?pages?(-only)?$/) { $mode = 'pages'; } elsif (m/^--?friends?(-only)?$/) { $mode = 'friends'; } elsif (m/^--?messages?$/) { $mode = 'messages'; } elsif (m/^--?urls?$/) { $mode = 'urls'; } elsif (m/^--?list?$/) { $mode = 'list'; } elsif (m/^--?delete?$/) { $mode = 'delete'; } elsif (m/^--?gen(erate(-session)?)?$/) { $gen_p = 1; } elsif (m/^--?debug$/) { $debug_p++; my $a = $ARGV[0]; if ($a && $a =~ m/^https?:/s) { $debug_url = $a; shift @ARGV; } } elsif (m/^-./) { usage; } elsif (!$app) { $app = $_; } elsif (!$file) { $file = $_; } else { usage; } } $Data::Dumper::Indent = 1; $Data::Dumper::Terse = 1; $Data::Dumper::Useqq = 1; $Data::Dumper::Quotekeys = 0; $Data::Dumper::Pair = "\t=> "; $Data::Dumper::Pad = " "; # Don't print "bless( do{\(my $o = 0)}, 'JSON::PP::Boolean' )" for "false". $JSON::PP::true = 1; $JSON::PP::false = 0; $file = undef if ($file && $file eq '-'); if ($verbose > 4) { my $ua = $LWP::Simple::ua; $ua->add_handler("request_send", sub { shift->dump; return }); $ua->add_handler("response_done", sub { shift->dump; return }); } if ($gen_p) { facebook_generate_session($app); } elsif ($mode eq 'list') { usage unless $app; list_friends ($app, $file); } elsif ($mode eq 'urls') { usage unless $app; usage if $file; list_urls ($app); } elsif ($mode eq 'delete') { usage unless $app; usage if $file; delete_old_posts ($app); } else { usage unless $app; usage unless ($file || $debug_url); if ($debug_url) { load_one ($app, $debug_url, $file); } elsif ($mode eq 'messages') { load_messages ($app, $file, $since); } else { generate_rss ($app, $file, $mode, $since); } } } eval { main(); }; if ($@) { my $e = $@; $e .= "\n" unless ($e =~ m/\n$/s); my $t = strftime ("%I:%M %p", localtime); print STDERR "$progname: $t: $e\n"; exit (1); } exit 0;