#!/usr/bin/perl -w # Copyright © 2007, 2008 Jamie Zawinski # # Permission to use, copy, modify, distribute, and sell this software and its # documentation for any purpose is hereby granted without fee, provided that # the above copyright notice appear in all copies and that both that # copyright notice and this permission notice appear in supporting # documentation. No representations are made about the suitability of this # software for any purpose. It is provided "as is" without express or # implied warranty. # # Given an RSS file, creates a Podcast XML file from it, inlining any # movies or MP3 files that it finds in the content of the original feed. # # Created: 23-Jul-2007. require 5; use diagnostics; use strict; my $progname = $0; $progname =~ s@.*/@@g; my $version = q{ $Revision: 1.6 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/; my $verbose = 0; # Returns true if the two files differ (by running "cmp") # sub cmp_files($$) { my ($file1, $file2) = @_; my @cmd = ("cmp", "-s", "$file1", "$file2"); print "$progname: executing \"" . join(" ", @cmd) . "\"\n" if ($verbose > 2); system (@cmd); my $exit_value = $? >> 8; my $signal_num = $? & 127; my $dumped_core = $? & 128; error ("$cmd[0]: core dumped!") if ($dumped_core); error ("$cmd[0]: signal $signal_num!") if ($signal_num); return $exit_value; } # If the two files differ: # mv file2 file1 # else # rm file2 # sub rename_or_delete($$) { my ($file, $file_tmp) = @_; my $changed_p = cmp_files ($file, $file_tmp); if ($changed_p) { if (!rename ("$file_tmp", "$file")) { unlink "$file_tmp"; error ("mv $file_tmp $file: $!"); } print STDERR "$progname: wrote $file\n" if ($verbose); } else { unlink "$file_tmp" || error ("rm $file_tmp: $!\n"); print STDERR "$progname: $file unchanged\n" if ($verbose > 1); print STDERR "$progname: rm $file_tmp\n" if ($verbose > 2); } } # Write the given body to the file, but don't alter the file's # date if the new content is the same as the existing content. # sub write_file_if_changed($$) { my ($outfile, $body) = @_; local *OUT; my $file_tmp = "$outfile.tmp"; open(OUT, ">$file_tmp") || error ("$file_tmp: $!"); print OUT $body || error ("$file_tmp: $!"); close OUT || error ("$file_tmp: $!"); rename_or_delete ("$outfile", "$file_tmp"); } sub url_quote($) { my ($u) = @_; $u =~ s|([^-a-zA-Z0-9.\@/_\r\n])|sprintf("%%%02X", ord($1))|ge; return $u; } sub html_quote($) { my ($u) = @_; $u =~ s/&/&/g; $u =~ s//>/g; $u =~ s/\"/"/g; return $u; } sub cheesecast($$) { my ($infile, $outfile) = @_; local *IN; open (IN, "<$infile") || error ("$infile: $!"); my $rss = join(' ', ); close IN; $rss =~ s/\n/ /gsi; $rss =~ s/(<(entry|item)\b)/\n$1/gsi; my @items = split("\n", $rss); shift @items; my @new_items = (); foreach my $item (@items) { $_ = $item; my ($title) = m@]*>([^>]*)@s; $title = 'untitled' unless $title; my ($author) = m@]*>\s*]*>([^<>]*)@s; ($author) = m@]*>([^<>]*)@s unless $author; # my $subtitle = ''; # my $summary = ''; my ($date) = m@]*>([^<>]*)@s; ($date) = m@]*>([^<>]*)@s unless ($date); $date = '' unless ($date); # my $keywords = ''; #### my ($html) = m@]*>\s*(.*?)@s; ($html) = m@]*>\s*(.*?)@s unless ($html); ($html) = m@]*>\s*(.*?)@s unless ($html); $title =~ s@*\s*(\s*)?$@$1@gs; if (! defined($html)) { print STDERR "$progname: $infile: no body for \"$title\"\n"; next; } $html =~ s@@$1@gs; $html =~ s@<@<@gs; $html =~ s@>@>@gs; $html =~ s@&@&@gs; my @urls = (); $html =~ s!\b(http:[^\'\"\s]+)!{push @urls, $1; $1;}!gxse; print STDERR "$progname: $title: " . ($#urls+1) . " urls\n" if ($verbose > 1); my ($vid_mov, $vid_wmv, $vid_flv, $vid_fla, $vid_mp3, $vid_m4a, $vid_m4v); foreach my $url (@urls) { print STDERR "$progname: url: $url\n" if ($verbose > 2); if ($url =~ m@\.(mov|mp4)$@) { $vid_mov = $url; } elsif ($url =~ m@\.(wm[va])$@) { $vid_wmv = $url; } elsif ($url =~ m@\.(flv)$@) { $vid_flv = $url; } elsif ($url =~ m@\.(mp3)$@) { $vid_mp3 = $url; } elsif ($url =~ m@\.(m4a)$@) { $vid_m4a = $url; } elsif ($url =~ m@\.(m4v)$@) { $vid_m4v = $url; } elsif ($url =~ m@youtube\.com/get_video@) { my ($id) = ($url =~ m@id=([^<>?&;\"\']+)@si); $vid_fla = "http://www.youtube.com/v/$id"; } elsif ($url =~ m@youtube\.com/v/([^/&;\s]+).*$@) { $vid_fla = "http://www.youtube.com/v/$1"; } } $html =~ s@]*>@\n\n@gsi; $html =~ s@@\n@gsi; $html =~ s@<[^<>]*>@ @gsi; my $summary = html_quote ($html); my ($duration) = ($html =~ m@Duration:\s+([\d:]+)@si); $duration = 0 unless defined ($duration); my $length = 0; my ($vid, $mp3); if ($vid_mov) { $vid = [ $vid_mov, 'video/quicktime' ]; } elsif ($vid_wmv) { $vid = [ $vid_wmv, 'video/x-ms-wmv' ]; } elsif ($vid_fla) { $vid = [ $vid_fla, 'application/x-shockwave-flash' ]; } elsif ($vid_flv) { $vid = [ $vid_flv, 'video/flv' ]; } elsif ($vid_m4v) { $vid = [ $vid_m4v, 'video/m4v' ]; } if ($vid_mp3) { $mp3 = [ $vid_mp3, 'audio/mpeg' ]; } elsif ($vid_m4a) { $mp3 = [ $vid_m4a, 'audio/mpeg' ]; } print STDERR "$progname: no usable URLs!\n" if ($verbose > 1 && !($vid || $mp3)); foreach my $pair ($vid, $mp3) { next unless $pair; my ($url, $ct) = @$pair; print STDERR "$progname: using: $url\n" if ($verbose > 1); $url =~ s/&/&/g; push @new_items, join ("\n ", "", "$title", "$author", # "$subtitle", "($ct) $summary", "", "$url", "$date", "$duration", # "$keywords", ""); } } $_ = $rss; s/<(entry|item)\b.*$//gs; my ($channel_title) = m@]*>([^<>]*)@s; my ($base_url) = m@]*text/html\b[^<>]*href=['\"]([^<>'\"]+)['\"]@s; ($base_url) = m@]*>([^<>]*)@s unless $base_url; # my $copyright = ''; # my $channel_subtitle = ''; my ($channel_author) = m@]*>\s*]*>([^<>]*)@s || ''; # my $channel_summary = ''; my $channel_desc = $channel_title; my $channel_owner = $channel_author; my $channel_email = 'unknown@example.com'; # my $channel_logo = ''; my $channel_cat = 'unknown'; my $output = join ("\n", ("", "", "", "$channel_title", "$base_url", "en-us", # "$copyright", # "$channel_subtitle", "$channel_author", # "$channel_summary", "$channel_desc", "", "$channel_owner", "$channel_email", "", # "", "", "", join ("\n", @new_items), "", "", "")); write_file_if_changed ($outfile, $output); } sub error($) { my ($err) = @_; print STDERR "$progname: $err\n"; exit 1; } sub usage() { print STDERR "usage: $progname [--verbose] infile outfile\n"; exit 1; } sub main() { my ($in, $out); while ($#ARGV >= 0) { $_ = shift @ARGV; if ($_ eq "--verbose") { $verbose++; } elsif (m/^-v+$/) { $verbose += length($_)-1; } elsif (m/^-./) { usage; } elsif (! defined($in)) { $in = $_; } elsif (! defined($out)) { $out = $_; } else { usage; } } cheesecast ($in, $out); } main(); exit 0;