#!/usr/bin/perl -w # Copyright © 2006, 2007 Jamie Zawinski # # Permission to use, copy, modify, distribute, and sell this software and its # documentation for any purpose is hereby granted without fee, provided that # the above copyright notice appear in all copies and that both that # copyright notice and this permission notice appear in supporting # documentation. No representations are made about the suitability of this # software for any purpose. It is provided "as is" without express or # implied warranty. # # Downloads the large images from a "gallery" gallery. # This is necessary because the file structure used by that abhorrent # software is stupid and non-hierarchical, meaning there's no "wget" # incantation that can mirror it without mirroring the entire site. # # (The shitty software to which I refer is http://gallery.sourceforge.net/) # # Created: 29-Dec-2006. require 5; use diagnostics; use strict; my $progname = $0; $progname =~ s@.*/@@g; my $version = q{ $Revision: 1.3 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/; my $verbose = 1; # like system() but checks errors. # sub safe_system { my (@cmd) = @_; print STDOUT "$progname: executing " . join(' ', @cmd) . "\n" if ($verbose > 3); system @cmd; my $exit_value = $? >> 8; my $signal_num = $? & 127; my $dumped_core = $? & 128; error ("$cmd[0]: core dumped!") if ($dumped_core); error ("$cmd[0]: signal $signal_num!") if ($signal_num); # error ("$cmd[0]: exited with $exit_value!") if ($exit_value); print STDERR "$progname: '" . join(" ", @cmd) . "' exited with $exit_value!\n" if ($exit_value); } sub parse_images($$$) { my ($base_url, $url, $body) = @_; my @imgs = (); my $head_re = qr/$base_url/; $body =~ s/\s+/ /gs; $body =~ s/]*href=\"([^<>\"]+)\"@si; next unless $url2; next unless ($url2 =~ m@^$head_re/([^/]+)$@); $url2 =~ s/\?.*$//; next if ($dup{$url2}); $dup{$url2} = 1; push @imgs, $url2; print STDERR "$progname: queue: $url2\n" if ($verbose > 2); } error ("no images on $url") unless ($#imgs >= 0); return @imgs; } sub download_images(@) { my (@imgs) = @_; foreach my $img (@imgs) { $img =~ s@/gallery/@/albums/@; $img .= ".jpg"; my ($file) = ($img =~ m@^.*/([^/]+)$@); if (-f $file) { print STDERR "$progname: $file already exists\n"; } else { print STDERR "$progname: img: $img\n" if ($verbose); safe_system ("wget", "-q", $img); } } } sub download_gallery($) { my ($base_url) = @_; my $url = $base_url; my @imgs = (); print STDERR "$progname: parsing $url\n" if ($verbose > 1); my $body = `wget -qO- "$url"`; @imgs = parse_images ($base_url, $url, $body); $body =~ s/\s+/ /gs; $body =~ s/]*href=\"([^<>\"]+)\"@si; next unless $url2; next if ($url2 !~ m/\?page=/); next if ($dup{$url2}); $dup{$url2} = 1; push @pages, $url2; } foreach $url (@pages) { print STDERR "$progname: parsing $url\n" if ($verbose > 1); $body = `wget -qO- "$url"`; push @imgs, parse_images ($base_url, $url, $body); } my $n = $#pages+2; print STDERR "$progname: $n page" . ($n == 1 ? "" : "s") . "\n" if ($verbose); download_images (@imgs); } sub error($) { my ($err) = @_; print STDERR "$progname: $err\n"; exit 1; } sub usage() { print STDERR "usage: $progname [--verbose] gallery-url\n"; exit 1; } sub main() { my $url; while ($#ARGV >= 0) { $_ = shift @ARGV; if ($_ eq "--verbose") { $verbose++; } elsif (m/^-v+$/) { $verbose += length($_)-1; } elsif (m/^-./) { usage; } elsif (!$url) { $url = $_; } else { usage; } } usage unless $url; download_gallery ($url); } main(); exit 0;