#!/usr/bin/perl -w
# Copyright © 2007 Jamie Zawinski
#
# Permission to use, copy, modify, distribute, and sell this software and its
# documentation for any purpose is hereby granted without fee, provided that
# the above copyright notice appear in all copies and that both that
# copyright notice and this permission notice appear in supporting
# documentation.  No representations are made about the suitability of this
# software for any purpose.  It is provided "as is" without express or
# implied warranty.
#
# Does some sanity-checking on the ID3 tags in a given directory to attempt
# to detect typos and trivial spelling differences (e.g., "Foo" vs "The Foo")
#
# Created: 14-Sep-2007.

require 5;
use diagnostics;
use strict;
use warnings;

use MP3::Tag;

my $progname = $0; $progname =~ s@.*/@@g;
my $version = q{ $Revision: 1.2 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/;

my $verbose = 0;

# Maps a simplified artist key (see simplify_artist) to a hash of
# { exact artist spelling => number of files using that spelling }.
my %artists;


# Prints "$progname: $err" to stderr and exits with status 1.
#
# Defined ahead of its callers so that the ($) prototype is already known
# when those calls are compiled; otherwise perl warns "called too early to
# check prototype" under -w.
#
sub error($) {
  my ($err) = @_;
  print STDERR "$progname: $err\n";
  exit 1;
}


# Prints the usage message to stderr and exits with status 1.
#
sub usage() {
  print STDERR "usage: $progname [--verbose] mp3_files...\n";
  exit 1;
}


# Reduces an artist name to a comparison key, so that trivially different
# spellings of the same name map to the same key.
#
# The name is lowercased; a fixed list of articles and connecting words is
# removed; everything but a-z and digits is dropped; and runs of a repeated
# character are collapsed to one.  If nothing survives that (e.g., the name
# consisted only of stop-words or non-ASCII characters) the lowercased
# original is returned instead.
#
sub simplify_artist($) {
  my ($str) = @_;

  $str = lc($str);
  my $orig = $str;

  # Repeat until no more stop-words are found.
  1 while ($str =~
           s/\b(?:a|an|and|in|of|on|for|the|with|dj|los|le|les|la)\b//gi);

  $str =~ s/[^a-z\d]//g;      # lose non-alphanumeric
  $str =~ s/(.)\1+/$1/g;      # collapse consecutive letters ("xx" -> "x")

  $str = $orig if ($str eq '');

  return $str;
}


#my $debug_count=0;

# Examines one path and records the artist of every MP3 found there in
# %artists.
#
#   - symlinks are skipped (reported when $verbose > 1);
#   - directories are walked recursively, ignoring entries that begin
#     with ".";
#   - files ending in ".mp3" (any case) have their ID3v2 TPE1 frame read
#     and tallied under its simplified key;
#   - anything else is skipped (reported when $verbose > 1).
#
# Exits via error() if a directory cannot be opened or MP3::Tag cannot
# create an object for a file.
#
sub id3_check($);
sub id3_check($) {
  my ($file) = @_;

#  return if ($debug_count++ > 200);

  # Strip trailing slashes, but leave a path that is nothing but slashes
  # (the root directory) alone rather than turning it into "".
  $file =~ s@/+$@@ unless ($file =~ m@^/+$@);

  if (-l $file) {
    print STDERR "$progname: skipping symlink: $file\n" if ($verbose > 1);

  } elsif (-d $file) {
    # Lexical handle: this sub recurses, so each level needs its own.
    opendir (my $dir, $file) or error ("$file: $!");
    my @entries = readdir ($dir);
    closedir $dir;

    foreach my $entry (@entries) {
      next if ($entry =~ m/^\./);
      id3_check ("$file/$entry");
    }

  } elsif ($file =~ m/\.mp3$/i) {
    my $mp3 = MP3::Tag->new($file);
    error ("$file: unreadable?") unless ($mp3);
    $mp3->get_tags();

    if (! exists $mp3->{ID3v2}) {
      print STDERR "$progname: $file: no id3v2 tags\n";
      $mp3->close();          # don't leak the open file on this early exit
      return;
    }

    # Scalar context: we only want the frame's value here.
    my $artist = $mp3->{ID3v2}->get_frame('TPE1');

    if (!defined ($artist)) {
      # Show only the last three path components in the message.
      my $f2 = $file;
      $f2 =~ s@^.*/([^/]*/[^/]*/[^/]*)$@$1@s;
      print STDERR "$progname: no artist: $f2\n" if ($verbose);
    } else {
      # Count this exact spelling under its simplified key.
      $artists{simplify_artist($artist)}{$artist}++;
    }

    $mp3->close();

  } elsif ($verbose > 1) {
    print STDERR "$progname: skipping non-mp3: $file\n";
  }
}


# Reports, on stderr, every simplified key in %artists that was reached by
# more than one exact spelling, listing each spelling with its file count.
# Keys with a single spelling are reported as "OK" only when $verbose > 3.
#
sub summarize() {
  foreach my $key (sort keys(%artists)) {
    my $names = $artists{$key};
    my @spellings = sort keys(%$names);

    if (@spellings == 1) {
      my $name = $spellings[0];
      print STDERR "$progname: OK: \"$name\"\n" if ($verbose > 3);
    } else {
      my @txt = map { "\"$_\" ($names->{$_})" } @spellings;
      print STDERR "$progname: mismatch: " . join (', ', @txt) . "\n";
    }
  }
}


# Parses the command line, checks every named file or directory, then
# prints the summary.
#
# "--verbose" raises $verbose by one; "-v", "-vv", ... raise it by the
# number of "v"s.  Any other argument starting with "-" and having at least
# one more character, or the absence of any file arguments, prints usage
# and exits.
#
sub main() {
  my @files = ();
  while (@ARGV) {
    my $arg = shift @ARGV;
    if    ($arg eq "--verbose") { $verbose++; }
    elsif ($arg =~ m/^-v+$/)    { $verbose += length($arg)-1; }
    elsif ($arg =~ m/^-./)      { usage(); }
    else                        { push @files, $arg; }
  }

  usage() unless (@files);

  foreach my $file (@files) {
    id3_check ($file);
  }

  summarize();
}

main();
exit 0;