#!/usr/local/bin/perl
#
# David MacKay   Jan 2000
#
# itap.p
#
# Reads a list of words (one per line, from the files named on the command
# line or from standard input), encodes each word as telephone-keypad digits
# (ABC -> 2, DEF -> 3, ... WXYZ -> 9) and reports which words collide, i.e.
# share the same digit string.
#
# usage:  itap.p [verbose=1] [file ...]
#
# Leading NAME=VALUE arguments are settings; the only recognised setting is
# "verbose", which echoes every code, clash and skipped duplicate to STDOUT
# while the input is being read.

use strict;
use warnings;

# Run the program only when executed as a script, so that the subroutines
# below can be loaded (e.g. by a test) without consuming any input.
main() unless caller;

# main()
# Parses settings, tallies every input line and prints the report.
sub main {
    my $option = parse_options(\@ARGV);
    my $tally  = new_tally();

    # <> reads the remaining command-line files, or STDIN if there are none.
    while ( my $line = <> ) {
        record_word( $tally, $line, $option->{verbose} );
    }

    report($tally);
    return;
}

# parse_options(\@args)
# Removes leading NAME=VALUE entries from @args and returns a hash ref of
# settings.  Only known settings are stored; an unknown NAME is consumed and
# reported on STDERR instead of being evaluated as code.
sub parse_options {
    my ($args) = @_;
    my %option = ( verbose => 0 );

    while ( @{$args} && $args->[0] =~ /^(\w+)=(.*)/ ) {
        my ( $name, $value ) = ( $1, $2 );
        shift @{$args};
        if ( exists $option{$name} ) {
            $option{$name} = $value;
        }
        else {
            warn "ignoring unknown setting '$name'\n";
        }
    }
    return \%option;
}

# new_tally()
# Returns a fresh, empty statistics record:
#   words       - number of distinct words read
#   codes       - number of distinct codes produced
#   clashes     - number of words whose code had already been produced
#   skipped     - number of repeated words that were ignored
#   seen_word   - set of words already read
#   words_for   - code => array ref of the words that produced it
#   clash_count - code => number of clashes recorded against it
sub new_tally {
    return {
        words       => 0,
        codes       => 0,
        clashes     => 0,
        skipped     => 0,
        seen_word   => {},
        words_for   => {},
        clash_count => {},
    };
}

# record_word($tally, $line, $verbose)
# Normalises one input line (trim surrounding whitespace, upper-case) and
# updates $tally.  Blank lines are ignored.  Words and codes are kept in
# separate tables so that a word consisting of digits or punctuation (which
# encodes to itself) is not mistaken for a clash with its own entry.
sub record_word {
    my ( $tally, $line, $verbose ) = @_;

    my $word = uc $line;
    $word =~ s/^\s+//;
    $word =~ s/\s+\z//;    # also covers a final line with no newline
    return if $word eq '';

    if ( exists $tally->{seen_word}{$word} ) {
        print "skipping $word\n" if $verbose;
        $tally->{skipped}++;
        return;
    }

    $tally->{words}++;
    $tally->{seen_word}{$word} = 1;

    my $code = encode($word);
    print $code, "\n" if $verbose;

    my $earlier = $tally->{words_for}{$code};
    if ( !defined $earlier ) {
        $tally->{codes}++;
        $tally->{words_for}{$code} = [$word];
        return;
    }

    print 'clash of ', $word, ' (', $code, ') with ',
        join( ', ', @{$earlier} ), "\n"
        if $verbose;
    $tally->{clashes}++;
    $tally->{clash_count}{$code}++;
    push @{$earlier}, $word;
    return;
}

# report($tally)
# Prints the skipped count (STDERR, only when non-zero), the summary line
# (both STDERR and STDOUT) and then seven sections on STDOUT listing the
# clashing codes grouped by number of clashes: more than six first, then
# six, five, four, three, two and finally one.  The sections carry no
# headings; each is introduced by two newlines and followed by one.
sub report {
    my ($tally) = @_;

    print STDERR "skipped $tally->{skipped} words\n" if $tally->{skipped};

    my $summary =
          "Read $tally->{words} words and found $tally->{codes} "
        . "distinct codes and $tally->{clashes} clashes\n";
    print STDERR $summary;
    print STDOUT $summary;

    # Slots 1..6 hold codes with exactly that many clashes; slot 7 holds the rest.
    my $overflow_slot = 7;
    my @section       = ('') x ( $overflow_slot + 1 );

    # Sorted so that the report is reproducible from run to run.
    for my $code ( sort keys %{ $tally->{clash_count} } ) {
        my $count = $tally->{clash_count}{$code};
        my $slot  = $count < $overflow_slot ? $count : $overflow_slot;
        $section[$slot] .=
            " $code : \t " . join( ', ', @{ $tally->{words_for}{$code} } ) . " \n";
    }

    for my $slot ( reverse 1 .. $overflow_slot ) {
        print "\n\n", $section[$slot], "\n";
    }
    return;
}

# encode($text)
# Returns a copy of $text with each upper-case letter A-Z replaced by its
# telephone-keypad digit (ABC=2, DEF=3, GHI=4, JKL=5, MNO=6, PQRS=7, TUV=8,
# WXYZ=9).  Every other character, including lower-case letters, is left
# unchanged.  Extra arguments are ignored.  The caller's $_ is not touched.
sub encode {
    my ($text) = @_;
    return $text unless defined $text;

    $text =~ tr/ABCDEFGHIJKLMNOPQRSTUVWXYZ/22233344455566677778889999/;
    return $text;
}

1;