Article 11097 of comp.lang.perl:
Path: feenix.metronet.com!news.ecn.bgu.edu!usenet.ins.cwru.edu!howland.reston.ans.net!europa.eng.gtefsd.com!MathWorks.Com!noc.near.net!newshost.meiko.com!not-for-mail
From: mike@meiko.com (Mike Stok)
Newsgroups: comp.lang.perl
Subject: soundex.pl
Date: 2 Mar 1994 09:47:16 -0500
Organization: Meiko Scientific, Inc., MA
Lines: 255
Message-ID: <2l28tk$6vc@hibbert.meiko.com>

[ Archivists note: this item has been replaced with the
  latest version, at the authors request. ]


#!/bin/sh
# This is a shell archive (produced by shar 3.49)
# To extract the files from this archive, save it to a file, remove
# everything above the "!/bin/sh" line above, and type "sh file_name".
#
# made 03/24/1994 00:35 UTC by Mike.Stok@meiko.concord.ma.us
# Source directory /tmp_mnt/develop/sw/misc/mike/soundex
#
# existing files will NOT be overwritten unless -c is specified
#
# This shar contains:
# length  mode       name
# ------ ---------- ------------------------------------------
#   1677 -r--r--r-- soundex.pl
#   2408 -r-xr-xr-x soundex.t
#
# ============= soundex.pl ==============
if test -f 'soundex.pl' -a X"$1" != X"-c"; then
	echo 'x - skipping soundex.pl (File already exists)'
else
echo 'x - extracting soundex.pl (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'soundex.pl' &&
package soundex;
X
;# $Id: soundex.pl,v 1.2 1994/03/24 00:30:27 mike Exp $
;#
;# Implementation of soundex algorithm as described by Knuth in volume
;# 3 of The Art of Computer Programming, with ideas stolen from Ian
;# Phillips <ian@pipex.net>.
;#
;# Mike Stok <Mike.Stok@meiko.concord.ma.us>, 2 March 1994.
;#
;# Knuth's test cases are:
;# 
;# Euler, Ellery -> E460
;# Gauss, Ghosh -> G200
;# Hilbert, Heilbronn -> H416
;# Knuth, Kant -> K530
;# Lloyd, Ladd -> L300
;# Lukasiewicz, Lissajous -> L222
;#
;# $Log: soundex.pl,v $
;# Revision 1.2  1994/03/24  00:30:27  mike
;# Subtle bug (any excuse :-) spotted by Rich Pinder <rpinder@hsc.usc.edu>
;# in the way I handles leasing characters which were different but had
;# the same soundex code.  This showed up comparing it with Oracle's
;# soundex output.
;#
;# Revision 1.1  1994/03/02  13:01:30  mike
;# Initial revision
;#
;#
;##############################################################################
X
;# $soundex'noCode is used to indicate a string doesn't have a soundex
;# code, I like undef other people may want to set it to 'Z000'.
X
$noCode = undef;
X
;# main'soundex
;#
;# usage:
;#
;# @codes = &main'soundex (@wordList);
;# $code = &main'soundex ($word);
;#
;# This strenuously avoids $[
X
sub main'soundex
{
X  local (@s, $f, $fc, $_) = @_;
X
X  foreach (@s)
X  {
X    tr/a-z/A-Z/;
X    tr/A-Z//cd;
X
X    if ($_ eq '')
X    {
X      $_ = $noCode;
X    }
X    else
X    {
X      ($f) = /^(.)/;
X      tr/AEHIOUWYBFPVCGJKQSXZDTLMNR/00000000111122222222334556/;
X      ($fc) = /^(.)/;
X      s/^$fc+//;
X      tr///cs;
X      tr/0//d;
X      $_ = $f . $_ . '000';
X      s/^(.{4}).*/$1/;
X    }
X  }
X
X  wantarray ? @s : shift @s;
}
X
1;
SHAR_EOF
chmod 0444 soundex.pl ||
echo 'restore of soundex.pl failed'
Wc_c="`wc -c < 'soundex.pl'`"
test 1677 -eq "$Wc_c" ||
	echo 'soundex.pl: original size 1677, current size' "$Wc_c"
fi
# ============= soundex.t ==============
if test -f 'soundex.t' -a X"$1" != X"-c"; then
	echo 'x - skipping soundex.t (File already exists)'
else
echo 'x - extracting soundex.t (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'soundex.t' &&
#!./perl
;#
;# $Id: soundex.t,v 1.2 1994/03/24 00:30:27 mike Exp $
;#
;# test module for soundex.pl
;#
;# $Log: soundex.t,v $
;# Revision 1.2  1994/03/24  00:30:27  mike
;# Subtle bug (any excuse :-) spotted by Rich Pinder <rpinder@hsc.usc.edu>
;# in the way I handles leasing characters which were different but had
;# the same soundex code.  This showed up comparing it with Oracle's
;# soundex output.
;#
;# Revision 1.1  1994/03/02  13:03:02  mike
;# Initial revision
;#
;#
X
require '../lib/soundex.pl';
X
$test = 0;
print "1..13\n";
X
while (<DATA>)
{
X  chop;
X  next if /^\s*;?#/;
X  next if /^\s*$/;
X
X  ++$test;
X  $bad = 0;
X
X  if (/^eval\s+/)
X  {
X    ($try = $_) =~ s/^eval\s+//;
X
X    eval ($try);
X    if ($@)
X    {
X      $bad++;
X      print "not ok $test\n";
X      print "# eval '$try' returned $@";
X    }
X  }
X  elsif (/^\(/)
X  {
X    ($in, $out) = split (':');
X
X    $try = "\@expect = $out; \@got = &soundex $in;";
X    eval ($try);
X
X    if (@expect != @got)
X    {
X      $bad++;
X      print "not ok $test\n";
X      print "# expected ", scalar @expect, " results, got ", scalar @got, "\n";
X      print "# expected (", join (', ', @expect),
X	    ") got (", join (', ', @got), ")\n";
X    }
X    else
X    {
X      while (@got)
X      {
X	$expect = shift @expect;
X	$got = shift @got;
X
X	if ($expect ne $got)
X	{
X	  $bad++;
X	  print "not ok $test\n";
X	  print "# expected $expect, got $got\n";
X	}
X      }
X    }
X  }
X  else
X  {
X    ($in, $out) = split (':');
X
X    $try = "\$expect = $out; \$got = &soundex ($in);";
X    eval ($try);
X
X    if ($expect ne $got)
X    {
X      $bad++;
X      print "not ok $test\n";
X      print "# expected $expect, got $got\n";
X    }
X  }
X
X  print "ok $test\n" unless $bad;
}
X
__END__
#
# 1..6
#
# Knuth's test cases, scalar in, scalar out
#
'Euler':'E460'
'Gauss':'G200'
'Hilbert':'H416'
'Knuth':'K530'
'Lloyd':'L300'
'Lukasiewicz':'L222'
#
# 7..8
#
# check default bad code
#
'2 + 2 = 4':undef
undef:undef
#
# 9
#
# check array in, array out
#
('Ellery', 'Ghosh', 'Heilbronn', 'Kant', 'Ladd', 'Lissajous'):('E460', 'G200', 'H416', 'K530', 'L300', 'L222')
#
# 10
#
# check array with explicit undef
#
('Mike', undef, 'Stok'):('M200', undef, 'S320')
#
# 11..12
#
# check setting $soundex'noCode
#
eval $soundex'noCode = 'Z000';
('Mike', undef, 'Stok'):('M200', 'Z000', 'S320')
#
# 13
#
# a subtle difference between me & oracle, spotted by Rich Pinder
# <rpinder@hsc.usc.edu>
#
CZARKOWSKA:C622
SHAR_EOF
chmod 0555 soundex.t ||
echo 'restore of soundex.t failed'
Wc_c="`wc -c < 'soundex.t'`"
test 2408 -eq "$Wc_c" ||
	echo 'soundex.t: original size 2408, current size' "$Wc_c"
fi
exit 0

-- 
The "usual disclaimers" apply.    | Meiko
Mike Stok                         | 130C Baker Ave. Ext
Mike.Stok@meiko.concord.ma.us     | Concord, MA 01742
Meiko tel: (508) 371 0088         | 

Article 11044 of comp.lang.perl:
Path: feenix.metronet.com!news.utdallas.edu!convex!cs.utexas.edu!howland.reston.ans.net!pipex!pipex!not-for-mail
From: ian@pipex.net (Ian Phillipps)
Newsgroups: comp.lang.perl
Subject: Soundex Function for Perl ??
Supersedes: <2kjgpj$di5@tank.pipex.net>
Date: 27 Feb 1994 21:24:14 -0000
Organization: PIPEX Ltd, Cambridge, UK.
Lines: 38
Message-ID: <2kr31u$7e4@tank.pipex.net>
References: <2kg9r1$j5d@hsc.usc.edu>
NNTP-Posting-Host: tank.pipex.net

In article <2kg9r1$j5d@hsc.usc.edu>, Rich Pinder <rpinder@hsc.usc.edu> wrote:
>There is a 'standard' function to assist in name matching algorighims
>called  'Soundex'.  
>
>I believe its in the public domain, and I also am sure the specs of the
>function are available.....but.....

Since it's post-your-soundex time, here's mine. No disclaimers. If it
doesn't work at your site, I'll come out free of charge and fix it. Just
send me the air tickets :-)

Ian
----
[This article supersedes an earlier posting of mine - which failed on
'Lloyd'. Here's a replacement. Thanks to Mike Stok...]

# This implements the "Soundex" algorithm.
# Each argument is treated as a single word, and is reduced to its
# Soundex four character (Z999) code. The resulting code or array of
# codes is passed back as a the result.
# ian@unipalm.co.uk 11 Sep 92

sub soundex
{
    local( @res ) = @_;
    local($i, $t, $_);
    for ( @res )
    {
	tr/a-zA-Z//cd;
	tr/a-zA-Z/A-ZA-Z/s;
	($i,$t) = /(.)(.*)/;
	$t =~ tr/BFPVCGJKQSXZDTLMNRAEHIOUWY/111122222222334556/sd;
	$_ = substr(($i||'Z').$t.'000', 0, 4 );
    }
    wantarray ? @res : $res[0];
}

1;


Article 10948 of comp.lang.perl:
Path: feenix.metronet.com!news.utdallas.edu!convex!cs.utexas.edu!howland.reston.ans.net!europa.eng.gtefsd.com!news.uoregon.edu!gaia.ucs.orst.edu!ruby.oce.orst.edu!tardis.co.uk!bill
From: bill@tardis.co.uk (William Hails)
Newsgroups: comp.lang.perl
Subject: Re: Soundex Function for Perl ??
Date: Thu, 24 Feb 94 11:24:04 GMT
Organization: University Computing Services - Oregon State University
Lines: 57
Message-ID: <18531.9402241124@devone.tardis.co.uk>
NNTP-Posting-Host: ruby.oce.orst.edu
Originator: root@ruby.oce.orst.edu


on 23 Feb 1994 11:12:33 -0800 rpinder@hsc.usc.edu (Rich Pinder) wrote:
rpinder> There is a 'standard' function to assist in name matching algorighims
rpinder> called  'Soundex'.  

rpinder> I believe its in the public domain, and I also am sure the specs of the
rpinder> function are available.....but.....

rpinder> I'd be interested in knowing if there is any such function already \
compiled
rpinder> that would be accessible from Perl.

Heres one I knocked up a while back, usual disclaimers etc.

8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---
# soundex subroutine

sub soundex {
package soundex;

    local ($name) = @_;
    local($buf, $c, $pc, $idx) = ("Z000");

    $name =~ tr/a-z/A-Z/;
    for ($pc = '0'; $name && $idx < 4; substr($name, 0, 1) = '') {
	if (($c) = ($name =~ /^([A-Z])/)) {
	    if ($idx == 0 || ($map{$c} ne '0' && $map{$c} ne $pc)) {
		substr($buf, $idx, 1) = $idx ? $map{$c} : $c;
		++$idx;
	    }
	    $pc = $map{$c};
	}
    }

    $buf;
}

package soundex;

%map = (
    A, 0,  B, 1,  C, 2,  D, 3,  E, 0,  F, 1,  G, 2,  H, 0,  I, 0,
    J, 2,  K, 2,  L, 4,  M, 5,  N, 5,  O, 1,  P, 0,  Q, 2,  R, 6,
    S, 2,  T, 3,  U, 0,  V, 1,  W, 0,  X, 2,  Y, 0,  Z, 2
);

1;

8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---
-- 

Cheers
        Bill

Bill Hails <bill@tardis.co.uk>
Tel (UK) 0483 300 200