mf_soundex.c 3.25 KB
Newer Older
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1 2 3 4 5 6 7 8
/* Copyright (C) 2000 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/****************************************************************
*	SOUNDEX ALGORITHM in C					*
*								*
*	The basic algorithm source is taken from EDN Nov.	*
*	14, 1985 pg. 36.					*
*								*
*	As a test, those in Illinois will find that the		*
*	first group of numbers in their driver's license	*
*	number is the soundex number for their last name.	*
*								*
*	RHW  PC-IBBS ID. #1230					*
*								*
*	As an extension, if remove_garbage is set then all	*
*	non-alpha characters are skipped.			*
*								*
*	Note that this implementation corresponds to the	*
*	original version of the algorithm, not to the more	*
*	popular "enhanced" version described by Knuth.		*
****************************************************************/

#include "mysys_priv.h"
#include <m_ctype.h>
#include "my_static.h"

/* Forward declaration: soundex() below calls get_scode() before its definition */
static char get_scode(char **ptr,pbool remove_garbage);

		/* The output string is 4 characters long, followed by a NUL */
		/* out_pntr can be == in_pntr */

/*
  Compute the soundex code of a NUL-terminated string.

  out_pntr        Receives the result: the upper-cased first character,
                  then three code characters (padded with '0' if the
                  input runs out), then a terminating NUL.  Five bytes
                  are always written.  May be the same pointer as in_pntr.
  in_pntr         NUL-terminated input string.
  remove_garbage  If set, non-alpha characters are skipped, both before
                  the first letter and between later letters.  If not
                  set, encoding stops at the first character for which
                  get_scode() returns 0.

  If there is nothing to encode (empty input, or only non-alpha
  characters with remove_garbage set) the first output byte is whatever
  toupper() yields for the NUL and the three code positions are '0'.
*/

void soundex(register my_string out_pntr, my_string in_pntr,
	     pbool remove_garbage)
{
  char ch,last_ch;
  reg3 my_string end;
  int have_first;

  if (remove_garbage)
  {
    while (*in_pntr && !isalpha(*in_pntr))	/* Skip leading garbage */
      in_pntr++;
  }

  /*
    Remember whether a first character exists.  This is sampled before
    the first write because out_pntr may alias in_pntr.
  */
  have_first= (*in_pntr != 0);

  *out_pntr++ = toupper(*in_pntr);	/* Copy first letter		 */
  last_ch = get_scode(&in_pntr,0);	/* code of the first letter	 */
					/* for the first 'double-letter' */
					/* check.			 */
  end=out_pntr+3;			/* Loop on input letters until	 */
					/* end of input (null) or output */
					/* letter code count = 3	 */

  /*
    Only step over the first character when there is one; stepping over
    the terminating NUL would make get_scode() read past the end of the
    input buffer.
  */
  if (have_first)
  {
    in_pntr++;
    while (out_pntr < end && (ch = get_scode(&in_pntr,remove_garbage)) != 0)
    {
      in_pntr++;			/* ch != 0, so this is not the NUL */
      if ((ch != '0') && (ch != last_ch)) /* if not skipped or double */
      {
	*out_pntr++ = ch;		/* letter, copy to output */
      }
      last_ch = ch;			/* save code of last input letter */
					/* for next double-letter check */
    }
  }
  while (out_pntr < end)		/* Pad short codes with '0' */
    *out_pntr++ = '0';
  *out_pntr=0;				/* end string */
} /* soundex */


  /*
    Return the soundex code for the character at **ptr.

    If remove_garbage is set, *ptr is first advanced past any non-alpha
    characters (stopping at the terminating NUL).  *ptr is never moved
    past the character that is examined.

    Returns:
      soundex_map entry   for a character that upper-cases to 'A'..'Z'
      '0'                 for any other character isalpha() accepts
                          (extended, country-specific letters are
                          treated as vowels)
      0                   for everything else, including end of string
  */

static char get_scode(char **ptr, pbool remove_garbage)
{
  uchar letter;

  if (remove_garbage)
  {
    /* Move the caller's cursor to the next letter or to the NUL */
    for ( ; **ptr != 0 && !isalpha(**ptr) ; ++*ptr)
      ;
  }

  letter= toupper(**ptr);
  if (letter >= 'A' && letter <= 'Z')
    return soundex_map[letter - 'A'];

  /* Outside A-Z: extended alpha counts as a vowel, anything else can't map */
  return isalpha(letter) ? '0' : 0;
} /* get_scode */