soundex.c 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. /*
  2. +----------------------------------------------------------------------+
  3. | Copyright (c) The PHP Group |
  4. +----------------------------------------------------------------------+
  5. | This source file is subject to version 3.01 of the PHP license, |
  6. | that is bundled with this package in the file LICENSE, and is |
  7. | available through the world-wide-web at the following url: |
  8. | https://www.php.net/license/3_01.txt |
  9. | If you did not receive a copy of the PHP license and are unable to |
  10. | obtain it through the world-wide-web, please send a note to |
  11. | license@php.net so we can mail you a copy immediately. |
  12. +----------------------------------------------------------------------+
  13. | Author: Bjørn Borud - Guardian Networks AS <borud@guardian.no> |
  14. +----------------------------------------------------------------------+
  15. */
  16. #include "php.h"
  17. #include <stdlib.h>
  18. #include <errno.h>
  19. #include <ctype.h>
  20. #include "php_string.h"
  21. /* Simple soundex algorithm as described by Knuth in TAOCP, vol 3 */
  22. /* {{{ Calculate the soundex key of a string */
  23. PHP_FUNCTION(soundex)
  24. {
  25. char *str;
  26. size_t i, _small, str_len, code, last;
  27. char soundex[4 + 1];
  28. static const char soundex_table[26] =
  29. {0, /* A */
  30. '1', /* B */
  31. '2', /* C */
  32. '3', /* D */
  33. 0, /* E */
  34. '1', /* F */
  35. '2', /* G */
  36. 0, /* H */
  37. 0, /* I */
  38. '2', /* J */
  39. '2', /* K */
  40. '4', /* L */
  41. '5', /* M */
  42. '5', /* N */
  43. 0, /* O */
  44. '1', /* P */
  45. '2', /* Q */
  46. '6', /* R */
  47. '2', /* S */
  48. '3', /* T */
  49. 0, /* U */
  50. '1', /* V */
  51. 0, /* W */
  52. '2', /* X */
  53. 0, /* Y */
  54. '2'}; /* Z */
  55. ZEND_PARSE_PARAMETERS_START(1, 1)
  56. Z_PARAM_STRING(str, str_len)
  57. ZEND_PARSE_PARAMETERS_END();
  58. /* build soundex string */
  59. last = -1;
  60. for (i = 0, _small = 0; i < str_len && _small < 4; i++) {
  61. /* convert chars to upper case and strip non-letter chars */
  62. /* BUG: should also map here accented letters used in non */
  63. /* English words or names (also found in English text!): */
  64. /* esstsett, thorn, n-tilde, c-cedilla, s-caron, ... */
  65. code = toupper((int)(unsigned char)str[i]);
  66. if (code >= 'A' && code <= 'Z') {
  67. if (_small == 0) {
  68. /* remember first valid char */
  69. soundex[_small++] = (char)code;
  70. last = soundex_table[code - 'A'];
  71. }
  72. else {
  73. /* ignore sequences of consonants with same soundex */
  74. /* code in trail, and vowels unless they separate */
  75. /* consonant letters */
  76. code = soundex_table[code - 'A'];
  77. if (code != last) {
  78. if (code != 0) {
  79. soundex[_small++] = (char)code;
  80. }
  81. last = code;
  82. }
  83. }
  84. }
  85. }
  86. /* pad with '0' and terminate with 0 ;-) */
  87. while (_small < 4) {
  88. soundex[_small++] = '0';
  89. }
  90. soundex[_small] = '\0';
  91. RETURN_STRINGL(soundex, _small);
  92. }
  93. /* }}} */