soundex.c 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2016 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: Bjørn Borud - Guardian Networks AS <borud@guardian.no> |
  16. +----------------------------------------------------------------------+
  17. */
  18. /* $Id$ */
  19. #include "php.h"
  20. #include <stdlib.h>
  21. #include <errno.h>
  22. #include <ctype.h>
  23. #include "php_string.h"
  24. /* Simple soundex algorithm as described by Knuth in TAOCP, vol 3 */
  25. /* {{{ proto string soundex(string str)
  26. Calculate the soundex key of a string */
  27. PHP_FUNCTION(soundex)
  28. {
  29. char *str;
  30. int i, _small, str_len, code, last;
  31. char soundex[4 + 1];
  32. static char soundex_table[26] =
  33. {0, /* A */
  34. '1', /* B */
  35. '2', /* C */
  36. '3', /* D */
  37. 0, /* E */
  38. '1', /* F */
  39. '2', /* G */
  40. 0, /* H */
  41. 0, /* I */
  42. '2', /* J */
  43. '2', /* K */
  44. '4', /* L */
  45. '5', /* M */
  46. '5', /* N */
  47. 0, /* O */
  48. '1', /* P */
  49. '2', /* Q */
  50. '6', /* R */
  51. '2', /* S */
  52. '3', /* T */
  53. 0, /* U */
  54. '1', /* V */
  55. 0, /* W */
  56. '2', /* X */
  57. 0, /* Y */
  58. '2'}; /* Z */
  59. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &str, &str_len) == FAILURE) {
  60. return;
  61. }
  62. if (str_len == 0) {
  63. RETURN_FALSE;
  64. }
  65. /* build soundex string */
  66. last = -1;
  67. for (i = 0, _small = 0; i < str_len && _small < 4; i++) {
  68. /* convert chars to upper case and strip non-letter chars */
  69. /* BUG: should also map here accented letters used in non */
  70. /* English words or names (also found in English text!): */
  71. /* esstsett, thorn, n-tilde, c-cedilla, s-caron, ... */
  72. code = toupper((int)(unsigned char)str[i]);
  73. if (code >= 'A' && code <= 'Z') {
  74. if (_small == 0) {
  75. /* remember first valid char */
  76. soundex[_small++] = code;
  77. last = soundex_table[code - 'A'];
  78. }
  79. else {
  80. /* ignore sequences of consonants with same soundex */
  81. /* code in trail, and vowels unless they separate */
  82. /* consonant letters */
  83. code = soundex_table[code - 'A'];
  84. if (code != last) {
  85. if (code != 0) {
  86. soundex[_small++] = code;
  87. }
  88. last = code;
  89. }
  90. }
  91. }
  92. }
  93. /* pad with '0' and terminate with 0 ;-) */
  94. while (_small < 4) {
  95. soundex[_small++] = '0';
  96. }
  97. soundex[_small] = '\0';
  98. RETURN_STRINGL(soundex, _small, 1);
  99. }
  100. /* }}} */
  101. /*
  102. * Local variables:
  103. * tab-width: 4
  104. * c-basic-offset: 4
  105. * End:
  106. * vim600: sw=4 ts=4 fdm=marker
  107. * vim<600: sw=4 ts=4
  108. */