soundex.c 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 7 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2018 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: Bjørn Borud - Guardian Networks AS <borud@guardian.no> |
  16. +----------------------------------------------------------------------+
  17. */
  18. #include "php.h"
  19. #include <stdlib.h>
  20. #include <errno.h>
  21. #include <ctype.h>
  22. #include "php_string.h"
  23. /* Simple soundex algorithm as described by Knuth in TAOCP, vol 3 */
  24. /* {{{ proto string soundex(string str)
  25. Calculate the soundex key of a string */
  26. PHP_FUNCTION(soundex)
  27. {
  28. char *str;
  29. size_t i, _small, str_len, code, last;
  30. char soundex[4 + 1];
  31. static char soundex_table[26] =
  32. {0, /* A */
  33. '1', /* B */
  34. '2', /* C */
  35. '3', /* D */
  36. 0, /* E */
  37. '1', /* F */
  38. '2', /* G */
  39. 0, /* H */
  40. 0, /* I */
  41. '2', /* J */
  42. '2', /* K */
  43. '4', /* L */
  44. '5', /* M */
  45. '5', /* N */
  46. 0, /* O */
  47. '1', /* P */
  48. '2', /* Q */
  49. '6', /* R */
  50. '2', /* S */
  51. '3', /* T */
  52. 0, /* U */
  53. '1', /* V */
  54. 0, /* W */
  55. '2', /* X */
  56. 0, /* Y */
  57. '2'}; /* Z */
  58. ZEND_PARSE_PARAMETERS_START(1, 1)
  59. Z_PARAM_STRING(str, str_len)
  60. ZEND_PARSE_PARAMETERS_END();
  61. if (str_len == 0) {
  62. RETURN_FALSE;
  63. }
  64. /* build soundex string */
  65. last = -1;
  66. for (i = 0, _small = 0; i < str_len && _small < 4; i++) {
  67. /* convert chars to upper case and strip non-letter chars */
  68. /* BUG: should also map here accented letters used in non */
  69. /* English words or names (also found in English text!): */
  70. /* esstsett, thorn, n-tilde, c-cedilla, s-caron, ... */
  71. code = toupper((int)(unsigned char)str[i]);
  72. if (code >= 'A' && code <= 'Z') {
  73. if (_small == 0) {
  74. /* remember first valid char */
  75. soundex[_small++] = (char)code;
  76. last = soundex_table[code - 'A'];
  77. }
  78. else {
  79. /* ignore sequences of consonants with same soundex */
  80. /* code in trail, and vowels unless they separate */
  81. /* consonant letters */
  82. code = soundex_table[code - 'A'];
  83. if (code != last) {
  84. if (code != 0) {
  85. soundex[_small++] = (char)code;
  86. }
  87. last = code;
  88. }
  89. }
  90. }
  91. }
  92. /* pad with '0' and terminate with 0 ;-) */
  93. while (_small < 4) {
  94. soundex[_small++] = '0';
  95. }
  96. soundex[_small] = '\0';
  97. RETURN_STRINGL(soundex, _small);
  98. }
  99. /* }}} */
  100. /*
  101. * Local variables:
  102. * tab-width: 4
  103. * c-basic-offset: 4
  104. * End:
  105. * vim600: sw=4 ts=4 fdm=marker
  106. * vim<600: sw=4 ts=4
  107. */