levenshtein.c 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2016 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: Hartmut Holzgraefe <hholzgra@php.net> |
  16. +----------------------------------------------------------------------+
  17. */
  18. /* $Id$ */
  19. #include "php.h"
  20. #include <stdlib.h>
  21. #include <errno.h>
  22. #include <ctype.h>
  23. #include "php_string.h"
  24. #define LEVENSHTEIN_MAX_LENGTH 255
  25. /* {{{ reference_levdist
  26. * reference implementation, only optimized for memory usage, not speed */
  27. static int reference_levdist(const char *s1, int l1, const char *s2, int l2, int cost_ins, int cost_rep, int cost_del )
  28. {
  29. int *p1, *p2, *tmp;
  30. int i1, i2, c0, c1, c2;
  31. if (l1 == 0) {
  32. return l2 * cost_ins;
  33. }
  34. if (l2 == 0) {
  35. return l1 * cost_del;
  36. }
  37. if ((l1 > LEVENSHTEIN_MAX_LENGTH) || (l2 > LEVENSHTEIN_MAX_LENGTH)) {
  38. return -1;
  39. }
  40. p1 = safe_emalloc((l2 + 1), sizeof(int), 0);
  41. p2 = safe_emalloc((l2 + 1), sizeof(int), 0);
  42. for (i2 = 0; i2 <= l2; i2++) {
  43. p1[i2] = i2 * cost_ins;
  44. }
  45. for (i1 = 0; i1 < l1 ; i1++) {
  46. p2[0] = p1[0] + cost_del;
  47. for (i2 = 0; i2 < l2; i2++) {
  48. c0 = p1[i2] + ((s1[i1] == s2[i2]) ? 0 : cost_rep);
  49. c1 = p1[i2 + 1] + cost_del;
  50. if (c1 < c0) {
  51. c0 = c1;
  52. }
  53. c2 = p2[i2] + cost_ins;
  54. if (c2 < c0) {
  55. c0 = c2;
  56. }
  57. p2[i2 + 1] = c0;
  58. }
  59. tmp = p1;
  60. p1 = p2;
  61. p2 = tmp;
  62. }
  63. c0 = p1[l2];
  64. efree(p1);
  65. efree(p2);
  66. return c0;
  67. }
  68. /* }}} */
  69. /* {{{ custom_levdist
  70. */
  71. static int custom_levdist(char *str1, char *str2, char *callback_name TSRMLS_DC)
  72. {
  73. php_error_docref(NULL TSRMLS_CC, E_WARNING, "The general Levenshtein support is not there yet");
  74. /* not there yet */
  75. return -1;
  76. }
  77. /* }}} */
  78. /* {{{ proto int levenshtein(string str1, string str2[, int cost_ins, int cost_rep, int cost_del])
  79. Calculate Levenshtein distance between two strings */
  80. PHP_FUNCTION(levenshtein)
  81. {
  82. int argc = ZEND_NUM_ARGS();
  83. char *str1, *str2;
  84. char *callback_name;
  85. int str1_len, str2_len, callback_len;
  86. long cost_ins, cost_rep, cost_del;
  87. int distance = -1;
  88. switch (argc) {
  89. case 2: /* just two strings: use maximum performance version */
  90. if (zend_parse_parameters(2 TSRMLS_CC, "ss", &str1, &str1_len, &str2, &str2_len) == FAILURE) {
  91. return;
  92. }
  93. distance = reference_levdist(str1, str1_len, str2, str2_len, 1, 1, 1);
  94. break;
  95. case 5: /* more general version: calc cost by ins/rep/del weights */
  96. if (zend_parse_parameters(5 TSRMLS_CC, "sslll", &str1, &str1_len, &str2, &str2_len, &cost_ins, &cost_rep, &cost_del) == FAILURE) {
  97. return;
  98. }
  99. distance = reference_levdist(str1, str1_len, str2, str2_len, cost_ins, cost_rep, cost_del);
  100. break;
  101. case 3: /* most general version: calc cost by user-supplied function */
  102. if (zend_parse_parameters(3 TSRMLS_CC, "sss", &str1, &str1_len, &str2, &str2_len, &callback_name, &callback_len) == FAILURE) {
  103. return;
  104. }
  105. distance = custom_levdist(str1, str2, callback_name TSRMLS_CC);
  106. break;
  107. default:
  108. WRONG_PARAM_COUNT;
  109. }
  110. if (distance < 0 && /* TODO */ ZEND_NUM_ARGS() != 3) {
  111. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Argument string(s) too long");
  112. }
  113. RETURN_LONG(distance);
  114. }
  115. /* }}} */
  116. /*
  117. * Local variables:
  118. * tab-width: 4
  119. * c-basic-offset: 4
  120. * End:
  121. * vim600: sw=4 ts=4 fdm=marker
  122. * vim<600: sw=4 ts=4
  123. */