ereg.c 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2016 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Authors: Rasmus Lerdorf <rasmus@php.net> |
  16. | Jim Winstead <jimw@php.net> |
  17. | Jaakko Hyvätti <jaakko@hyvatti.iki.fi> |
  18. +----------------------------------------------------------------------+
  19. */
  20. /* $Id$ */
  21. #include <stdio.h>
  22. #include <ctype.h>
  23. #include "php.h"
  24. #include "ext/standard/php_string.h"
  25. #include "php_ereg.h"
  26. #include "ext/standard/info.h"
  27. /* {{{ arginfo */
  28. ZEND_BEGIN_ARG_INFO_EX(arginfo_ereg, 0, 0, 2)
  29. ZEND_ARG_INFO(0, pattern)
  30. ZEND_ARG_INFO(0, string)
  31. ZEND_ARG_INFO(1, registers) /* ARRAY_INFO(1, registers, 1) */
  32. ZEND_END_ARG_INFO()
  33. ZEND_BEGIN_ARG_INFO(arginfo_ereg_replace, 0)
  34. ZEND_ARG_INFO(0, pattern)
  35. ZEND_ARG_INFO(0, replacement)
  36. ZEND_ARG_INFO(0, string)
  37. ZEND_END_ARG_INFO()
  38. ZEND_BEGIN_ARG_INFO_EX(arginfo_split, 0, 0, 2)
  39. ZEND_ARG_INFO(0, pattern)
  40. ZEND_ARG_INFO(0, string)
  41. ZEND_ARG_INFO(0, limit)
  42. ZEND_END_ARG_INFO()
  43. ZEND_BEGIN_ARG_INFO(arginfo_sql_regcase, 0)
  44. ZEND_ARG_INFO(0, string)
  45. ZEND_END_ARG_INFO()
  46. /* }}} */
  47. /* {{{ Function table */
  48. const zend_function_entry ereg_functions[] = {
  49. PHP_DEP_FE(ereg, arginfo_ereg)
  50. PHP_DEP_FE(ereg_replace, arginfo_ereg_replace)
  51. PHP_DEP_FE(eregi, arginfo_ereg)
  52. PHP_DEP_FE(eregi_replace, arginfo_ereg_replace)
  53. PHP_DEP_FE(split, arginfo_split)
  54. PHP_DEP_FE(spliti, arginfo_split)
  55. PHP_DEP_FE(sql_regcase, arginfo_sql_regcase)
  56. PHP_FE_END
  57. };
  58. /* }}} */
  /* {{{ reg_cache
   * One entry of the compiled-pattern cache kept in EREG(ht_rc); entries are
   * keyed by the pattern string (see _php_regcomp).
   */
  typedef struct {
      regex_t preg;           /* compiled expression, copied out to callers on a hit */
      int cflags;             /* compile flags the expression was built with */
      unsigned long lastuse;  /* value of EREG(lru_counter) when the entry was stored */
  } reg_cache;

  /* re_magic of the first successfully compiled expression (0 until then);
   * _php_regcomp compares cached entries against it. */
  static int reg_magic = 0;

  /* Number of cached patterns at which _php_regcomp starts evicting. */
  #define EREG_CACHE_SIZE 4096
  /* }}} */
  68. ZEND_DECLARE_MODULE_GLOBALS(ereg)
  69. static PHP_GINIT_FUNCTION(ereg);
  70. static PHP_GSHUTDOWN_FUNCTION(ereg);
  71. /* {{{ Module entry */
  72. zend_module_entry ereg_module_entry = {
  73. STANDARD_MODULE_HEADER,
  74. "ereg",
  75. ereg_functions,
  76. NULL,
  77. NULL,
  78. NULL,
  79. NULL,
  80. PHP_MINFO(ereg),
  81. NO_VERSION_YET,
  82. PHP_MODULE_GLOBALS(ereg),
  83. PHP_GINIT(ereg),
  84. PHP_GSHUTDOWN(ereg),
  85. NULL,
  86. STANDARD_MODULE_PROPERTIES_EX
  87. };
  88. /* }}} */
  89. /* {{{ COMPILE_DL_EREG */
  90. #ifdef COMPILE_DL_EREG
  91. ZEND_GET_MODULE(ereg)
  92. #endif
  93. /* }}} */
  94. /* {{{ ereg_lru_cmp */
  95. static int ereg_lru_cmp(const void *a, const void *b TSRMLS_DC)
  96. {
  97. Bucket *f = *((Bucket **) a);
  98. Bucket *s = *((Bucket **) b);
  99. if (((reg_cache *)f->pData)->lastuse <
  100. ((reg_cache *)s->pData)->lastuse) {
  101. return -1;
  102. } else if (((reg_cache *)f->pData)->lastuse ==
  103. ((reg_cache *)s->pData)->lastuse) {
  104. return 0;
  105. } else {
  106. return 1;
  107. }
  108. }
  109. /* }}} */
  110. /* {{{ static ereg_clean_cache */
  111. static int ereg_clean_cache(void *data, void *arg TSRMLS_DC)
  112. {
  113. int *num_clean = (int *)arg;
  114. if (*num_clean > 0) {
  115. (*num_clean)--;
  116. return ZEND_HASH_APPLY_REMOVE;
  117. } else {
  118. return ZEND_HASH_APPLY_STOP;
  119. }
  120. }
  121. /* }}} */
  122. /* {{{ _php_regcomp
  123. */
  124. static int _php_regcomp(regex_t *preg, const char *pattern, int cflags TSRMLS_DC)
  125. {
  126. int r = 0;
  127. int patlen = strlen(pattern);
  128. reg_cache *rc = NULL;
  129. if (zend_hash_num_elements(&EREG(ht_rc)) >= EREG_CACHE_SIZE) {
  130. /* easier than dealing with overflow as it happens */
  131. if (EREG(lru_counter) >= (1 << 31) || zend_hash_sort(&EREG(ht_rc), zend_qsort, ereg_lru_cmp, 0 TSRMLS_CC) == FAILURE) {
  132. zend_hash_clean(&EREG(ht_rc));
  133. EREG(lru_counter) = 0;
  134. } else {
  135. int num_clean = EREG_CACHE_SIZE / 4;
  136. zend_hash_apply_with_argument(&EREG(ht_rc), ereg_clean_cache, &num_clean TSRMLS_CC);
  137. }
  138. }
  139. if(zend_hash_find(&EREG(ht_rc), (char *) pattern, patlen+1, (void **) &rc) == SUCCESS
  140. && rc->cflags == cflags) {
  141. #ifdef HAVE_REGEX_T_RE_MAGIC
  142. /*
  143. * We use a saved magic number to see whether cache is corrupted, and if it
  144. * is, we flush it and compile the pattern from scratch.
  145. */
  146. if (rc->preg.re_magic != reg_magic) {
  147. zend_hash_clean(&EREG(ht_rc));
  148. EREG(lru_counter) = 0;
  149. } else {
  150. memcpy(preg, &rc->preg, sizeof(*preg));
  151. return r;
  152. }
  153. }
  154. r = regcomp(preg, pattern, cflags);
  155. if(!r) {
  156. reg_cache rcp;
  157. rcp.cflags = cflags;
  158. rcp.lastuse = ++(EREG(lru_counter));
  159. memcpy(&rcp.preg, preg, sizeof(*preg));
  160. /*
  161. * Since we don't have access to the actual MAGIC1 definition in the private
  162. * header file, we save the magic value immediately after compilation. Hopefully,
  163. * it's good.
  164. */
  165. if (!reg_magic) reg_magic = preg->re_magic;
  166. zend_hash_update(&EREG(ht_rc), (char *) pattern, patlen+1,
  167. (void *) &rcp, sizeof(rcp), NULL);
  168. }
  169. #else
  170. memcpy(preg, &rc->preg, sizeof(*preg));
  171. } else {
  172. r = regcomp(preg, pattern, cflags);
  173. if(!r) {
  174. reg_cache rcp;
  175. rcp.cflags = cflags;
  176. rcp.lastuse = ++(EREG(lru_counter));
  177. memcpy(&rcp.preg, preg, sizeof(*preg));
  178. zend_hash_update(&EREG(ht_rc), (char *) pattern, patlen+1,
  179. (void *) &rcp, sizeof(rcp), NULL);
  180. }
  181. }
  182. #endif
  183. return r;
  184. }
  185. /* }}} */
  186. static void _free_ereg_cache(reg_cache *rc)
  187. {
  188. regfree(&rc->preg);
  189. }
  190. #undef regfree
  191. #define regfree(a);
  192. #undef regcomp
  193. #define regcomp(a, b, c) _php_regcomp(a, b, c TSRMLS_CC)
  194. /* {{{ PHP_GINIT_FUNCTION
  195. */
  196. static PHP_GINIT_FUNCTION(ereg)
  197. {
  198. zend_hash_init(&ereg_globals->ht_rc, 0, NULL, (void (*)(void *)) _free_ereg_cache, 1);
  199. ereg_globals->lru_counter = 0;
  200. }
  201. /* }}} */
  202. /* {{{ PHP_GSHUTDOWN_FUNCTION
  203. */
  204. static PHP_GSHUTDOWN_FUNCTION(ereg)
  205. {
  206. zend_hash_destroy(&ereg_globals->ht_rc);
  207. }
  208. /* }}} */
  /* Info callback: prints a one-row table whose value depends on whether
   * HSREGEX is set at build time. */
  PHP_MINFO_FUNCTION(ereg)
  {
  #if HSREGEX
      const char *library_status = "Bundled library enabled";
  #else
      const char *library_status = "System library enabled";
  #endif

      php_info_print_table_start();
      php_info_print_table_row(2, "Regex Library", library_status);
      php_info_print_table_end();
  }
  219. /* {{{ php_ereg_eprint
  220. * php_ereg_eprint - convert error number to name
  221. */
  222. static void php_ereg_eprint(int err, regex_t *re TSRMLS_DC) {
  223. char *buf = NULL, *message = NULL;
  224. size_t len;
  225. size_t buf_len;
  226. #ifdef REG_ITOA
  227. /* get the length of the message */
  228. buf_len = regerror(REG_ITOA | err, re, NULL, 0);
  229. if (buf_len) {
  230. buf = (char *)safe_emalloc(buf_len, sizeof(char), 0);
  231. if (!buf) return; /* fail silently */
  232. /* finally, get the error message */
  233. regerror(REG_ITOA | err, re, buf, buf_len);
  234. }
  235. #else
  236. buf_len = 0;
  237. #endif
  238. len = regerror(err, re, NULL, 0);
  239. if (len) {
  240. message = (char *)safe_emalloc((buf_len + len + 2), sizeof(char), 0);
  241. if (!message) {
  242. return; /* fail silently */
  243. }
  244. if (buf_len) {
  245. snprintf(message, buf_len, "%s: ", buf);
  246. buf_len += 1; /* so pointer math below works */
  247. }
  248. /* drop the message into place */
  249. regerror(err, re, message + buf_len, len);
  250. php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", message);
  251. }
  252. STR_FREE(buf);
  253. STR_FREE(message);
  254. }
  255. /* }}} */
  256. /* {{{ php_ereg
  257. */
  258. static void php_ereg(INTERNAL_FUNCTION_PARAMETERS, int icase)
  259. {
  260. zval **regex, /* Regular expression */
  261. **array = NULL; /* Optional register array */
  262. char *findin; /* String to apply expression to */
  263. int findin_len;
  264. regex_t re;
  265. regmatch_t *subs;
  266. int err, match_len, string_len;
  267. uint i;
  268. int copts = 0;
  269. off_t start, end;
  270. char *buf = NULL;
  271. char *string = NULL;
  272. int argc = ZEND_NUM_ARGS();
  273. if (zend_parse_parameters(argc TSRMLS_CC, "Zs|Z", &regex, &findin, &findin_len, &array) == FAILURE) {
  274. return;
  275. }
  276. if (icase) {
  277. copts |= REG_ICASE;
  278. }
  279. if (argc == 2) {
  280. copts |= REG_NOSUB;
  281. }
  282. /* compile the regular expression from the supplied regex */
  283. if (Z_TYPE_PP(regex) == IS_STRING) {
  284. err = regcomp(&re, Z_STRVAL_PP(regex), REG_EXTENDED | copts);
  285. } else {
  286. /* we convert numbers to integers and treat them as a string */
  287. if (Z_TYPE_PP(regex) == IS_DOUBLE) {
  288. convert_to_long_ex(regex); /* get rid of decimal places */
  289. }
  290. convert_to_string_ex(regex);
  291. /* don't bother doing an extended regex with just a number */
  292. err = regcomp(&re, Z_STRVAL_PP(regex), copts);
  293. }
  294. if (err) {
  295. php_ereg_eprint(err, &re TSRMLS_CC);
  296. RETURN_FALSE;
  297. }
  298. /* make a copy of the string we're looking in */
  299. string = estrndup(findin, findin_len);
  300. /* allocate storage for (sub-)expression-matches */
  301. subs = (regmatch_t *)ecalloc(sizeof(regmatch_t),re.re_nsub+1);
  302. /* actually execute the regular expression */
  303. err = regexec(&re, string, re.re_nsub+1, subs, 0);
  304. if (err && err != REG_NOMATCH) {
  305. php_ereg_eprint(err, &re TSRMLS_CC);
  306. regfree(&re);
  307. efree(subs);
  308. RETURN_FALSE;
  309. }
  310. match_len = 1;
  311. if (array && err != REG_NOMATCH) {
  312. match_len = (int) (subs[0].rm_eo - subs[0].rm_so);
  313. string_len = findin_len + 1;
  314. buf = emalloc(string_len);
  315. zval_dtor(*array); /* start with clean array */
  316. array_init(*array);
  317. for (i = 0; i <= re.re_nsub; i++) {
  318. start = subs[i].rm_so;
  319. end = subs[i].rm_eo;
  320. if (start != -1 && end > 0 && start < string_len && end < string_len && start < end) {
  321. add_index_stringl(*array, i, string+start, end-start, 1);
  322. } else {
  323. add_index_bool(*array, i, 0);
  324. }
  325. }
  326. efree(buf);
  327. }
  328. efree(subs);
  329. efree(string);
  330. if (err == REG_NOMATCH) {
  331. RETVAL_FALSE;
  332. } else {
  333. if (match_len == 0)
  334. match_len = 1;
  335. RETVAL_LONG(match_len);
  336. }
  337. regfree(&re);
  338. }
  339. /* }}} */
  340. /* {{{ proto int ereg(string pattern, string string [, array registers])
  341. Regular expression match */
  342. PHP_FUNCTION(ereg)
  343. {
  344. php_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  345. }
  346. /* }}} */
  347. /* {{{ proto int eregi(string pattern, string string [, array registers])
  348. Case-insensitive regular expression match */
  349. PHP_FUNCTION(eregi)
  350. {
  351. php_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  352. }
  353. /* }}} */
  354. /* {{{ php_ereg_replace
  355. * this is the meat and potatoes of regex replacement! */
  356. PHP_EREG_API char *php_ereg_replace(const char *pattern, const char *replace, const char *string, int icase, int extended TSRMLS_DC)
  357. {
  358. regex_t re;
  359. regmatch_t *subs;
  360. char *buf, /* buf is where we build the replaced string */
  361. *nbuf, /* nbuf is used when we grow the buffer */
  362. *walkbuf; /* used to walk buf when replacing backrefs */
  363. const char *walk; /* used to walk replacement string for backrefs */
  364. size_t buf_len, new_l;
  365. int pos, tmp, string_len;
  366. int err, copts = 0;
  367. string_len = strlen(string);
  368. if (icase) {
  369. copts = REG_ICASE;
  370. }
  371. if (extended) {
  372. copts |= REG_EXTENDED;
  373. }
  374. err = regcomp(&re, pattern, copts);
  375. if (err) {
  376. php_ereg_eprint(err, &re TSRMLS_CC);
  377. return ((char *) -1);
  378. }
  379. /* allocate storage for (sub-)expression-matches */
  380. subs = (regmatch_t *)ecalloc(sizeof(regmatch_t),re.re_nsub+1);
  381. /* start with a buffer that is twice the size of the stringo
  382. we're doing replacements in */
  383. buf = safe_emalloc(string_len, 2, 1);
  384. buf_len = 2 * string_len + 1;
  385. err = pos = 0;
  386. buf[0] = '\0';
  387. while (!err) {
  388. err = regexec(&re, &string[pos], re.re_nsub+1, subs, (pos ? REG_NOTBOL : 0));
  389. if (err && err != REG_NOMATCH) {
  390. php_ereg_eprint(err, &re TSRMLS_CC);
  391. efree(subs);
  392. efree(buf);
  393. regfree(&re);
  394. return ((char *) -1);
  395. }
  396. if (!err) {
  397. /* backref replacement is done in two passes:
  398. 1) find out how long the string will be, and allocate buf
  399. 2) copy the part before match, replacement and backrefs to buf
  400. Jaakko Hyvätti <Jaakko.Hyvatti@iki.fi>
  401. */
  402. new_l = strlen(buf) + subs[0].rm_so; /* part before the match */
  403. walk = replace;
  404. while (*walk) {
  405. if ('\\' == *walk && isdigit((unsigned char)walk[1]) && ((unsigned char)walk[1]) - '0' <= (int)re.re_nsub) {
  406. if (subs[walk[1] - '0'].rm_so > -1 && subs[walk[1] - '0'].rm_eo > -1) {
  407. new_l += subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so;
  408. }
  409. walk += 2;
  410. } else {
  411. new_l++;
  412. walk++;
  413. }
  414. }
  415. if (new_l + 1 > buf_len) {
  416. nbuf = safe_emalloc(new_l + 1, 2, buf_len);
  417. buf_len = 1 + buf_len + 2 * new_l;
  418. strncpy(nbuf, buf, buf_len - 1);
  419. nbuf[buf_len - 1] = '\0';
  420. efree(buf);
  421. buf = nbuf;
  422. }
  423. tmp = strlen(buf);
  424. /* copy the part of the string before the match */
  425. strncat(buf, &string[pos], subs[0].rm_so);
  426. /* copy replacement and backrefs */
  427. walkbuf = &buf[tmp + subs[0].rm_so];
  428. walk = replace;
  429. while (*walk) {
  430. if ('\\' == *walk && isdigit((unsigned char)walk[1]) && (unsigned char)walk[1] - '0' <= (int)re.re_nsub) {
  431. if (subs[walk[1] - '0'].rm_so > -1 && subs[walk[1] - '0'].rm_eo > -1
  432. /* this next case shouldn't happen. it does. */
  433. && subs[walk[1] - '0'].rm_so <= subs[walk[1] - '0'].rm_eo) {
  434. tmp = subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so;
  435. memcpy (walkbuf, &string[pos + subs[walk[1] - '0'].rm_so], tmp);
  436. walkbuf += tmp;
  437. }
  438. walk += 2;
  439. } else {
  440. *walkbuf++ = *walk++;
  441. }
  442. }
  443. *walkbuf = '\0';
  444. /* and get ready to keep looking for replacements */
  445. if (subs[0].rm_so == subs[0].rm_eo) {
  446. if (subs[0].rm_so + pos >= string_len) {
  447. break;
  448. }
  449. new_l = strlen (buf) + 1;
  450. if (new_l + 1 > buf_len) {
  451. nbuf = safe_emalloc(new_l + 1, 2, buf_len);
  452. buf_len = 1 + buf_len + 2 * new_l;
  453. strncpy(nbuf, buf, buf_len-1);
  454. efree(buf);
  455. buf = nbuf;
  456. }
  457. pos += subs[0].rm_eo + 1;
  458. buf [new_l-1] = string [pos-1];
  459. buf [new_l] = '\0';
  460. } else {
  461. pos += subs[0].rm_eo;
  462. }
  463. } else { /* REG_NOMATCH */
  464. new_l = strlen(buf) + strlen(&string[pos]);
  465. if (new_l + 1 > buf_len) {
  466. buf_len = new_l + 1; /* now we know exactly how long it is */
  467. nbuf = safe_emalloc(new_l, 1, 1);
  468. strncpy(nbuf, buf, buf_len-1);
  469. efree(buf);
  470. buf = nbuf;
  471. }
  472. /* stick that last bit of string on our output */
  473. strlcat(buf, &string[pos], buf_len);
  474. }
  475. }
  476. /* don't want to leak memory .. */
  477. efree(subs);
  478. regfree(&re);
  479. /* whew. */
  480. return (buf);
  481. }
  482. /* }}} */
  483. /* {{{ php_do_ereg_replace
  484. */
  485. static void php_do_ereg_replace(INTERNAL_FUNCTION_PARAMETERS, int icase)
  486. {
  487. zval **arg_pattern,
  488. **arg_replace;
  489. char *pattern, *arg_string;
  490. char *string;
  491. char *replace;
  492. char *ret;
  493. int arg_string_len;
  494. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZs", &arg_pattern, &arg_replace, &arg_string, &arg_string_len) == FAILURE) {
  495. return;
  496. }
  497. if (Z_TYPE_PP(arg_pattern) == IS_STRING) {
  498. if (Z_STRVAL_PP(arg_pattern) && Z_STRLEN_PP(arg_pattern)) {
  499. pattern = estrndup(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern));
  500. } else {
  501. pattern = STR_EMPTY_ALLOC();
  502. }
  503. } else {
  504. convert_to_long_ex(arg_pattern);
  505. pattern = emalloc(2);
  506. pattern[0] = (char) Z_LVAL_PP(arg_pattern);
  507. pattern[1] = '\0';
  508. }
  509. if (Z_TYPE_PP(arg_replace) == IS_STRING) {
  510. if (Z_STRVAL_PP(arg_replace) && Z_STRLEN_PP(arg_replace)) {
  511. replace = estrndup(Z_STRVAL_PP(arg_replace), Z_STRLEN_PP(arg_replace));
  512. } else {
  513. replace = STR_EMPTY_ALLOC();
  514. }
  515. } else {
  516. convert_to_long_ex(arg_replace);
  517. replace = emalloc(2);
  518. replace[0] = (char) Z_LVAL_PP(arg_replace);
  519. replace[1] = '\0';
  520. }
  521. if (arg_string && arg_string_len) {
  522. string = estrndup(arg_string, arg_string_len);
  523. } else {
  524. string = STR_EMPTY_ALLOC();
  525. }
  526. /* do the actual work */
  527. ret = php_ereg_replace(pattern, replace, string, icase, 1 TSRMLS_CC);
  528. if (ret == (char *) -1) {
  529. RETVAL_FALSE;
  530. } else {
  531. RETVAL_STRINGL_CHECK(ret, strlen(ret), 1);
  532. STR_FREE(ret);
  533. }
  534. STR_FREE(string);
  535. STR_FREE(replace);
  536. STR_FREE(pattern);
  537. }
  538. /* }}} */
  539. /* {{{ proto string ereg_replace(string pattern, string replacement, string string)
  540. Replace regular expression */
  541. PHP_FUNCTION(ereg_replace)
  542. {
  543. php_do_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  544. }
  545. /* }}} */
  546. /* {{{ proto string eregi_replace(string pattern, string replacement, string string)
  547. Case insensitive replace regular expression */
  548. PHP_FUNCTION(eregi_replace)
  549. {
  550. php_do_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  551. }
  552. /* }}} */
  553. /* {{{ php_split
  554. */
  555. static void php_split(INTERNAL_FUNCTION_PARAMETERS, int icase)
  556. {
  557. long count = -1;
  558. regex_t re;
  559. regmatch_t subs[1];
  560. char *spliton, *str, *strp, *endp;
  561. int spliton_len, str_len;
  562. int err, size, copts = 0;
  563. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &spliton, &spliton_len, &str, &str_len, &count) == FAILURE) {
  564. return;
  565. }
  566. if (icase) {
  567. copts = REG_ICASE;
  568. }
  569. strp = str;
  570. endp = strp + str_len;
  571. err = regcomp(&re, spliton, REG_EXTENDED | copts);
  572. if (err) {
  573. php_ereg_eprint(err, &re TSRMLS_CC);
  574. RETURN_FALSE;
  575. }
  576. array_init(return_value);
  577. /* churn through str, generating array entries as we go */
  578. while ((count == -1 || count > 1) && !(err = regexec(&re, strp, 1, subs, 0))) {
  579. if (subs[0].rm_so == 0 && subs[0].rm_eo) {
  580. /* match is at start of string, return empty string */
  581. add_next_index_stringl(return_value, "", 0, 1);
  582. /* skip ahead the length of the regex match */
  583. strp += subs[0].rm_eo;
  584. } else if (subs[0].rm_so == 0 && subs[0].rm_eo == 0) {
  585. /* No more matches */
  586. regfree(&re);
  587. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid Regular Expression");
  588. zend_hash_destroy(Z_ARRVAL_P(return_value));
  589. efree(Z_ARRVAL_P(return_value));
  590. RETURN_FALSE;
  591. } else {
  592. /* On a real match */
  593. /* make a copy of the substring */
  594. size = subs[0].rm_so;
  595. /* add it to the array */
  596. add_next_index_stringl(return_value, strp, size, 1);
  597. /* point at our new starting point */
  598. strp = strp + subs[0].rm_eo;
  599. }
  600. /* if we're only looking for a certain number of points,
  601. stop looking once we hit it */
  602. if (count != -1) {
  603. count--;
  604. }
  605. }
  606. /* see if we encountered an error */
  607. if (err && err != REG_NOMATCH) {
  608. php_ereg_eprint(err, &re TSRMLS_CC);
  609. regfree(&re);
  610. zend_hash_destroy(Z_ARRVAL_P(return_value));
  611. efree(Z_ARRVAL_P(return_value));
  612. RETURN_FALSE;
  613. }
  614. /* otherwise we just have one last element to add to the array */
  615. size = endp - strp;
  616. add_next_index_stringl(return_value, strp, size, 1);
  617. regfree(&re);
  618. }
  619. /* }}} */
  620. /* {{{ proto array split(string pattern, string string [, int limit])
  621. Split string into array by regular expression */
  622. PHP_FUNCTION(split)
  623. {
  624. php_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  625. }
  626. /* }}} */
  627. /* {{{ proto array spliti(string pattern, string string [, int limit])
  628. Split string into array by regular expression case-insensitive */
  629. PHP_FUNCTION(spliti)
  630. {
  631. php_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  632. }
  633. /* }}} */
  634. /* {{{ proto string sql_regcase(string string)
  635. Make regular expression for case insensitive match */
  636. PHP_EREG_API PHP_FUNCTION(sql_regcase)
  637. {
  638. char *string, *tmp;
  639. int string_len;
  640. unsigned char c;
  641. register int i, j;
  642. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &string, &string_len) == FAILURE) {
  643. return;
  644. }
  645. tmp = safe_emalloc(string_len, 4, 1);
  646. for (i = j = 0; i < string_len; i++) {
  647. c = (unsigned char) string[i];
  648. if ( j >= INT_MAX - 1 || (isalpha(c) && j >= INT_MAX - 4)) {
  649. php_error_docref(NULL TSRMLS_CC, E_WARNING, "String too long, max length is %d", INT_MAX);
  650. efree(tmp);
  651. RETURN_FALSE;
  652. }
  653. if (isalpha(c)) {
  654. tmp[j++] = '[';
  655. tmp[j++] = toupper(c);
  656. tmp[j++] = tolower(c);
  657. tmp[j++] = ']';
  658. } else {
  659. tmp[j++] = c;
  660. }
  661. }
  662. tmp[j] = 0;
  663. RETVAL_STRINGL(tmp, j, 1);
  664. efree(tmp);
  665. }
  666. /* }}} */
  667. /*
  668. * Local variables:
  669. * tab-width: 4
  670. * c-basic-offset: 4
  671. * End:
  672. * vim600: noet sw=4 ts=4 fdm=marker
  673. * vim<600: noet sw=4 ts=4
  674. */