string.c 153 KB


  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 7 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2018 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Authors: Rasmus Lerdorf <rasmus@php.net> |
  16. | Stig Sæther Bakken <ssb@php.net> |
  17. | Zeev Suraski <zeev@php.net> |
  18. +----------------------------------------------------------------------+
  19. */
  20. #include <stdio.h>
  21. #include "php.h"
  22. #include "php_rand.h"
  23. #include "php_string.h"
  24. #include "php_variables.h"
  25. #ifdef HAVE_LOCALE_H
  26. # include <locale.h>
  27. #endif
  28. #ifdef HAVE_LANGINFO_H
  29. # include <langinfo.h>
  30. #endif
  31. #ifdef HAVE_MONETARY_H
  32. # include <monetary.h>
  33. #endif
  34. /*
  35. * This define is here because some versions of libintl redefine setlocale
  36. * to point to libintl_setlocale. That's a ridiculous thing to do as far
  37. * as I am concerned, but with this define and the subsequent undef we
  38. * limit the damage to just the actual setlocale() call in this file
  39. * without turning zif_setlocale into zif_libintl_setlocale. -Rasmus
  40. */
  41. #define php_my_setlocale setlocale
  42. #ifdef HAVE_LIBINTL
  43. # include <libintl.h> /* For LC_MESSAGES */
  44. #ifdef setlocale
  45. # undef setlocale
  46. #endif
  47. #endif
  48. #include "scanf.h"
  49. #include "zend_API.h"
  50. #include "zend_execute.h"
  51. #include "php_globals.h"
  52. #include "basic_functions.h"
  53. #include "zend_smart_str.h"
  54. #include <Zend/zend_exceptions.h>
  55. #ifdef ZTS
  56. #include "TSRM.h"
  57. #endif
  58. /* For str_getcsv() support */
  59. #include "ext/standard/file.h"
  60. /* For php_next_utf8_char() */
  61. #include "ext/standard/html.h"
  /* Padding modes. register_string_constants() exports these to userland as
     STR_PAD_LEFT, STR_PAD_RIGHT and STR_PAD_BOTH. */
  62. #define STR_PAD_LEFT 0
  63. #define STR_PAD_RIGHT 1
  64. #define STR_PAD_BOTH 2
  /* Bit flags (distinct powers of two, so they can be OR-ed together).
     register_string_constants() exports the four individual flags as
     PATHINFO_*; PHP_PATHINFO_ALL is the union of all four. */
  65. #define PHP_PATHINFO_DIRNAME 1
  66. #define PHP_PATHINFO_BASENAME 2
  67. #define PHP_PATHINFO_EXTENSION 4
  68. #define PHP_PATHINFO_FILENAME 8
  69. #define PHP_PATHINFO_ALL (PHP_PATHINFO_DIRNAME | PHP_PATHINFO_BASENAME | PHP_PATHINFO_EXTENSION | PHP_PATHINFO_FILENAME)
  /* Values for the `behavior` argument of php_spn_common_handler(). */
  70. #define STR_STRSPN 0
  71. #define STR_STRCSPN 1
  72. /* {{{ register_string_constants
  * Registers the integer constants of this file through
  * REGISTER_LONG_CONSTANT, all with the CONST_CS | CONST_PERSISTENT flags:
  *   - STR_PAD_* and PATHINFO_*                    (always)
  *   - CHAR_MAX                                    (only if HAVE_LOCALECONV)
  *   - LC_CTYPE ... LC_ALL, and LC_MESSAGES when
  *     the headers define it                       (only if HAVE_LOCALE_H)
  73. */
  74. void register_string_constants(INIT_FUNC_ARGS)
  75. {
  76. REGISTER_LONG_CONSTANT("STR_PAD_LEFT", STR_PAD_LEFT, CONST_CS | CONST_PERSISTENT);
  77. REGISTER_LONG_CONSTANT("STR_PAD_RIGHT", STR_PAD_RIGHT, CONST_CS | CONST_PERSISTENT);
  78. REGISTER_LONG_CONSTANT("STR_PAD_BOTH", STR_PAD_BOTH, CONST_CS | CONST_PERSISTENT);
  79. REGISTER_LONG_CONSTANT("PATHINFO_DIRNAME", PHP_PATHINFO_DIRNAME, CONST_CS | CONST_PERSISTENT);
  80. REGISTER_LONG_CONSTANT("PATHINFO_BASENAME", PHP_PATHINFO_BASENAME, CONST_CS | CONST_PERSISTENT);
  81. REGISTER_LONG_CONSTANT("PATHINFO_EXTENSION", PHP_PATHINFO_EXTENSION, CONST_CS | CONST_PERSISTENT);
  82. REGISTER_LONG_CONSTANT("PATHINFO_FILENAME", PHP_PATHINFO_FILENAME, CONST_CS | CONST_PERSISTENT);
  83. #ifdef HAVE_LOCALECONV
  84. /* If last members of struct lconv equal CHAR_MAX, no grouping is done */
  85. /* This is bad, but since we are going to be hardcoding in the POSIX stuff anyway... */
  /* Fallback: when HAVE_LIMITS_H is not defined, CHAR_MAX is hard-coded to 127
     right here, inside the function body. */
  86. # ifndef HAVE_LIMITS_H
  87. # define CHAR_MAX 127
  88. # endif
  89. REGISTER_LONG_CONSTANT("CHAR_MAX", CHAR_MAX, CONST_CS | CONST_PERSISTENT);
  90. #endif
  91. #ifdef HAVE_LOCALE_H
  92. REGISTER_LONG_CONSTANT("LC_CTYPE", LC_CTYPE, CONST_CS | CONST_PERSISTENT);
  93. REGISTER_LONG_CONSTANT("LC_NUMERIC", LC_NUMERIC, CONST_CS | CONST_PERSISTENT);
  94. REGISTER_LONG_CONSTANT("LC_TIME", LC_TIME, CONST_CS | CONST_PERSISTENT);
  95. REGISTER_LONG_CONSTANT("LC_COLLATE", LC_COLLATE, CONST_CS | CONST_PERSISTENT);
  96. REGISTER_LONG_CONSTANT("LC_MONETARY", LC_MONETARY, CONST_CS | CONST_PERSISTENT);
  97. REGISTER_LONG_CONSTANT("LC_ALL", LC_ALL, CONST_CS | CONST_PERSISTENT);
  /* LC_MESSAGES is only exported when some included header defines it
     (the include of <libintl.h> near the top of the file is annotated
     "For LC_MESSAGES"). */
  98. # ifdef LC_MESSAGES
  99. REGISTER_LONG_CONSTANT("LC_MESSAGES", LC_MESSAGES, CONST_CS | CONST_PERSISTENT);
  100. # endif
  101. #endif
  102. }
  103. /* }}} */
  /* Forward declaration; the definition is not part of this section of the file. */
  104. int php_tag_find(char *tag, size_t len, const char *set);
  /* Lookup table mapping a nibble value (0..15) to its lowercase hexadecimal
     digit; indexed by php_bin2hex(). */
  105. /* this is read-only, so it's ok */
  106. ZEND_SET_ALIGNED(16, static char hexconvtab[]) = "0123456789abcdef";
  /* Mutex taken by localeconv_r() around the localeconv() call in thread-safe
     (ZTS) builds. It is allocated in PHP_MINIT_FUNCTION(localeconv) and
     released in PHP_MSHUTDOWN_FUNCTION(localeconv). */
  107. /* localeconv mutex */
  108. #ifdef ZTS
  109. static MUTEX_T locale_mutex = NULL;
  110. #endif
  111. /* {{{ php_bin2hex
  112. */
  113. static zend_string *php_bin2hex(const unsigned char *old, const size_t oldlen)
  114. {
  115. zend_string *result;
  116. size_t i, j;
  117. result = zend_string_safe_alloc(oldlen, 2 * sizeof(char), 0, 0);
  118. for (i = j = 0; i < oldlen; i++) {
  119. ZSTR_VAL(result)[j++] = hexconvtab[old[i] >> 4];
  120. ZSTR_VAL(result)[j++] = hexconvtab[old[i] & 15];
  121. }
  122. ZSTR_VAL(result)[j] = '\0';
  123. return result;
  124. }
  125. /* }}} */
  126. /* {{{ php_hex2bin
  127. */
  128. static zend_string *php_hex2bin(const unsigned char *old, const size_t oldlen)
  129. {
  130. size_t target_length = oldlen >> 1;
  131. zend_string *str = zend_string_alloc(target_length, 0);
  132. unsigned char *ret = (unsigned char *)ZSTR_VAL(str);
  133. size_t i, j;
  134. for (i = j = 0; i < target_length; i++) {
  135. unsigned char c = old[j++];
  136. unsigned char l = c & ~0x20;
  137. int is_letter = ((unsigned int) ((l - 'A') ^ (l - 'F' - 1))) >> (8 * sizeof(unsigned int) - 1);
  138. unsigned char d;
  139. /* basically (c >= '0' && c <= '9') || (l >= 'A' && l <= 'F') */
  140. if (EXPECTED((((c ^ '0') - 10) >> (8 * sizeof(unsigned int) - 1)) | is_letter)) {
  141. d = (l - 0x10 - 0x27 * is_letter) << 4;
  142. } else {
  143. zend_string_efree(str);
  144. return NULL;
  145. }
  146. c = old[j++];
  147. l = c & ~0x20;
  148. is_letter = ((unsigned int) ((l - 'A') ^ (l - 'F' - 1))) >> (8 * sizeof(unsigned int) - 1);
  149. if (EXPECTED((((c ^ '0') - 10) >> (8 * sizeof(unsigned int) - 1)) | is_letter)) {
  150. d |= l - 0x10 - 0x27 * is_letter;
  151. } else {
  152. zend_string_efree(str);
  153. return NULL;
  154. }
  155. ret[i] = d;
  156. }
  157. ret[i] = '\0';
  158. return str;
  159. }
  160. /* }}} */
  161. #ifdef HAVE_LOCALECONV
  162. /* {{{ localeconv_r
  163. * glibc's localeconv is not reentrant, so let's make it so ... sorta.
  *
  * Copies the current locale's struct lconv into the caller-supplied `out`
  * and returns `out`. In ZTS builds the copy is made while holding
  * locale_mutex.
  *
  * NOTE(review): the copy is a plain struct assignment, so any pointer
  * members of struct lconv still refer to storage owned by the C library
  * after the mutex is released - confirm callers consume them promptly.
  */
  164. PHPAPI struct lconv *localeconv_r(struct lconv *out)
  165. {
  166. # ifdef ZTS
  167. tsrm_mutex_lock( locale_mutex );
  168. # endif
  169. /* cur->locinfo is struct __crt_locale_info which implementation is
  170. hidden in vc14. TODO revisit this and check if a workaround available
  171. and needed. */
  172. #if defined(PHP_WIN32) && _MSC_VER < 1900 && defined(ZTS)
  173. {
  174. /* Even with the enabled per thread locale, localeconv
  175. won't check any locale change in the master thread. */
  /* So read the lconv straight out of the current locale object instead. */
  176. _locale_t cur = _get_current_locale();
  177. *out = *cur->locinfo->lconv;
  178. _free_locale(cur);
  179. }
  180. #else
  181. /* localeconv doesn't return an error condition */
  182. *out = *localeconv();
  183. #endif
  184. # ifdef ZTS
  185. tsrm_mutex_unlock( locale_mutex );
  186. # endif
  187. return out;
  188. }
  189. /* }}} */
  190. # ifdef ZTS
  191. /* {{{ PHP_MINIT_FUNCTION
  * Module-startup hook, compiled only when both HAVE_LOCALECONV and ZTS are
  * defined: allocates locale_mutex, which localeconv_r() locks. The result of
  * tsrm_mutex_alloc() is not checked; the hook always returns SUCCESS.
  192. */
  193. PHP_MINIT_FUNCTION(localeconv)
  194. {
  195. locale_mutex = tsrm_mutex_alloc();
  196. return SUCCESS;
  197. }
  198. /* }}} */
  199. /* {{{ PHP_MSHUTDOWN_FUNCTION
  * Module-shutdown counterpart of PHP_MINIT_FUNCTION(localeconv): frees
  * locale_mutex and resets the pointer to NULL. Always returns SUCCESS.
  200. */
  201. PHP_MSHUTDOWN_FUNCTION(localeconv)
  202. {
  203. tsrm_mutex_free( locale_mutex );
  /* Clear the stale pointer so the freed mutex cannot be reused by accident. */
  204. locale_mutex = NULL;
  205. return SUCCESS;
  206. }
  207. /* }}} */
  208. # endif
  209. #endif
  210. /* {{{ proto string bin2hex(string data)
  211. Converts the binary representation of data to hex */
  212. PHP_FUNCTION(bin2hex)
  213. {
  214. zend_string *result;
  215. zend_string *data;
  216. ZEND_PARSE_PARAMETERS_START(1, 1)
  217. Z_PARAM_STR(data)
  218. ZEND_PARSE_PARAMETERS_END();
  219. result = php_bin2hex((unsigned char *)ZSTR_VAL(data), ZSTR_LEN(data));
  220. if (!result) {
  221. RETURN_FALSE;
  222. }
  223. RETURN_STR(result);
  224. }
  225. /* }}} */
  226. /* {{{ proto string hex2bin(string data)
  227. Converts the hex representation of data to binary */
  228. PHP_FUNCTION(hex2bin)
  229. {
  230. zend_string *result, *data;
  231. ZEND_PARSE_PARAMETERS_START(1, 1)
  232. Z_PARAM_STR(data)
  233. ZEND_PARSE_PARAMETERS_END();
  234. if (ZSTR_LEN(data) % 2 != 0) {
  235. php_error_docref(NULL, E_WARNING, "Hexadecimal input string must have an even length");
  236. RETURN_FALSE;
  237. }
  238. result = php_hex2bin((unsigned char *)ZSTR_VAL(data), ZSTR_LEN(data));
  239. if (!result) {
  240. php_error_docref(NULL, E_WARNING, "Input string must be hexadecimal string");
  241. RETURN_FALSE;
  242. }
  243. RETVAL_STR(result);
  244. }
  245. /* }}} */
  246. static void php_spn_common_handler(INTERNAL_FUNCTION_PARAMETERS, int behavior) /* {{{ */
  247. {
  248. zend_string *s11, *s22;
  249. zend_long start = 0, len = 0;
  250. ZEND_PARSE_PARAMETERS_START(2, 4)
  251. Z_PARAM_STR(s11)
  252. Z_PARAM_STR(s22)
  253. Z_PARAM_OPTIONAL
  254. Z_PARAM_LONG(start)
  255. Z_PARAM_LONG(len)
  256. ZEND_PARSE_PARAMETERS_END();
  257. if (ZEND_NUM_ARGS() < 4) {
  258. len = ZSTR_LEN(s11);
  259. }
  260. /* look at substr() function for more information */
  261. if (start < 0) {
  262. start += (zend_long)ZSTR_LEN(s11);
  263. if (start < 0) {
  264. start = 0;
  265. }
  266. } else if ((size_t)start > ZSTR_LEN(s11)) {
  267. RETURN_FALSE;
  268. }
  269. if (len < 0) {
  270. len += (ZSTR_LEN(s11) - start);
  271. if (len < 0) {
  272. len = 0;
  273. }
  274. }
  275. if (len > (zend_long)ZSTR_LEN(s11) - start) {
  276. len = ZSTR_LEN(s11) - start;
  277. }
  278. if(len == 0) {
  279. RETURN_LONG(0);
  280. }
  281. if (behavior == STR_STRSPN) {
  282. RETURN_LONG(php_strspn(ZSTR_VAL(s11) + start /*str1_start*/,
  283. ZSTR_VAL(s22) /*str2_start*/,
  284. ZSTR_VAL(s11) + start + len /*str1_end*/,
  285. ZSTR_VAL(s22) + ZSTR_LEN(s22) /*str2_end*/));
  286. } else if (behavior == STR_STRCSPN) {
  287. RETURN_LONG(php_strcspn(ZSTR_VAL(s11) + start /*str1_start*/,
  288. ZSTR_VAL(s22) /*str2_start*/,
  289. ZSTR_VAL(s11) + start + len /*str1_end*/,
  290. ZSTR_VAL(s22) + ZSTR_LEN(s22) /*str2_end*/));
  291. }
  292. }
  293. /* }}} */
  294. /* {{{ proto int strspn(string str, string mask [, int start [, int len]])
  295. Finds the length of the initial segment of str consisting entirely of characters found in mask. If start and/or len is provided, works like strspn(substr($s,$start,$len),$good_chars) */
  296. PHP_FUNCTION(strspn)
  297. {
  /* All argument handling lives in the shared handler; only the mode differs. */
  298. php_spn_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, STR_STRSPN);
  299. }
  300. /* }}} */
  301. /* {{{ proto int strcspn(string str, string mask [, int start [, int len]])
  302. Finds the length of the initial segment of str consisting entirely of characters not found in mask. If start and/or len is provided, works like strcspn(substr($s,$start,$len),$bad_chars) */
  303. PHP_FUNCTION(strcspn)
  304. {
  /* All argument handling lives in the shared handler; only the mode differs. */
  305. php_spn_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, STR_STRCSPN);
  306. }
  307. /* }}} */
  308. /* {{{ PHP_MINIT_FUNCTION(nl_langinfo) */
  309. #if HAVE_NL_LANGINFO
  /* Module-startup hook: exports every nl_langinfo() item identifier that the
     included headers define as a preprocessor macro, as a long constant of the
     same name (flags CONST_CS | CONST_PERSISTENT). Identifiers that are not
     defined are skipped by the #ifdef guards. Always returns SUCCESS. */
  310. PHP_MINIT_FUNCTION(nl_langinfo)
  311. {
  /* #x stringifies the identifier as written, so the registered constant name
     is always identical to the C macro name. The helper macro is #undef'd
     again at the end of the function. */
  312. #define REGISTER_NL_LANGINFO_CONSTANT(x) REGISTER_LONG_CONSTANT(#x, x, CONST_CS | CONST_PERSISTENT)
  /* The day/month series are guarded as a group by their first member only. */
  313. #ifdef ABDAY_1
  314. REGISTER_NL_LANGINFO_CONSTANT(ABDAY_1);
  315. REGISTER_NL_LANGINFO_CONSTANT(ABDAY_2);
  316. REGISTER_NL_LANGINFO_CONSTANT(ABDAY_3);
  317. REGISTER_NL_LANGINFO_CONSTANT(ABDAY_4);
  318. REGISTER_NL_LANGINFO_CONSTANT(ABDAY_5);
  319. REGISTER_NL_LANGINFO_CONSTANT(ABDAY_6);
  320. REGISTER_NL_LANGINFO_CONSTANT(ABDAY_7);
  321. #endif
  322. #ifdef DAY_1
  323. REGISTER_NL_LANGINFO_CONSTANT(DAY_1);
  324. REGISTER_NL_LANGINFO_CONSTANT(DAY_2);
  325. REGISTER_NL_LANGINFO_CONSTANT(DAY_3);
  326. REGISTER_NL_LANGINFO_CONSTANT(DAY_4);
  327. REGISTER_NL_LANGINFO_CONSTANT(DAY_5);
  328. REGISTER_NL_LANGINFO_CONSTANT(DAY_6);
  329. REGISTER_NL_LANGINFO_CONSTANT(DAY_7);
  330. #endif
  331. #ifdef ABMON_1
  332. REGISTER_NL_LANGINFO_CONSTANT(ABMON_1);
  333. REGISTER_NL_LANGINFO_CONSTANT(ABMON_2);
  334. REGISTER_NL_LANGINFO_CONSTANT(ABMON_3);
  335. REGISTER_NL_LANGINFO_CONSTANT(ABMON_4);
  336. REGISTER_NL_LANGINFO_CONSTANT(ABMON_5);
  337. REGISTER_NL_LANGINFO_CONSTANT(ABMON_6);
  338. REGISTER_NL_LANGINFO_CONSTANT(ABMON_7);
  339. REGISTER_NL_LANGINFO_CONSTANT(ABMON_8);
  340. REGISTER_NL_LANGINFO_CONSTANT(ABMON_9);
  341. REGISTER_NL_LANGINFO_CONSTANT(ABMON_10);
  342. REGISTER_NL_LANGINFO_CONSTANT(ABMON_11);
  343. REGISTER_NL_LANGINFO_CONSTANT(ABMON_12);
  344. #endif
  345. #ifdef MON_1
  346. REGISTER_NL_LANGINFO_CONSTANT(MON_1);
  347. REGISTER_NL_LANGINFO_CONSTANT(MON_2);
  348. REGISTER_NL_LANGINFO_CONSTANT(MON_3);
  349. REGISTER_NL_LANGINFO_CONSTANT(MON_4);
  350. REGISTER_NL_LANGINFO_CONSTANT(MON_5);
  351. REGISTER_NL_LANGINFO_CONSTANT(MON_6);
  352. REGISTER_NL_LANGINFO_CONSTANT(MON_7);
  353. REGISTER_NL_LANGINFO_CONSTANT(MON_8);
  354. REGISTER_NL_LANGINFO_CONSTANT(MON_9);
  355. REGISTER_NL_LANGINFO_CONSTANT(MON_10);
  356. REGISTER_NL_LANGINFO_CONSTANT(MON_11);
  357. REGISTER_NL_LANGINFO_CONSTANT(MON_12);
  358. #endif
  /* Every remaining item is guarded individually. */
  359. #ifdef AM_STR
  360. REGISTER_NL_LANGINFO_CONSTANT(AM_STR);
  361. #endif
  362. #ifdef PM_STR
  363. REGISTER_NL_LANGINFO_CONSTANT(PM_STR);
  364. #endif
  365. #ifdef D_T_FMT
  366. REGISTER_NL_LANGINFO_CONSTANT(D_T_FMT);
  367. #endif
  368. #ifdef D_FMT
  369. REGISTER_NL_LANGINFO_CONSTANT(D_FMT);
  370. #endif
  371. #ifdef T_FMT
  372. REGISTER_NL_LANGINFO_CONSTANT(T_FMT);
  373. #endif
  374. #ifdef T_FMT_AMPM
  375. REGISTER_NL_LANGINFO_CONSTANT(T_FMT_AMPM);
  376. #endif
  377. #ifdef ERA
  378. REGISTER_NL_LANGINFO_CONSTANT(ERA);
  379. #endif
  380. #ifdef ERA_YEAR
  381. REGISTER_NL_LANGINFO_CONSTANT(ERA_YEAR);
  382. #endif
  383. #ifdef ERA_D_T_FMT
  384. REGISTER_NL_LANGINFO_CONSTANT(ERA_D_T_FMT);
  385. #endif
  386. #ifdef ERA_D_FMT
  387. REGISTER_NL_LANGINFO_CONSTANT(ERA_D_FMT);
  388. #endif
  389. #ifdef ERA_T_FMT
  390. REGISTER_NL_LANGINFO_CONSTANT(ERA_T_FMT);
  391. #endif
  392. #ifdef ALT_DIGITS
  393. REGISTER_NL_LANGINFO_CONSTANT(ALT_DIGITS);
  394. #endif
  395. #ifdef INT_CURR_SYMBOL
  396. REGISTER_NL_LANGINFO_CONSTANT(INT_CURR_SYMBOL);
  397. #endif
  398. #ifdef CURRENCY_SYMBOL
  399. REGISTER_NL_LANGINFO_CONSTANT(CURRENCY_SYMBOL);
  400. #endif
  401. #ifdef CRNCYSTR
  402. REGISTER_NL_LANGINFO_CONSTANT(CRNCYSTR);
  403. #endif
  404. #ifdef MON_DECIMAL_POINT
  405. REGISTER_NL_LANGINFO_CONSTANT(MON_DECIMAL_POINT);
  406. #endif
  407. #ifdef MON_THOUSANDS_SEP
  408. REGISTER_NL_LANGINFO_CONSTANT(MON_THOUSANDS_SEP);
  409. #endif
  410. #ifdef MON_GROUPING
  411. REGISTER_NL_LANGINFO_CONSTANT(MON_GROUPING);
  412. #endif
  413. #ifdef POSITIVE_SIGN
  414. REGISTER_NL_LANGINFO_CONSTANT(POSITIVE_SIGN);
  415. #endif
  416. #ifdef NEGATIVE_SIGN
  417. REGISTER_NL_LANGINFO_CONSTANT(NEGATIVE_SIGN);
  418. #endif
  419. #ifdef INT_FRAC_DIGITS
  420. REGISTER_NL_LANGINFO_CONSTANT(INT_FRAC_DIGITS);
  421. #endif
  422. #ifdef FRAC_DIGITS
  423. REGISTER_NL_LANGINFO_CONSTANT(FRAC_DIGITS);
  424. #endif
  425. #ifdef P_CS_PRECEDES
  426. REGISTER_NL_LANGINFO_CONSTANT(P_CS_PRECEDES);
  427. #endif
  428. #ifdef P_SEP_BY_SPACE
  429. REGISTER_NL_LANGINFO_CONSTANT(P_SEP_BY_SPACE);
  430. #endif
  431. #ifdef N_CS_PRECEDES
  432. REGISTER_NL_LANGINFO_CONSTANT(N_CS_PRECEDES);
  433. #endif
  434. #ifdef N_SEP_BY_SPACE
  435. REGISTER_NL_LANGINFO_CONSTANT(N_SEP_BY_SPACE);
  436. #endif
  437. #ifdef P_SIGN_POSN
  438. REGISTER_NL_LANGINFO_CONSTANT(P_SIGN_POSN);
  439. #endif
  440. #ifdef N_SIGN_POSN
  441. REGISTER_NL_LANGINFO_CONSTANT(N_SIGN_POSN);
  442. #endif
  /* Unlike the switch in PHP_FUNCTION(nl_langinfo), both names of the
     DECIMAL_POINT/RADIXCHAR and THOUSANDS_SEP/THOUSEP pairs are registered
     when both are defined. */
  443. #ifdef DECIMAL_POINT
  444. REGISTER_NL_LANGINFO_CONSTANT(DECIMAL_POINT);
  445. #endif
  446. #ifdef RADIXCHAR
  447. REGISTER_NL_LANGINFO_CONSTANT(RADIXCHAR);
  448. #endif
  449. #ifdef THOUSANDS_SEP
  450. REGISTER_NL_LANGINFO_CONSTANT(THOUSANDS_SEP);
  451. #endif
  452. #ifdef THOUSEP
  453. REGISTER_NL_LANGINFO_CONSTANT(THOUSEP);
  454. #endif
  455. #ifdef GROUPING
  456. REGISTER_NL_LANGINFO_CONSTANT(GROUPING);
  457. #endif
  458. #ifdef YESEXPR
  459. REGISTER_NL_LANGINFO_CONSTANT(YESEXPR);
  460. #endif
  461. #ifdef NOEXPR
  462. REGISTER_NL_LANGINFO_CONSTANT(NOEXPR);
  463. #endif
  464. #ifdef YESSTR
  465. REGISTER_NL_LANGINFO_CONSTANT(YESSTR);
  466. #endif
  467. #ifdef NOSTR
  468. REGISTER_NL_LANGINFO_CONSTANT(NOSTR);
  469. #endif
  470. #ifdef CODESET
  471. REGISTER_NL_LANGINFO_CONSTANT(CODESET);
  472. #endif
  473. #undef REGISTER_NL_LANGINFO_CONSTANT
  474. return SUCCESS;
  475. }
  476. /* }}} */
  477. /* {{{ proto string nl_langinfo(int item)
  478. Query language and locale information.
  Returns the string that the C library's nl_langinfo() reports for item.
  Emits an E_WARNING and returns FALSE when item is not one of the
  identifiers whitelisted in the switch below; also returns FALSE when
  nl_langinfo() itself returns NULL. */
  479. PHP_FUNCTION(nl_langinfo)
  480. {
  481. zend_long item;
  482. char *value;
  483. ZEND_PARSE_PARAMETERS_START(1, 1)
  484. Z_PARAM_LONG(item)
  485. ZEND_PARSE_PARAMETERS_END();
  /* Whitelist: every known identifier falls through to the single `break`
     at the bottom; anything else lands in `default` and is rejected before
     the C library is ever called. */
  486. switch(item) { /* {{{ */
  487. #ifdef ABDAY_1
  488. case ABDAY_1:
  489. case ABDAY_2:
  490. case ABDAY_3:
  491. case ABDAY_4:
  492. case ABDAY_5:
  493. case ABDAY_6:
  494. case ABDAY_7:
  495. #endif
  496. #ifdef DAY_1
  497. case DAY_1:
  498. case DAY_2:
  499. case DAY_3:
  500. case DAY_4:
  501. case DAY_5:
  502. case DAY_6:
  503. case DAY_7:
  504. #endif
  505. #ifdef ABMON_1
  506. case ABMON_1:
  507. case ABMON_2:
  508. case ABMON_3:
  509. case ABMON_4:
  510. case ABMON_5:
  511. case ABMON_6:
  512. case ABMON_7:
  513. case ABMON_8:
  514. case ABMON_9:
  515. case ABMON_10:
  516. case ABMON_11:
  517. case ABMON_12:
  518. #endif
  519. #ifdef MON_1
  520. case MON_1:
  521. case MON_2:
  522. case MON_3:
  523. case MON_4:
  524. case MON_5:
  525. case MON_6:
  526. case MON_7:
  527. case MON_8:
  528. case MON_9:
  529. case MON_10:
  530. case MON_11:
  531. case MON_12:
  532. #endif
  533. #ifdef AM_STR
  534. case AM_STR:
  535. #endif
  536. #ifdef PM_STR
  537. case PM_STR:
  538. #endif
  539. #ifdef D_T_FMT
  540. case D_T_FMT:
  541. #endif
  542. #ifdef D_FMT
  543. case D_FMT:
  544. #endif
  545. #ifdef T_FMT
  546. case T_FMT:
  547. #endif
  548. #ifdef T_FMT_AMPM
  549. case T_FMT_AMPM:
  550. #endif
  551. #ifdef ERA
  552. case ERA:
  553. #endif
  554. #ifdef ERA_YEAR
  555. case ERA_YEAR:
  556. #endif
  557. #ifdef ERA_D_T_FMT
  558. case ERA_D_T_FMT:
  559. #endif
  560. #ifdef ERA_D_FMT
  561. case ERA_D_FMT:
  562. #endif
  563. #ifdef ERA_T_FMT
  564. case ERA_T_FMT:
  565. #endif
  566. #ifdef ALT_DIGITS
  567. case ALT_DIGITS:
  568. #endif
  569. #ifdef INT_CURR_SYMBOL
  570. case INT_CURR_SYMBOL:
  571. #endif
  572. #ifdef CURRENCY_SYMBOL
  573. case CURRENCY_SYMBOL:
  574. #endif
  575. #ifdef CRNCYSTR
  576. case CRNCYSTR:
  577. #endif
  578. #ifdef MON_DECIMAL_POINT
  579. case MON_DECIMAL_POINT:
  580. #endif
  581. #ifdef MON_THOUSANDS_SEP
  582. case MON_THOUSANDS_SEP:
  583. #endif
  584. #ifdef MON_GROUPING
  585. case MON_GROUPING:
  586. #endif
  587. #ifdef POSITIVE_SIGN
  588. case POSITIVE_SIGN:
  589. #endif
  590. #ifdef NEGATIVE_SIGN
  591. case NEGATIVE_SIGN:
  592. #endif
  593. #ifdef INT_FRAC_DIGITS
  594. case INT_FRAC_DIGITS:
  595. #endif
  596. #ifdef FRAC_DIGITS
  597. case FRAC_DIGITS:
  598. #endif
  599. #ifdef P_CS_PRECEDES
  600. case P_CS_PRECEDES:
  601. #endif
  602. #ifdef P_SEP_BY_SPACE
  603. case P_SEP_BY_SPACE:
  604. #endif
  605. #ifdef N_CS_PRECEDES
  606. case N_CS_PRECEDES:
  607. #endif
  608. #ifdef N_SEP_BY_SPACE
  609. case N_SEP_BY_SPACE:
  610. #endif
  611. #ifdef P_SIGN_POSN
  612. case P_SIGN_POSN:
  613. #endif
  614. #ifdef N_SIGN_POSN
  615. case N_SIGN_POSN:
  616. #endif
  /* The next two pairs use #elif, so at most one label of each pair is
     emitted. NOTE(review): presumably because the two names can alias the
     same value, which would make duplicate case labels - confirm. */
  617. #ifdef DECIMAL_POINT
  618. case DECIMAL_POINT:
  619. #elif defined(RADIXCHAR)
  620. case RADIXCHAR:
  621. #endif
  622. #ifdef THOUSANDS_SEP
  623. case THOUSANDS_SEP:
  624. #elif defined(THOUSEP)
  625. case THOUSEP:
  626. #endif
  627. #ifdef GROUPING
  628. case GROUPING:
  629. #endif
  630. #ifdef YESEXPR
  631. case YESEXPR:
  632. #endif
  633. #ifdef NOEXPR
  634. case NOEXPR:
  635. #endif
  636. #ifdef YESSTR
  637. case YESSTR:
  638. #endif
  639. #ifdef NOSTR
  640. case NOSTR:
  641. #endif
  642. #ifdef CODESET
  643. case CODESET:
  644. #endif
  645. break;
  646. default:
  647. php_error_docref(NULL, E_WARNING, "Item '" ZEND_LONG_FMT "' is not valid", item);
  648. RETURN_FALSE;
  649. }
  650. /* }}} */
  /* item has been validated above; hand it to the C library. */
  651. value = nl_langinfo(item);
  652. if (value == NULL) {
  653. RETURN_FALSE;
  654. } else {
  655. RETURN_STRING(value);
  656. }
  657. }
  658. #endif
  659. /* }}} */
  660. #ifdef HAVE_STRCOLL
  661. /* {{{ proto int strcoll(string str1, string str2)
  662. Compares two strings using the current locale */
  663. PHP_FUNCTION(strcoll)
  664. {
  665. zend_string *s1, *s2;
  666. ZEND_PARSE_PARAMETERS_START(2, 2)
  667. Z_PARAM_STR(s1)
  668. Z_PARAM_STR(s2)
  669. ZEND_PARSE_PARAMETERS_END();
  670. RETURN_LONG(strcoll((const char *) ZSTR_VAL(s1),
  671. (const char *) ZSTR_VAL(s2)));
  672. }
  673. /* }}} */
  674. #endif
  675. /* {{{ php_charmask
  676. * Fills a 256-byte bytemask with input. You can specify a range like 'a..z',
  677. * it needs to be incrementing.
  678. * Returns: FAILURE/SUCCESS whether the input was correct (i.e. no range errors)
  679. */
  680. static inline int php_charmask(const unsigned char *input, size_t len, char *mask)
  681. {
  682. const unsigned char *end;
  683. unsigned char c;
  684. int result = SUCCESS;
  685. memset(mask, 0, 256);
  686. for (end = input+len; input < end; input++) {
  687. c=*input;
  688. if ((input+3 < end) && input[1] == '.' && input[2] == '.'
  689. && input[3] >= c) {
  690. memset(mask+c, 1, input[3] - c + 1);
  691. input+=3;
  692. } else if ((input+1 < end) && input[0] == '.' && input[1] == '.') {
  693. /* Error, try to be as helpful as possible:
  694. (a range ending/starting with '.' won't be captured here) */
  695. if (end-len >= input) { /* there was no 'left' char */
  696. php_error_docref(NULL, E_WARNING, "Invalid '..'-range, no character to the left of '..'");
  697. result = FAILURE;
  698. continue;
  699. }
  700. if (input+2 >= end) { /* there is no 'right' char */
  701. php_error_docref(NULL, E_WARNING, "Invalid '..'-range, no character to the right of '..'");
  702. result = FAILURE;
  703. continue;
  704. }
  705. if (input[-1] > input[2]) { /* wrong order */
  706. php_error_docref(NULL, E_WARNING, "Invalid '..'-range, '..'-range needs to be incrementing");
  707. result = FAILURE;
  708. continue;
  709. }
  710. /* FIXME: better error (a..b..c is the only left possibility?) */
  711. php_error_docref(NULL, E_WARNING, "Invalid '..'-range");
  712. result = FAILURE;
  713. continue;
  714. } else {
  715. mask[c]=1;
  716. }
  717. }
  718. return result;
  719. }
  720. /* }}} */
  721. /* {{{ php_trim_int()
  722. * mode 1 : trim left
  723. * mode 2 : trim right
  724. * mode 3 : trim left and right
  725. * what indicates which chars are to be trimmed. NULL->default (' \t\n\r\v\0')
  726. */
  727. static zend_always_inline zend_string *php_trim_int(zend_string *str, char *what, size_t what_len, int mode)
  728. {
  729. const char *start = ZSTR_VAL(str);
  730. const char *end = start + ZSTR_LEN(str);
  731. char mask[256];
  732. if (what) {
  733. if (what_len == 1) {
  734. char p = *what;
  735. if (mode & 1) {
  736. while (start != end) {
  737. if (*start == p) {
  738. start++;
  739. } else {
  740. break;
  741. }
  742. }
  743. }
  744. if (mode & 2) {
  745. while (start != end) {
  746. if (*(end-1) == p) {
  747. end--;
  748. } else {
  749. break;
  750. }
  751. }
  752. }
  753. } else {
  754. php_charmask((unsigned char*)what, what_len, mask);
  755. if (mode & 1) {
  756. while (start != end) {
  757. if (mask[(unsigned char)*start]) {
  758. start++;
  759. } else {
  760. break;
  761. }
  762. }
  763. }
  764. if (mode & 2) {
  765. while (start != end) {
  766. if (mask[(unsigned char)*(end-1)]) {
  767. end--;
  768. } else {
  769. break;
  770. }
  771. }
  772. }
  773. }
  774. } else {
  775. if (mode & 1) {
  776. while (start != end) {
  777. unsigned char c = (unsigned char)*start;
  778. if (c <= ' ' &&
  779. (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' || c == '\0')) {
  780. start++;
  781. } else {
  782. break;
  783. }
  784. }
  785. }
  786. if (mode & 2) {
  787. while (start != end) {
  788. unsigned char c = (unsigned char)*(end-1);
  789. if (c <= ' ' &&
  790. (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' || c == '\0')) {
  791. end--;
  792. } else {
  793. break;
  794. }
  795. }
  796. }
  797. }
  798. if (ZSTR_LEN(str) == end - start) {
  799. return zend_string_copy(str);
  800. } else if (end - start == 0) {
  801. return ZSTR_EMPTY_ALLOC();
  802. } else {
  803. return zend_string_init(start, end - start, 0);
  804. }
  805. }
  806. /* }}} */
  807. /* {{{ php_trim()
  * Exported (PHPAPI) entry point for trimming; forwards all four arguments
  * unchanged to php_trim_int() and returns its result.
  808. * mode 1 : trim left
  809. * mode 2 : trim right
  810. * mode 3 : trim left and right
  811. * what indicates which chars are to be trimmed. NULL->default (' \t\n\r\v\0')
  812. */
  813. PHPAPI zend_string *php_trim(zend_string *str, char *what, size_t what_len, int mode)
  814. {
  815. return php_trim_int(str, what, what_len, mode);
  816. }
  817. /* }}} */
  818. /* {{{ php_do_trim
  819. * Base for trim(), rtrim() and ltrim() functions.
  820. */
  821. static zend_always_inline void php_do_trim(INTERNAL_FUNCTION_PARAMETERS, int mode)
  822. {
  823. zend_string *str;
  824. zend_string *what = NULL;
  825. ZEND_PARSE_PARAMETERS_START(1, 2)
  826. Z_PARAM_STR(str)
  827. Z_PARAM_OPTIONAL
  828. Z_PARAM_STR(what)
  829. ZEND_PARSE_PARAMETERS_END();
  830. ZVAL_STR(return_value, php_trim_int(str, (what ? ZSTR_VAL(what) : NULL), (what ? ZSTR_LEN(what) : 0), mode));
  831. }
  832. /* }}} */
  833. /* {{{ proto string trim(string str [, string character_mask])
  834. Strips whitespace from the beginning and end of a string */
  835. PHP_FUNCTION(trim)
  836. {
  /* Mode 3 = left (1) | right (2), i.e. trim both ends. */
  837. php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 3);
  838. }
  839. /* }}} */
  840. /* {{{ proto string rtrim(string str [, string character_mask])
  841. Removes trailing whitespace */
  842. PHP_FUNCTION(rtrim)
  843. {
  /* Mode 2 = trim the right end only. */
  844. php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
  845. }
  846. /* }}} */
  847. /* {{{ proto string ltrim(string str [, string character_mask])
  848. Strips whitespace from the beginning of a string */
  849. PHP_FUNCTION(ltrim)
  850. {
  /* Mode 1 = trim the left end only. */
  851. php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  852. }
  853. /* }}} */
  854. /* {{{ proto string wordwrap(string str [, int width [, string break [, bool cut]]])
  855. Wraps str to lines of at most width characters by placing the break string at spaces.
  Defaults: width = 75, break = "\n", cut = false.
  Returns an empty string for empty input. Emits an E_WARNING and returns
  FALSE when break is empty, or when width is 0 and cut is true.
  With cut = true, a word longer than width is split; otherwise it is left
  intact and the line simply runs long. */
  856. PHP_FUNCTION(wordwrap)
  857. {
  858. zend_string *text;
  859. char *breakchar = "\n";
  /* newtextlen: bytes written to newtext so far (multi-char/cut path only).
     chk: number of break insertions the current allocation still has room
     for; when it reaches 0 the buffer is extended. */
  860. size_t newtextlen, chk, breakchar_len = 1;
  861. size_t alloced;
  /* current: index of the byte being examined.
     laststart: index in text where the current output line begins.
     lastspace: index of the most recent space seen (or the line start). */
  862. zend_long current = 0, laststart = 0, lastspace = 0;
  863. zend_long linelength = 75;
  864. zend_bool docut = 0;
  865. zend_string *newtext;
  866. ZEND_PARSE_PARAMETERS_START(1, 4)
  867. Z_PARAM_STR(text)
  868. Z_PARAM_OPTIONAL
  869. Z_PARAM_LONG(linelength)
  870. Z_PARAM_STRING(breakchar, breakchar_len)
  871. Z_PARAM_BOOL(docut)
  872. ZEND_PARSE_PARAMETERS_END();
  873. if (ZSTR_LEN(text) == 0) {
  874. RETURN_EMPTY_STRING();
  875. }
  876. if (breakchar_len == 0) {
  877. php_error_docref(NULL, E_WARNING, "Break string cannot be empty");
  878. RETURN_FALSE;
  879. }
  880. if (linelength == 0 && docut) {
  881. php_error_docref(NULL, E_WARNING, "Can't force cut when width is zero");
  882. RETURN_FALSE;
  883. }
  884. /* Special case for a single-character break as it needs no
  885. additional storage space */
  /* Here every inserted break replaces a space one-for-one, so the result has
     exactly the same length as the input: work on a copy and overwrite. */
  886. if (breakchar_len == 1 && !docut) {
  887. newtext = zend_string_init(ZSTR_VAL(text), ZSTR_LEN(text), 0);
  888. laststart = lastspace = 0;
  889. for (current = 0; current < (zend_long)ZSTR_LEN(text); current++) {
  /* An existing break character starts a new line. */
  890. if (ZSTR_VAL(text)[current] == breakchar[0]) {
  891. laststart = lastspace = current + 1;
  892. } else if (ZSTR_VAL(text)[current] == ' ') {
  /* A space at or beyond the width becomes the break itself. */
  893. if (current - laststart >= linelength) {
  894. ZSTR_VAL(newtext)[current] = breakchar[0];
  895. laststart = current + 1;
  896. }
  897. lastspace = current;
  /* Over the width inside a word: break at the last space seen on this
     line, if there was one (laststart != lastspace). */
  898. } else if (current - laststart >= linelength && laststart != lastspace) {
  899. ZSTR_VAL(newtext)[lastspace] = breakchar[0];
  900. laststart = lastspace + 1;
  901. }
  902. }
  903. RETURN_NEW_STR(newtext);
  904. } else {
  905. /* Multiple character line break or forced cut */
  /* Initial size estimate: with a positive width, room for one break per
     `linelength` input bytes plus one; otherwise room for one break per
     input byte. */
  906. if (linelength > 0) {
  907. chk = (size_t)(ZSTR_LEN(text)/linelength + 1);
  908. newtext = zend_string_safe_alloc(chk, breakchar_len, ZSTR_LEN(text), 0);
  909. alloced = ZSTR_LEN(text) + chk * breakchar_len + 1;
  910. } else {
  911. chk = ZSTR_LEN(text);
  912. alloced = ZSTR_LEN(text) * (breakchar_len + 1) + 1;
  913. newtext = zend_string_safe_alloc(ZSTR_LEN(text), breakchar_len + 1, 0, 0);
  914. }
  915. /* now keep track of the actual new text length */
  916. newtextlen = 0;
  917. laststart = lastspace = 0;
  918. for (current = 0; current < (zend_long)ZSTR_LEN(text); current++) {
  /* The break budget is used up: grow the buffer based on the input that
     is still left and refill the budget. */
  919. if (chk == 0) {
  920. alloced += (size_t) (((ZSTR_LEN(text) - current + 1)/linelength + 1) * breakchar_len) + 1;
  921. newtext = zend_string_extend(newtext, alloced, 0);
  922. chk = (size_t) ((ZSTR_LEN(text) - current)/linelength) + 1;
  923. }
  924. /* when we hit an existing break, copy to new buffer, and
  925. * fix up laststart and lastspace */
  /* The strict '<' means a break string that ends exactly at the end of
     the text is not matched by this branch. */
  926. if (ZSTR_VAL(text)[current] == breakchar[0]
  927. && current + breakchar_len < ZSTR_LEN(text)
  928. && !strncmp(ZSTR_VAL(text) + current, breakchar, breakchar_len)) {
  929. memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart + breakchar_len);
  930. newtextlen += current - laststart + breakchar_len;
  /* Skip the rest of the break string (the loop's current++ adds the last step). */
  931. current += breakchar_len - 1;
  932. laststart = lastspace = current + 1;
  933. chk--;
  934. }
  935. /* if it is a space, check if it is at the line boundary,
  936. * copy and insert a break, or just keep track of it */
  937. else if (ZSTR_VAL(text)[current] == ' ') {
  938. if (current - laststart >= linelength) {
  /* Copy the line without the space, then emit the break in its place. */
  939. memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
  940. newtextlen += current - laststart;
  941. memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
  942. newtextlen += breakchar_len;
  943. laststart = current + 1;
  944. chk--;
  945. }
  946. lastspace = current;
  947. }
  948. /* if we are cutting, and we've accumulated enough
  949. * characters, and we haven't see a space for this line,
  950. * copy and insert a break. */
  951. else if (current - laststart >= linelength
  952. && docut && laststart >= lastspace) {
  953. memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
  954. newtextlen += current - laststart;
  955. memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
  956. newtextlen += breakchar_len;
  /* No byte is dropped on a forced cut: the next line starts at `current`. */
  957. laststart = lastspace = current;
  958. chk--;
  959. }
  960. /* if the current word puts us over the linelength, copy
  961. * back up until the last space, insert a break, and move
  962. * up the laststart */
  963. else if (current - laststart >= linelength
  964. && laststart < lastspace) {
  965. memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, lastspace - laststart);
  966. newtextlen += lastspace - laststart;
  967. memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
  968. newtextlen += breakchar_len;
  /* The space at `lastspace` is replaced by the break, so resume after it. */
  969. laststart = lastspace = lastspace + 1;
  970. chk--;
  971. }
  972. }
  973. /* copy over any stragglers */
  974. if (laststart != current) {
  975. memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
  976. newtextlen += current - laststart;
  977. }
  978. ZSTR_VAL(newtext)[newtextlen] = '\0';
  979. /* free unused memory */
  980. newtext = zend_string_truncate(newtext, newtextlen, 0);
  981. RETURN_NEW_STR(newtext);
  982. }
  983. }
  984. /* }}} */
  985. /* {{{ php_explode
  986. */
  987. PHPAPI void php_explode(const zend_string *delim, zend_string *str, zval *return_value, zend_long limit)
  988. {
  989. const char *p1 = ZSTR_VAL(str);
  990. const char *endp = ZSTR_VAL(str) + ZSTR_LEN(str);
  991. const char *p2 = php_memnstr(ZSTR_VAL(str), ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
  992. zval tmp;
  993. if (p2 == NULL) {
  994. ZVAL_STR_COPY(&tmp, str);
  995. zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
  996. } else {
  997. do {
  998. size_t l = p2 - p1;
  999. if (l == 0) {
  1000. ZVAL_EMPTY_STRING(&tmp);
  1001. } else if (l == 1) {
  1002. ZVAL_INTERNED_STR(&tmp, ZSTR_CHAR((zend_uchar)(*p1)));
  1003. } else {
  1004. ZVAL_STRINGL(&tmp, p1, p2 - p1);
  1005. }
  1006. zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
  1007. p1 = p2 + ZSTR_LEN(delim);
  1008. p2 = php_memnstr(p1, ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
  1009. } while (p2 != NULL && --limit > 1);
  1010. if (p1 <= endp) {
  1011. ZVAL_STRINGL(&tmp, p1, endp - p1);
  1012. zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
  1013. }
  1014. }
  1015. }
  1016. /* }}} */
  1017. /* {{{ php_explode_negative_limit
  1018. */
  1019. PHPAPI void php_explode_negative_limit(const zend_string *delim, zend_string *str, zval *return_value, zend_long limit)
  1020. {
  1021. #define EXPLODE_ALLOC_STEP 64
  1022. const char *p1 = ZSTR_VAL(str);
  1023. const char *endp = ZSTR_VAL(str) + ZSTR_LEN(str);
  1024. const char *p2 = php_memnstr(ZSTR_VAL(str), ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
  1025. zval tmp;
  1026. if (p2 == NULL) {
  1027. /*
  1028. do nothing since limit <= -1, thus if only one chunk - 1 + (limit) <= 0
  1029. by doing nothing we return empty array
  1030. */
  1031. } else {
  1032. size_t allocated = EXPLODE_ALLOC_STEP, found = 0;
  1033. zend_long i, to_return;
  1034. const char **positions = emalloc(allocated * sizeof(char *));
  1035. positions[found++] = p1;
  1036. do {
  1037. if (found >= allocated) {
  1038. allocated = found + EXPLODE_ALLOC_STEP;/* make sure we have enough memory */
  1039. positions = erealloc(positions, allocated*sizeof(char *));
  1040. }
  1041. positions[found++] = p1 = p2 + ZSTR_LEN(delim);
  1042. p2 = php_memnstr(p1, ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
  1043. } while (p2 != NULL);
  1044. to_return = limit + found;
  1045. /* limit is at least -1 therefore no need of bounds checking : i will be always less than found */
  1046. for (i = 0; i < to_return; i++) { /* this checks also for to_return > 0 */
  1047. ZVAL_STRINGL(&tmp, positions[i], (positions[i+1] - ZSTR_LEN(delim)) - positions[i]);
  1048. zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
  1049. }
  1050. efree((void *)positions);
  1051. }
  1052. #undef EXPLODE_ALLOC_STEP
  1053. }
  1054. /* }}} */
  1055. /* {{{ proto array explode(string separator, string str [, int limit])
  1056. Splits a string on string separator and return array of components. If limit is positive only limit number of components is returned. If limit is negative all components except the last abs(limit) are returned. */
  1057. PHP_FUNCTION(explode)
  1058. {
  1059. zend_string *str, *delim;
  1060. zend_long limit = ZEND_LONG_MAX; /* No limit */
  1061. zval tmp;
  1062. ZEND_PARSE_PARAMETERS_START(2, 3)
  1063. Z_PARAM_STR(delim)
  1064. Z_PARAM_STR(str)
  1065. Z_PARAM_OPTIONAL
  1066. Z_PARAM_LONG(limit)
  1067. ZEND_PARSE_PARAMETERS_END();
  1068. if (ZSTR_LEN(delim) == 0) {
  1069. php_error_docref(NULL, E_WARNING, "Empty delimiter");
  1070. RETURN_FALSE;
  1071. }
  1072. array_init(return_value);
  1073. if (ZSTR_LEN(str) == 0) {
  1074. if (limit >= 0) {
  1075. ZVAL_EMPTY_STRING(&tmp);
  1076. zend_hash_index_add_new(Z_ARRVAL_P(return_value), 0, &tmp);
  1077. }
  1078. return;
  1079. }
  1080. if (limit > 1) {
  1081. php_explode(delim, str, return_value, limit);
  1082. } else if (limit < 0) {
  1083. php_explode_negative_limit(delim, str, return_value, limit);
  1084. } else {
  1085. ZVAL_STR_COPY(&tmp, str);
  1086. zend_hash_index_add_new(Z_ARRVAL_P(return_value), 0, &tmp);
  1087. }
  1088. }
  1089. /* }}} */
  1090. /* {{{ proto string join(array src, string glue)
  1091. An alias for implode */
  1092. /* }}} */
  1093. /* {{{ php_implode
  1094. */
  1095. PHPAPI void php_implode(const zend_string *glue, zval *pieces, zval *return_value)
  1096. {
  1097. zval *tmp;
  1098. int numelems;
  1099. zend_string *str;
  1100. char *cptr;
  1101. size_t len = 0;
  1102. struct {
  1103. zend_string *str;
  1104. zend_long lval;
  1105. } *strings, *ptr;
  1106. ALLOCA_FLAG(use_heap)
  1107. numelems = zend_hash_num_elements(Z_ARRVAL_P(pieces));
  1108. if (numelems == 0) {
  1109. RETURN_EMPTY_STRING();
  1110. } else if (numelems == 1) {
  1111. /* loop to search the first not undefined element... */
  1112. ZEND_HASH_FOREACH_VAL_IND(Z_ARRVAL_P(pieces), tmp) {
  1113. RETURN_STR(zval_get_string(tmp));
  1114. } ZEND_HASH_FOREACH_END();
  1115. }
  1116. ptr = strings = do_alloca((sizeof(*strings)) * numelems, use_heap);
  1117. ZEND_HASH_FOREACH_VAL_IND(Z_ARRVAL_P(pieces), tmp) {
  1118. if (EXPECTED(Z_TYPE_P(tmp) == IS_STRING)) {
  1119. ptr->str = Z_STR_P(tmp);
  1120. len += ZSTR_LEN(ptr->str);
  1121. ptr->lval = 0;
  1122. ptr++;
  1123. } else if (UNEXPECTED(Z_TYPE_P(tmp) == IS_LONG)) {
  1124. zend_long val = Z_LVAL_P(tmp);
  1125. ptr->str = NULL;
  1126. ptr->lval = val;
  1127. ptr++;
  1128. if (val <= 0) {
  1129. len++;
  1130. }
  1131. while (val) {
  1132. val /= 10;
  1133. len++;
  1134. }
  1135. } else {
  1136. ptr->str = zval_get_string_func(tmp);
  1137. len += ZSTR_LEN(ptr->str);
  1138. ptr->lval = 1;
  1139. ptr++;
  1140. }
  1141. } ZEND_HASH_FOREACH_END();
  1142. /* numelems can not be 0, we checked above */
  1143. str = zend_string_safe_alloc(numelems - 1, ZSTR_LEN(glue), len, 0);
  1144. cptr = ZSTR_VAL(str) + ZSTR_LEN(str);
  1145. *cptr = 0;
  1146. while (1) {
  1147. ptr--;
  1148. if (EXPECTED(ptr->str)) {
  1149. cptr -= ZSTR_LEN(ptr->str);
  1150. memcpy(cptr, ZSTR_VAL(ptr->str), ZSTR_LEN(ptr->str));
  1151. if (ptr->lval) {
  1152. zend_string_release_ex(ptr->str, 0);
  1153. }
  1154. } else {
  1155. char *oldPtr = cptr;
  1156. char oldVal = *cptr;
  1157. cptr = zend_print_long_to_buf(cptr, ptr->lval);
  1158. *oldPtr = oldVal;
  1159. }
  1160. if (ptr == strings) {
  1161. break;
  1162. }
  1163. cptr -= ZSTR_LEN(glue);
  1164. memcpy(cptr, ZSTR_VAL(glue), ZSTR_LEN(glue));
  1165. }
  1166. free_alloca(strings, use_heap);
  1167. RETURN_NEW_STR(str);
  1168. }
  1169. /* }}} */
  1170. /* {{{ proto string implode([string glue,] array pieces)
  1171. Joins array elements placing glue string between items and return one string */
  1172. PHP_FUNCTION(implode)
  1173. {
  1174. zval *arg1, *arg2 = NULL, *pieces;
  1175. zend_string *glue, *tmp_glue;
  1176. ZEND_PARSE_PARAMETERS_START(1, 2)
  1177. Z_PARAM_ZVAL(arg1)
  1178. Z_PARAM_OPTIONAL
  1179. Z_PARAM_ZVAL(arg2)
  1180. ZEND_PARSE_PARAMETERS_END();
  1181. if (arg2 == NULL) {
  1182. if (Z_TYPE_P(arg1) != IS_ARRAY) {
  1183. php_error_docref(NULL, E_WARNING, "Argument must be an array");
  1184. return;
  1185. }
  1186. glue = ZSTR_EMPTY_ALLOC();
  1187. tmp_glue = NULL;
  1188. pieces = arg1;
  1189. } else {
  1190. if (Z_TYPE_P(arg1) == IS_ARRAY) {
  1191. glue = zval_get_tmp_string(arg2, &tmp_glue);
  1192. pieces = arg1;
  1193. } else if (Z_TYPE_P(arg2) == IS_ARRAY) {
  1194. glue = zval_get_tmp_string(arg1, &tmp_glue);
  1195. pieces = arg2;
  1196. } else {
  1197. php_error_docref(NULL, E_WARNING, "Invalid arguments passed");
  1198. return;
  1199. }
  1200. }
  1201. php_implode(glue, pieces, return_value);
  1202. zend_tmp_string_release(tmp_glue);
  1203. }
  1204. /* }}} */
  1205. #define STRTOK_TABLE(p) BG(strtok_table)[(unsigned char) *p]
  1206. /* {{{ proto string strtok([string str,] string token)
  1207. Tokenize a string */
  1208. PHP_FUNCTION(strtok)
  1209. {
  1210. zend_string *str, *tok = NULL;
  1211. char *token;
  1212. char *token_end;
  1213. char *p;
  1214. char *pe;
  1215. size_t skipped = 0;
  1216. ZEND_PARSE_PARAMETERS_START(1, 2)
  1217. Z_PARAM_STR(str)
  1218. Z_PARAM_OPTIONAL
  1219. Z_PARAM_STR(tok)
  1220. ZEND_PARSE_PARAMETERS_END();
  1221. if (ZEND_NUM_ARGS() == 1) {
  1222. tok = str;
  1223. } else {
  1224. zval_ptr_dtor(&BG(strtok_zval));
  1225. ZVAL_STRINGL(&BG(strtok_zval), ZSTR_VAL(str), ZSTR_LEN(str));
  1226. BG(strtok_last) = BG(strtok_string) = Z_STRVAL(BG(strtok_zval));
  1227. BG(strtok_len) = ZSTR_LEN(str);
  1228. }
  1229. p = BG(strtok_last); /* Where we start to search */
  1230. pe = BG(strtok_string) + BG(strtok_len);
  1231. if (!p || p >= pe) {
  1232. RETURN_FALSE;
  1233. }
  1234. token = ZSTR_VAL(tok);
  1235. token_end = token + ZSTR_LEN(tok);
  1236. while (token < token_end) {
  1237. STRTOK_TABLE(token++) = 1;
  1238. }
  1239. /* Skip leading delimiters */
  1240. while (STRTOK_TABLE(p)) {
  1241. if (++p >= pe) {
  1242. /* no other chars left */
  1243. BG(strtok_last) = NULL;
  1244. RETVAL_FALSE;
  1245. goto restore;
  1246. }
  1247. skipped++;
  1248. }
  1249. /* We know at this place that *p is no delimiter, so skip it */
  1250. while (++p < pe) {
  1251. if (STRTOK_TABLE(p)) {
  1252. goto return_token;
  1253. }
  1254. }
  1255. if (p - BG(strtok_last)) {
  1256. return_token:
  1257. RETVAL_STRINGL(BG(strtok_last) + skipped, (p - BG(strtok_last)) - skipped);
  1258. BG(strtok_last) = p + 1;
  1259. } else {
  1260. RETVAL_FALSE;
  1261. BG(strtok_last) = NULL;
  1262. }
  1263. /* Restore table -- usually faster then memset'ing the table on every invocation */
  1264. restore:
  1265. token = ZSTR_VAL(tok);
  1266. while (token < token_end) {
  1267. STRTOK_TABLE(token++) = 0;
  1268. }
  1269. }
  1270. /* }}} */
  1271. /* {{{ php_strtoupper
  1272. */
  /*
   * Upper-case the first `len` bytes of `s` in place using toupper().
   * Returns `s`.
   */
  PHPAPI char *php_strtoupper(char *s, size_t len)
  {
      unsigned char *cur = (unsigned char *)s;
      const unsigned char *stop = (unsigned char *)cur + len;

      for (; cur < stop; cur++) {
          *cur = toupper(*cur);
      }

      return s;
  }
  1285. /* }}} */
  1286. /* {{{ php_string_toupper
  1287. */
  1288. PHPAPI zend_string *php_string_toupper(zend_string *s)
  1289. {
  1290. unsigned char *c;
  1291. const unsigned char *e;
  1292. c = (unsigned char *)ZSTR_VAL(s);
  1293. e = c + ZSTR_LEN(s);
  1294. while (c < e) {
  1295. if (islower(*c)) {
  1296. register unsigned char *r;
  1297. zend_string *res = zend_string_alloc(ZSTR_LEN(s), 0);
  1298. if (c != (unsigned char*)ZSTR_VAL(s)) {
  1299. memcpy(ZSTR_VAL(res), ZSTR_VAL(s), c - (unsigned char*)ZSTR_VAL(s));
  1300. }
  1301. r = c + (ZSTR_VAL(res) - ZSTR_VAL(s));
  1302. while (c < e) {
  1303. *r = toupper(*c);
  1304. r++;
  1305. c++;
  1306. }
  1307. *r = '\0';
  1308. return res;
  1309. }
  1310. c++;
  1311. }
  1312. return zend_string_copy(s);
  1313. }
  1314. /* }}} */
  1315. /* {{{ proto string strtoupper(string str)
  1316. Makes a string uppercase */
  1317. PHP_FUNCTION(strtoupper)
  1318. {
  1319. zend_string *arg;
  1320. ZEND_PARSE_PARAMETERS_START(1, 1)
  1321. Z_PARAM_STR(arg)
  1322. ZEND_PARSE_PARAMETERS_END();
  1323. RETURN_STR(php_string_toupper(arg));
  1324. }
  1325. /* }}} */
  1326. /* {{{ php_strtolower
  1327. */
  /*
   * Lower-case the first `len` bytes of `s` in place using tolower().
   * Returns `s`.
   */
  PHPAPI char *php_strtolower(char *s, size_t len)
  {
      unsigned char *cur = (unsigned char *)s;
      const unsigned char *stop = cur + len;

      for (; cur < stop; cur++) {
          *cur = tolower(*cur);
      }

      return s;
  }
  1340. /* }}} */
  1341. /* {{{ php_string_tolower
  1342. */
  1343. PHPAPI zend_string *php_string_tolower(zend_string *s)
  1344. {
  1345. unsigned char *c;
  1346. const unsigned char *e;
  1347. c = (unsigned char *)ZSTR_VAL(s);
  1348. e = c + ZSTR_LEN(s);
  1349. while (c < e) {
  1350. if (isupper(*c)) {
  1351. register unsigned char *r;
  1352. zend_string *res = zend_string_alloc(ZSTR_LEN(s), 0);
  1353. if (c != (unsigned char*)ZSTR_VAL(s)) {
  1354. memcpy(ZSTR_VAL(res), ZSTR_VAL(s), c - (unsigned char*)ZSTR_VAL(s));
  1355. }
  1356. r = c + (ZSTR_VAL(res) - ZSTR_VAL(s));
  1357. while (c < e) {
  1358. *r = tolower(*c);
  1359. r++;
  1360. c++;
  1361. }
  1362. *r = '\0';
  1363. return res;
  1364. }
  1365. c++;
  1366. }
  1367. return zend_string_copy(s);
  1368. }
  1369. /* }}} */
  1370. /* {{{ proto string strtolower(string str)
  1371. Makes a string lowercase */
  1372. PHP_FUNCTION(strtolower)
  1373. {
  1374. zend_string *str;
  1375. ZEND_PARSE_PARAMETERS_START(1, 1)
  1376. Z_PARAM_STR(str)
  1377. ZEND_PARSE_PARAMETERS_END();
  1378. RETURN_STR(php_string_tolower(str));
  1379. }
  1380. /* }}} */
  1381. /* {{{ php_basename
  1382. */
  1383. PHPAPI zend_string *php_basename(const char *s, size_t len, char *suffix, size_t sufflen)
  1384. {
  1385. char *c;
  1386. const char *comp, *cend;
  1387. size_t inc_len, cnt;
  1388. int state;
  1389. zend_string *ret;
  1390. comp = cend = c = (char*)s;
  1391. cnt = len;
  1392. state = 0;
  1393. while (cnt > 0) {
  1394. inc_len = (*c == '\0' ? 1 : php_mblen(c, cnt));
  1395. switch (inc_len) {
  1396. case -2:
  1397. case -1:
  1398. inc_len = 1;
  1399. php_mb_reset();
  1400. break;
  1401. case 0:
  1402. goto quit_loop;
  1403. case 1:
  1404. #if defined(PHP_WIN32)
  1405. if (*c == '/' || *c == '\\') {
  1406. #else
  1407. if (*c == '/') {
  1408. #endif
  1409. if (state == 1) {
  1410. state = 0;
  1411. cend = c;
  1412. }
  1413. #if defined(PHP_WIN32)
  1414. /* Catch relative paths in c:file.txt style. They're not to confuse
  1415. with the NTFS streams. This part ensures also, that no drive
  1416. letter traversing happens. */
  1417. } else if ((*c == ':' && (c - comp == 1))) {
  1418. if (state == 0) {
  1419. comp = c;
  1420. state = 1;
  1421. } else {
  1422. cend = c;
  1423. state = 0;
  1424. }
  1425. #endif
  1426. } else {
  1427. if (state == 0) {
  1428. comp = c;
  1429. state = 1;
  1430. }
  1431. }
  1432. break;
  1433. default:
  1434. if (state == 0) {
  1435. comp = c;
  1436. state = 1;
  1437. }
  1438. break;
  1439. }
  1440. c += inc_len;
  1441. cnt -= inc_len;
  1442. }
  1443. quit_loop:
  1444. if (state == 1) {
  1445. cend = c;
  1446. }
  1447. if (suffix != NULL && sufflen < (size_t)(cend - comp) &&
  1448. memcmp(cend - sufflen, suffix, sufflen) == 0) {
  1449. cend -= sufflen;
  1450. }
  1451. len = cend - comp;
  1452. ret = zend_string_init(comp, len, 0);
  1453. return ret;
  1454. }
  1455. /* }}} */
  1456. /* {{{ proto string basename(string path [, string suffix])
  1457. Returns the filename component of the path */
  1458. PHP_FUNCTION(basename)
  1459. {
  1460. char *string, *suffix = NULL;
  1461. size_t string_len, suffix_len = 0;
  1462. ZEND_PARSE_PARAMETERS_START(1, 2)
  1463. Z_PARAM_STRING(string, string_len)
  1464. Z_PARAM_OPTIONAL
  1465. Z_PARAM_STRING(suffix, suffix_len)
  1466. ZEND_PARSE_PARAMETERS_END();
  1467. RETURN_STR(php_basename(string, string_len, suffix, suffix_len));
  1468. }
  1469. /* }}} */
  1470. /* {{{ php_dirname
  1471. Returns directory name component of path */
  1472. PHPAPI size_t php_dirname(char *path, size_t len)
  1473. {
  1474. return zend_dirname(path, len);
  1475. }
  1476. /* }}} */
  1477. /* {{{ proto string dirname(string path[, int levels])
  1478. Returns the directory name component of the path */
  1479. PHP_FUNCTION(dirname)
  1480. {
  1481. char *str;
  1482. size_t str_len;
  1483. zend_string *ret;
  1484. zend_long levels = 1;
  1485. ZEND_PARSE_PARAMETERS_START(1, 2)
  1486. Z_PARAM_STRING(str, str_len)
  1487. Z_PARAM_OPTIONAL
  1488. Z_PARAM_LONG(levels)
  1489. ZEND_PARSE_PARAMETERS_END();
  1490. ret = zend_string_init(str, str_len, 0);
  1491. if (levels == 1) {
  1492. /* Default case */
  1493. #ifdef PHP_WIN32
  1494. ZSTR_LEN(ret) = php_win32_ioutil_dirname(ZSTR_VAL(ret), str_len);
  1495. #else
  1496. ZSTR_LEN(ret) = zend_dirname(ZSTR_VAL(ret), str_len);
  1497. #endif
  1498. } else if (levels < 1) {
  1499. php_error_docref(NULL, E_WARNING, "Invalid argument, levels must be >= 1");
  1500. zend_string_efree(ret);
  1501. return;
  1502. } else {
  1503. /* Some levels up */
  1504. do {
  1505. #ifdef PHP_WIN32
  1506. ZSTR_LEN(ret) = php_win32_ioutil_dirname(ZSTR_VAL(ret), str_len = ZSTR_LEN(ret));
  1507. #else
  1508. ZSTR_LEN(ret) = zend_dirname(ZSTR_VAL(ret), str_len = ZSTR_LEN(ret));
  1509. #endif
  1510. } while (ZSTR_LEN(ret) < str_len && --levels);
  1511. }
  1512. RETURN_NEW_STR(ret);
  1513. }
  1514. /* }}} */
  1515. /* {{{ proto array pathinfo(string path[, int options])
  1516. Returns information about a certain string */
  1517. PHP_FUNCTION(pathinfo)
  1518. {
  1519. zval tmp;
  1520. char *path, *dirname;
  1521. size_t path_len;
  1522. int have_basename;
  1523. zend_long opt = PHP_PATHINFO_ALL;
  1524. zend_string *ret = NULL;
  1525. ZEND_PARSE_PARAMETERS_START(1, 2)
  1526. Z_PARAM_STRING(path, path_len)
  1527. Z_PARAM_OPTIONAL
  1528. Z_PARAM_LONG(opt)
  1529. ZEND_PARSE_PARAMETERS_END();
  1530. have_basename = ((opt & PHP_PATHINFO_BASENAME) == PHP_PATHINFO_BASENAME);
  1531. array_init(&tmp);
  1532. if ((opt & PHP_PATHINFO_DIRNAME) == PHP_PATHINFO_DIRNAME) {
  1533. dirname = estrndup(path, path_len);
  1534. php_dirname(dirname, path_len);
  1535. if (*dirname) {
  1536. add_assoc_string(&tmp, "dirname", dirname);
  1537. }
  1538. efree(dirname);
  1539. }
  1540. if (have_basename) {
  1541. ret = php_basename(path, path_len, NULL, 0);
  1542. add_assoc_str(&tmp, "basename", zend_string_copy(ret));
  1543. }
  1544. if ((opt & PHP_PATHINFO_EXTENSION) == PHP_PATHINFO_EXTENSION) {
  1545. const char *p;
  1546. ptrdiff_t idx;
  1547. if (!have_basename) {
  1548. ret = php_basename(path, path_len, NULL, 0);
  1549. }
  1550. p = zend_memrchr(ZSTR_VAL(ret), '.', ZSTR_LEN(ret));
  1551. if (p) {
  1552. idx = p - ZSTR_VAL(ret);
  1553. add_assoc_stringl(&tmp, "extension", ZSTR_VAL(ret) + idx + 1, ZSTR_LEN(ret) - idx - 1);
  1554. }
  1555. }
  1556. if ((opt & PHP_PATHINFO_FILENAME) == PHP_PATHINFO_FILENAME) {
  1557. const char *p;
  1558. ptrdiff_t idx;
  1559. /* Have we already looked up the basename? */
  1560. if (!have_basename && !ret) {
  1561. ret = php_basename(path, path_len, NULL, 0);
  1562. }
  1563. p = zend_memrchr(ZSTR_VAL(ret), '.', ZSTR_LEN(ret));
  1564. idx = p ? (p - ZSTR_VAL(ret)) : (ptrdiff_t)ZSTR_LEN(ret);
  1565. add_assoc_stringl(&tmp, "filename", ZSTR_VAL(ret), idx);
  1566. }
  1567. if (ret) {
  1568. zend_string_release_ex(ret, 0);
  1569. }
  1570. if (opt == PHP_PATHINFO_ALL) {
  1571. ZVAL_COPY_VALUE(return_value, &tmp);
  1572. } else {
  1573. zval *element;
  1574. if ((element = zend_hash_get_current_data(Z_ARRVAL(tmp))) != NULL) {
  1575. ZVAL_COPY_DEREF(return_value, element);
  1576. } else {
  1577. ZVAL_EMPTY_STRING(return_value);
  1578. }
  1579. zval_ptr_dtor(&tmp);
  1580. }
  1581. }
  1582. /* }}} */
  1583. /* {{{ php_stristr
  1584. case insensitive strstr */
  1585. PHPAPI char *php_stristr(char *s, char *t, size_t s_len, size_t t_len)
  1586. {
  1587. php_strtolower(s, s_len);
  1588. php_strtolower(t, t_len);
  1589. return (char*)php_memnstr(s, t, t_len, s + s_len);
  1590. }
  1591. /* }}} */
  1592. /* {{{ php_strspn
  1593. */
  /*
   * Length of the initial segment of [s1, s1_end) that consists only of
   * bytes found in the mask [s2, s2_end).
   *
   * Both ranges are treated as raw byte ranges (embedded NULs allowed).
   * Neither s1_end nor s2_end is ever dereferenced, so the buffers do not
   * have to be NUL-terminated.
   */
  PHPAPI size_t php_strspn(char *s1, char *s2, char *s1_end, char *s2_end)
  {
      const char *p;
      const char *m;

      for (p = s1; p != s1_end; p++) {
          /* look the current byte up in the mask */
          for (m = s2; m != s2_end; m++) {
              if (*m == *p) {
                  break;
              }
          }
          if (m == s2_end) {
              /* not part of the mask: the span ends here */
              break;
          }
      }

      return (size_t)(p - s1);
  }
  1607. /* }}} */
  1608. /* {{{ php_strcspn
  1609. */
  /*
   * Length of the initial segment of [s1, s1_end) that contains none of the
   * bytes in the mask [s2, s2_end).
   *
   * Both ranges are treated as raw byte ranges (embedded NULs allowed).
   * Neither s1_end nor s2_end is ever dereferenced. An empty mask matches
   * nothing, so the full length of s1 is returned in that case.
   */
  PHPAPI size_t php_strcspn(char *s1, char *s2, char *s1_end, char *s2_end)
  {
      const char *p;
      const char *m;

      for (p = s1; p != s1_end; p++) {
          for (m = s2; m != s2_end; m++) {
              if (*m == *p) {
                  /* first byte that belongs to the mask */
                  return (size_t)(p - s1);
              }
          }
      }

      return (size_t)(p - s1);
  }
  1625. /* }}} */
  1626. /* {{{ php_needle_char
  1627. */
  1628. static int php_needle_char(zval *needle, char *target)
  1629. {
  1630. switch (Z_TYPE_P(needle)) {
  1631. case IS_LONG:
  1632. *target = (char)Z_LVAL_P(needle);
  1633. return SUCCESS;
  1634. case IS_NULL:
  1635. case IS_FALSE:
  1636. *target = '\0';
  1637. return SUCCESS;
  1638. case IS_TRUE:
  1639. *target = '\1';
  1640. return SUCCESS;
  1641. case IS_DOUBLE:
  1642. *target = (char)(int)Z_DVAL_P(needle);
  1643. return SUCCESS;
  1644. case IS_OBJECT:
  1645. *target = (char) zval_get_long(needle);
  1646. return SUCCESS;
  1647. default:
  1648. php_error_docref(NULL, E_WARNING, "needle is not a string or an integer");
  1649. return FAILURE;
  1650. }
  1651. }
  1652. /* }}} */
  1653. /* {{{ proto string stristr(string haystack, string needle[, bool part])
  1654. Finds first occurrence of a string within another, case insensitive */
  1655. PHP_FUNCTION(stristr)
  1656. {
  1657. zval *needle;
  1658. zend_string *haystack;
  1659. const char *found = NULL;
  1660. size_t found_offset;
  1661. char *haystack_dup;
  1662. char needle_char[2];
  1663. zend_bool part = 0;
  1664. ZEND_PARSE_PARAMETERS_START(2, 3)
  1665. Z_PARAM_STR(haystack)
  1666. Z_PARAM_ZVAL(needle)
  1667. Z_PARAM_OPTIONAL
  1668. Z_PARAM_BOOL(part)
  1669. ZEND_PARSE_PARAMETERS_END();
  1670. haystack_dup = estrndup(ZSTR_VAL(haystack), ZSTR_LEN(haystack));
  1671. if (Z_TYPE_P(needle) == IS_STRING) {
  1672. char *orig_needle;
  1673. if (!Z_STRLEN_P(needle)) {
  1674. php_error_docref(NULL, E_WARNING, "Empty needle");
  1675. efree(haystack_dup);
  1676. RETURN_FALSE;
  1677. }
  1678. orig_needle = estrndup(Z_STRVAL_P(needle), Z_STRLEN_P(needle));
  1679. found = php_stristr(haystack_dup, orig_needle, ZSTR_LEN(haystack), Z_STRLEN_P(needle));
  1680. efree(orig_needle);
  1681. } else {
  1682. if (php_needle_char(needle, needle_char) != SUCCESS) {
  1683. efree(haystack_dup);
  1684. RETURN_FALSE;
  1685. }
  1686. needle_char[1] = 0;
  1687. php_error_docref(NULL, E_DEPRECATED,
  1688. "Non-string needles will be interpreted as strings in the future. " \
  1689. "Use an explicit chr() call to preserve the current behavior");
  1690. found = php_stristr(haystack_dup, needle_char, ZSTR_LEN(haystack), 1);
  1691. }
  1692. if (found) {
  1693. found_offset = found - haystack_dup;
  1694. if (part) {
  1695. RETVAL_STRINGL(ZSTR_VAL(haystack), found_offset);
  1696. } else {
  1697. RETVAL_STRINGL(ZSTR_VAL(haystack) + found_offset, ZSTR_LEN(haystack) - found_offset);
  1698. }
  1699. } else {
  1700. RETVAL_FALSE;
  1701. }
  1702. efree(haystack_dup);
  1703. }
  1704. /* }}} */
  1705. /* {{{ proto string strstr(string haystack, string needle[, bool part])
  1706. Finds first occurrence of a string within another */
  1707. PHP_FUNCTION(strstr)
  1708. {
  1709. zval *needle;
  1710. zend_string *haystack;
  1711. const char *found = NULL;
  1712. char needle_char[2];
  1713. zend_long found_offset;
  1714. zend_bool part = 0;
  1715. ZEND_PARSE_PARAMETERS_START(2, 3)
  1716. Z_PARAM_STR(haystack)
  1717. Z_PARAM_ZVAL(needle)
  1718. Z_PARAM_OPTIONAL
  1719. Z_PARAM_BOOL(part)
  1720. ZEND_PARSE_PARAMETERS_END();
  1721. if (Z_TYPE_P(needle) == IS_STRING) {
  1722. if (!Z_STRLEN_P(needle)) {
  1723. php_error_docref(NULL, E_WARNING, "Empty needle");
  1724. RETURN_FALSE;
  1725. }
  1726. found = php_memnstr(ZSTR_VAL(haystack), Z_STRVAL_P(needle), Z_STRLEN_P(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
  1727. } else {
  1728. if (php_needle_char(needle, needle_char) != SUCCESS) {
  1729. RETURN_FALSE;
  1730. }
  1731. needle_char[1] = 0;
  1732. php_error_docref(NULL, E_DEPRECATED,
  1733. "Non-string needles will be interpreted as strings in the future. " \
  1734. "Use an explicit chr() call to preserve the current behavior");
  1735. found = php_memnstr(ZSTR_VAL(haystack), needle_char, 1, ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
  1736. }
  1737. if (found) {
  1738. found_offset = found - ZSTR_VAL(haystack);
  1739. if (part) {
  1740. RETURN_STRINGL(ZSTR_VAL(haystack), found_offset);
  1741. } else {
  1742. RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
  1743. }
  1744. }
  1745. RETURN_FALSE;
  1746. }
  1747. /* }}} */
  1748. /* {{{ proto string strchr(string haystack, string needle)
  1749. An alias for strstr */
  1750. /* }}} */
  1751. /* {{{ proto int strpos(string haystack, string needle [, int offset])
  1752. Finds position of first occurrence of a string within another */
  1753. PHP_FUNCTION(strpos)
  1754. {
  1755. zval *needle;
  1756. zend_string *haystack;
  1757. const char *found = NULL;
  1758. char needle_char[2];
  1759. zend_long offset = 0;
  1760. ZEND_PARSE_PARAMETERS_START(2, 3)
  1761. Z_PARAM_STR(haystack)
  1762. Z_PARAM_ZVAL(needle)
  1763. Z_PARAM_OPTIONAL
  1764. Z_PARAM_LONG(offset)
  1765. ZEND_PARSE_PARAMETERS_END();
  1766. if (offset < 0) {
  1767. offset += (zend_long)ZSTR_LEN(haystack);
  1768. }
  1769. if (offset < 0 || (size_t)offset > ZSTR_LEN(haystack)) {
  1770. php_error_docref(NULL, E_WARNING, "Offset not contained in string");
  1771. RETURN_FALSE;
  1772. }
  1773. if (Z_TYPE_P(needle) == IS_STRING) {
  1774. if (!Z_STRLEN_P(needle)) {
  1775. php_error_docref(NULL, E_WARNING, "Empty needle");
  1776. RETURN_FALSE;
  1777. }
  1778. found = (char*)php_memnstr(ZSTR_VAL(haystack) + offset,
  1779. Z_STRVAL_P(needle),
  1780. Z_STRLEN_P(needle),
  1781. ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
  1782. } else {
  1783. if (php_needle_char(needle, needle_char) != SUCCESS) {
  1784. RETURN_FALSE;
  1785. }
  1786. needle_char[1] = 0;
  1787. php_error_docref(NULL, E_DEPRECATED,
  1788. "Non-string needles will be interpreted as strings in the future. " \
  1789. "Use an explicit chr() call to preserve the current behavior");
  1790. found = (char*)php_memnstr(ZSTR_VAL(haystack) + offset,
  1791. needle_char,
  1792. 1,
  1793. ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
  1794. }
  1795. if (found) {
  1796. RETURN_LONG(found - ZSTR_VAL(haystack));
  1797. } else {
  1798. RETURN_FALSE;
  1799. }
  1800. }
  1801. /* }}} */
  1802. /* {{{ proto int stripos(string haystack, string needle [, int offset])
  1803. Finds position of first occurrence of a string within another, case insensitive */
  1804. PHP_FUNCTION(stripos)
  1805. {
  1806. const char *found = NULL;
  1807. zend_string *haystack;
  1808. zend_long offset = 0;
  1809. char needle_char[2];
  1810. zval *needle;
  1811. zend_string *needle_dup = NULL, *haystack_dup;
  1812. ZEND_PARSE_PARAMETERS_START(2, 3)
  1813. Z_PARAM_STR(haystack)
  1814. Z_PARAM_ZVAL(needle)
  1815. Z_PARAM_OPTIONAL
  1816. Z_PARAM_LONG(offset)
  1817. ZEND_PARSE_PARAMETERS_END();
  1818. if (offset < 0) {
  1819. offset += (zend_long)ZSTR_LEN(haystack);
  1820. }
  1821. if (offset < 0 || (size_t)offset > ZSTR_LEN(haystack)) {
  1822. php_error_docref(NULL, E_WARNING, "Offset not contained in string");
  1823. RETURN_FALSE;
  1824. }
  1825. if (ZSTR_LEN(haystack) == 0) {
  1826. RETURN_FALSE;
  1827. }
  1828. if (Z_TYPE_P(needle) == IS_STRING) {
  1829. if (Z_STRLEN_P(needle) == 0 || Z_STRLEN_P(needle) > ZSTR_LEN(haystack)) {
  1830. RETURN_FALSE;
  1831. }
  1832. haystack_dup = php_string_tolower(haystack);
  1833. needle_dup = php_string_tolower(Z_STR_P(needle));
  1834. found = (char*)php_memnstr(ZSTR_VAL(haystack_dup) + offset,
  1835. ZSTR_VAL(needle_dup), ZSTR_LEN(needle_dup), ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack));
  1836. } else {
  1837. if (php_needle_char(needle, needle_char) != SUCCESS) {
  1838. RETURN_FALSE;
  1839. }
  1840. php_error_docref(NULL, E_DEPRECATED,
  1841. "Non-string needles will be interpreted as strings in the future. " \
  1842. "Use an explicit chr() call to preserve the current behavior");
  1843. haystack_dup = php_string_tolower(haystack);
  1844. needle_char[0] = tolower(needle_char[0]);
  1845. needle_char[1] = '\0';
  1846. found = (char*)php_memnstr(ZSTR_VAL(haystack_dup) + offset,
  1847. needle_char,
  1848. sizeof(needle_char) - 1,
  1849. ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack));
  1850. }
  1851. if (found) {
  1852. RETVAL_LONG(found - ZSTR_VAL(haystack_dup));
  1853. } else {
  1854. RETVAL_FALSE;
  1855. }
  1856. zend_string_release_ex(haystack_dup, 0);
  1857. if (needle_dup) {
  1858. zend_string_release_ex(needle_dup, 0);
  1859. }
  1860. }
  1861. /* }}} */
  1862. /* {{{ proto int strrpos(string haystack, string needle [, int offset])
  1863. Finds position of last occurrence of a string within another string */
  1864. PHP_FUNCTION(strrpos)
  1865. {
  1866. zval *zneedle;
  1867. zend_string *haystack;
  1868. size_t needle_len;
  1869. zend_long offset = 0;
  1870. char ord_needle[2];
  1871. const char *p, *e, *found, *needle;
  1872. ZEND_PARSE_PARAMETERS_START(2, 3)
  1873. Z_PARAM_STR(haystack)
  1874. Z_PARAM_ZVAL(zneedle)
  1875. Z_PARAM_OPTIONAL
  1876. Z_PARAM_LONG(offset)
  1877. ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
  1878. if (Z_TYPE_P(zneedle) == IS_STRING) {
  1879. needle = Z_STRVAL_P(zneedle);
  1880. needle_len = Z_STRLEN_P(zneedle);
  1881. } else {
  1882. if (php_needle_char(zneedle, ord_needle) != SUCCESS) {
  1883. RETURN_FALSE;
  1884. }
  1885. php_error_docref(NULL, E_DEPRECATED,
  1886. "Non-string needles will be interpreted as strings in the future. " \
  1887. "Use an explicit chr() call to preserve the current behavior");
  1888. ord_needle[1] = '\0';
  1889. needle = ord_needle;
  1890. needle_len = 1;
  1891. }
  1892. if ((ZSTR_LEN(haystack) == 0) || (needle_len == 0)) {
  1893. RETURN_FALSE;
  1894. }
  1895. if (offset >= 0) {
  1896. if ((size_t)offset > ZSTR_LEN(haystack)) {
  1897. php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
  1898. RETURN_FALSE;
  1899. }
  1900. p = ZSTR_VAL(haystack) + (size_t)offset;
  1901. e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
  1902. } else {
  1903. if (offset < -INT_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
  1904. php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
  1905. RETURN_FALSE;
  1906. }
  1907. p = ZSTR_VAL(haystack);
  1908. if ((size_t)-offset < needle_len) {
  1909. e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
  1910. } else {
  1911. e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack) + offset + needle_len;
  1912. }
  1913. }
  1914. if ((found = zend_memnrstr(p, needle, needle_len, e))) {
  1915. RETURN_LONG(found - ZSTR_VAL(haystack));
  1916. }
  1917. RETURN_FALSE;
  1918. }
  1919. /* }}} */
  1920. /* {{{ proto int strripos(string haystack, string needle [, int offset])
  1921. Finds position of last occurrence of a string within another string */
  1922. PHP_FUNCTION(strripos)
  1923. {
  1924. zval *zneedle;
  1925. zend_string *needle;
  1926. zend_string *haystack;
  1927. zend_long offset = 0;
  1928. const char *p, *e, *found;
  1929. zend_string *needle_dup, *haystack_dup, *ord_needle = NULL;
  1930. ALLOCA_FLAG(use_heap);
  1931. ZEND_PARSE_PARAMETERS_START(2, 3)
  1932. Z_PARAM_STR(haystack)
  1933. Z_PARAM_ZVAL(zneedle)
  1934. Z_PARAM_OPTIONAL
  1935. Z_PARAM_LONG(offset)
  1936. ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
  1937. ZSTR_ALLOCA_ALLOC(ord_needle, 1, use_heap);
  1938. if (Z_TYPE_P(zneedle) == IS_STRING) {
  1939. needle = Z_STR_P(zneedle);
  1940. } else {
  1941. if (php_needle_char(zneedle, ZSTR_VAL(ord_needle)) != SUCCESS) {
  1942. ZSTR_ALLOCA_FREE(ord_needle, use_heap);
  1943. RETURN_FALSE;
  1944. }
  1945. php_error_docref(NULL, E_DEPRECATED,
  1946. "Non-string needles will be interpreted as strings in the future. " \
  1947. "Use an explicit chr() call to preserve the current behavior");
  1948. ZSTR_VAL(ord_needle)[1] = '\0';
  1949. needle = ord_needle;
  1950. }
  1951. if ((ZSTR_LEN(haystack) == 0) || (ZSTR_LEN(needle) == 0)) {
  1952. ZSTR_ALLOCA_FREE(ord_needle, use_heap);
  1953. RETURN_FALSE;
  1954. }
  1955. if (ZSTR_LEN(needle) == 1) {
  1956. /* Single character search can shortcut memcmps
  1957. Can also avoid tolower emallocs */
  1958. if (offset >= 0) {
  1959. if ((size_t)offset > ZSTR_LEN(haystack)) {
  1960. ZSTR_ALLOCA_FREE(ord_needle, use_heap);
  1961. php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
  1962. RETURN_FALSE;
  1963. }
  1964. p = ZSTR_VAL(haystack) + (size_t)offset;
  1965. e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - 1;
  1966. } else {
  1967. p = ZSTR_VAL(haystack);
  1968. if (offset < -INT_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
  1969. ZSTR_ALLOCA_FREE(ord_needle, use_heap);
  1970. php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
  1971. RETURN_FALSE;
  1972. }
  1973. e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack) + (size_t)offset;
  1974. }
  1975. /* Borrow that ord_needle buffer to avoid repeatedly tolower()ing needle */
  1976. *ZSTR_VAL(ord_needle) = tolower(*ZSTR_VAL(needle));
  1977. while (e >= p) {
  1978. if (tolower(*e) == *ZSTR_VAL(ord_needle)) {
  1979. ZSTR_ALLOCA_FREE(ord_needle, use_heap);
  1980. RETURN_LONG(e - p + (offset > 0 ? offset : 0));
  1981. }
  1982. e--;
  1983. }
  1984. ZSTR_ALLOCA_FREE(ord_needle, use_heap);
  1985. RETURN_FALSE;
  1986. }
  1987. haystack_dup = php_string_tolower(haystack);
  1988. if (offset >= 0) {
  1989. if ((size_t)offset > ZSTR_LEN(haystack)) {
  1990. zend_string_release_ex(haystack_dup, 0);
  1991. ZSTR_ALLOCA_FREE(ord_needle, use_heap);
  1992. php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
  1993. RETURN_FALSE;
  1994. }
  1995. p = ZSTR_VAL(haystack_dup) + offset;
  1996. e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack);
  1997. } else {
  1998. if (offset < -INT_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
  1999. zend_string_release_ex(haystack_dup, 0);
  2000. ZSTR_ALLOCA_FREE(ord_needle, use_heap);
  2001. php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
  2002. RETURN_FALSE;
  2003. }
  2004. p = ZSTR_VAL(haystack_dup);
  2005. if ((size_t)-offset < ZSTR_LEN(needle)) {
  2006. e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack);
  2007. } else {
  2008. e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack) + offset + ZSTR_LEN(needle);
  2009. }
  2010. }
  2011. needle_dup = php_string_tolower(needle);
  2012. if ((found = (char *)zend_memnrstr(p, ZSTR_VAL(needle_dup), ZSTR_LEN(needle_dup), e))) {
  2013. RETVAL_LONG(found - ZSTR_VAL(haystack_dup));
  2014. zend_string_release_ex(needle_dup, 0);
  2015. zend_string_release_ex(haystack_dup, 0);
  2016. ZSTR_ALLOCA_FREE(ord_needle, use_heap);
  2017. } else {
  2018. zend_string_release_ex(needle_dup, 0);
  2019. zend_string_release_ex(haystack_dup, 0);
  2020. ZSTR_ALLOCA_FREE(ord_needle, use_heap);
  2021. RETURN_FALSE;
  2022. }
  2023. }
  2024. /* }}} */
  2025. /* {{{ proto string strrchr(string haystack, string needle)
  2026. Finds the last occurrence of a character in a string within another */
  2027. PHP_FUNCTION(strrchr)
  2028. {
  2029. zval *needle;
  2030. zend_string *haystack;
  2031. const char *found = NULL;
  2032. zend_long found_offset;
  2033. ZEND_PARSE_PARAMETERS_START(2, 2)
  2034. Z_PARAM_STR(haystack)
  2035. Z_PARAM_ZVAL(needle)
  2036. ZEND_PARSE_PARAMETERS_END();
  2037. if (Z_TYPE_P(needle) == IS_STRING) {
  2038. found = zend_memrchr(ZSTR_VAL(haystack), *Z_STRVAL_P(needle), ZSTR_LEN(haystack));
  2039. } else {
  2040. char needle_chr;
  2041. if (php_needle_char(needle, &needle_chr) != SUCCESS) {
  2042. RETURN_FALSE;
  2043. }
  2044. php_error_docref(NULL, E_DEPRECATED,
  2045. "Non-string needles will be interpreted as strings in the future. " \
  2046. "Use an explicit chr() call to preserve the current behavior");
  2047. found = zend_memrchr(ZSTR_VAL(haystack), needle_chr, ZSTR_LEN(haystack));
  2048. }
  2049. if (found) {
  2050. found_offset = found - ZSTR_VAL(haystack);
  2051. RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
  2052. } else {
  2053. RETURN_FALSE;
  2054. }
  2055. }
  2056. /* }}} */
  2057. /* {{{ php_chunk_split
  2058. */
  2059. static zend_string *php_chunk_split(const char *src, size_t srclen, const char *end, size_t endlen, size_t chunklen)
  2060. {
  2061. char *q;
  2062. const char *p;
  2063. size_t chunks; /* complete chunks! */
  2064. size_t restlen;
  2065. size_t out_len;
  2066. zend_string *dest;
  2067. chunks = srclen / chunklen;
  2068. restlen = srclen - chunks * chunklen; /* srclen % chunklen */
  2069. if (chunks > INT_MAX - 1) {
  2070. return NULL;
  2071. }
  2072. out_len = chunks + 1;
  2073. if (endlen !=0 && out_len > INT_MAX/endlen) {
  2074. return NULL;
  2075. }
  2076. out_len *= endlen;
  2077. if (out_len > INT_MAX - srclen - 1) {
  2078. return NULL;
  2079. }
  2080. out_len += srclen + 1;
  2081. dest = zend_string_alloc(out_len * sizeof(char), 0);
  2082. for (p = src, q = ZSTR_VAL(dest); p < (src + srclen - chunklen + 1); ) {
  2083. memcpy(q, p, chunklen);
  2084. q += chunklen;
  2085. memcpy(q, end, endlen);
  2086. q += endlen;
  2087. p += chunklen;
  2088. }
  2089. if (restlen) {
  2090. memcpy(q, p, restlen);
  2091. q += restlen;
  2092. memcpy(q, end, endlen);
  2093. q += endlen;
  2094. }
  2095. *q = '\0';
  2096. ZSTR_LEN(dest) = q - ZSTR_VAL(dest);
  2097. return dest;
  2098. }
  2099. /* }}} */
  2100. /* {{{ proto string chunk_split(string str [, int chunklen [, string ending]])
  2101. Returns split line */
  2102. PHP_FUNCTION(chunk_split)
  2103. {
  2104. zend_string *str;
  2105. char *end = "\r\n";
  2106. size_t endlen = 2;
  2107. zend_long chunklen = 76;
  2108. zend_string *result;
  2109. ZEND_PARSE_PARAMETERS_START(1, 3)
  2110. Z_PARAM_STR(str)
  2111. Z_PARAM_OPTIONAL
  2112. Z_PARAM_LONG(chunklen)
  2113. Z_PARAM_STRING(end, endlen)
  2114. ZEND_PARSE_PARAMETERS_END();
  2115. if (chunklen <= 0) {
  2116. php_error_docref(NULL, E_WARNING, "Chunk length should be greater than zero");
  2117. RETURN_FALSE;
  2118. }
  2119. if ((size_t)chunklen > ZSTR_LEN(str)) {
  2120. /* to maintain BC, we must return original string + ending */
  2121. result = zend_string_safe_alloc(ZSTR_LEN(str), 1, endlen, 0);
  2122. memcpy(ZSTR_VAL(result), ZSTR_VAL(str), ZSTR_LEN(str));
  2123. memcpy(ZSTR_VAL(result) + ZSTR_LEN(str), end, endlen);
  2124. ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
  2125. RETURN_NEW_STR(result);
  2126. }
  2127. if (!ZSTR_LEN(str)) {
  2128. RETURN_EMPTY_STRING();
  2129. }
  2130. result = php_chunk_split(ZSTR_VAL(str), ZSTR_LEN(str), end, endlen, (size_t)chunklen);
  2131. if (result) {
  2132. RETURN_STR(result);
  2133. } else {
  2134. RETURN_FALSE;
  2135. }
  2136. }
  2137. /* }}} */
  2138. /* {{{ proto string substr(string str, int start [, int length])
  2139. Returns part of a string */
  2140. PHP_FUNCTION(substr)
  2141. {
  2142. zend_string *str;
  2143. zend_long l = 0, f;
  2144. int argc = ZEND_NUM_ARGS();
  2145. ZEND_PARSE_PARAMETERS_START(2, 3)
  2146. Z_PARAM_STR(str)
  2147. Z_PARAM_LONG(f)
  2148. Z_PARAM_OPTIONAL
  2149. Z_PARAM_LONG(l)
  2150. ZEND_PARSE_PARAMETERS_END();
  2151. if (argc > 2) {
  2152. if ((l < 0 && (size_t)(-l) > ZSTR_LEN(str))) {
  2153. RETURN_FALSE;
  2154. } else if (l > (zend_long)ZSTR_LEN(str)) {
  2155. l = ZSTR_LEN(str);
  2156. }
  2157. } else {
  2158. l = ZSTR_LEN(str);
  2159. }
  2160. if (f > (zend_long)ZSTR_LEN(str)) {
  2161. RETURN_FALSE;
  2162. } else if (f < 0 && (size_t)-f > ZSTR_LEN(str)) {
  2163. f = 0;
  2164. }
  2165. if (l < 0 && (l + (zend_long)ZSTR_LEN(str) - f) < 0) {
  2166. RETURN_FALSE;
  2167. }
  2168. /* if "from" position is negative, count start position from the end
  2169. * of the string
  2170. */
  2171. if (f < 0) {
  2172. f = (zend_long)ZSTR_LEN(str) + f;
  2173. if (f < 0) {
  2174. f = 0;
  2175. }
  2176. }
  2177. /* if "length" position is negative, set it to the length
  2178. * needed to stop that many chars from the end of the string
  2179. */
  2180. if (l < 0) {
  2181. l = ((zend_long)ZSTR_LEN(str) - f) + l;
  2182. if (l < 0) {
  2183. l = 0;
  2184. }
  2185. }
  2186. if (f > (zend_long)ZSTR_LEN(str)) {
  2187. RETURN_FALSE;
  2188. }
  2189. if ((size_t)l > ZSTR_LEN(str) - (size_t)f) {
  2190. l = ZSTR_LEN(str) - f;
  2191. }
  2192. if (l == 0) {
  2193. RETURN_EMPTY_STRING();
  2194. } else if (l == 1) {
  2195. RETURN_INTERNED_STR(ZSTR_CHAR((zend_uchar)(ZSTR_VAL(str)[f])));
  2196. } else if (l == ZSTR_LEN(str)) {
  2197. RETURN_STR_COPY(str);
  2198. }
  2199. RETURN_STRINGL(ZSTR_VAL(str) + f, l);
  2200. }
  2201. /* }}} */
  2202. /* {{{ proto mixed substr_replace(mixed str, mixed repl, mixed start [, mixed length])
  2203. Replaces part of a string with another string */
  2204. PHP_FUNCTION(substr_replace)
  2205. {
  2206. zval *str;
  2207. zval *from;
  2208. zval *len = NULL;
  2209. zval *repl;
  2210. zend_long l = 0;
  2211. zend_long f;
  2212. int argc = ZEND_NUM_ARGS();
  2213. zend_string *result;
  2214. HashPosition from_idx, repl_idx, len_idx;
  2215. zval *tmp_str = NULL, *tmp_from = NULL, *tmp_repl = NULL, *tmp_len= NULL;
  2216. ZEND_PARSE_PARAMETERS_START(3, 4)
  2217. Z_PARAM_ZVAL(str)
  2218. Z_PARAM_ZVAL(repl)
  2219. Z_PARAM_ZVAL(from)
  2220. Z_PARAM_OPTIONAL
  2221. Z_PARAM_ZVAL(len)
  2222. ZEND_PARSE_PARAMETERS_END();
  2223. if (Z_TYPE_P(str) != IS_ARRAY) {
  2224. convert_to_string_ex(str);
  2225. }
  2226. if (Z_TYPE_P(repl) != IS_ARRAY) {
  2227. convert_to_string_ex(repl);
  2228. }
  2229. if (Z_TYPE_P(from) != IS_ARRAY) {
  2230. convert_to_long_ex(from);
  2231. }
  2232. if (argc > 3) {
  2233. if (Z_TYPE_P(len) != IS_ARRAY) {
  2234. convert_to_long_ex(len);
  2235. l = Z_LVAL_P(len);
  2236. }
  2237. } else {
  2238. if (Z_TYPE_P(str) != IS_ARRAY) {
  2239. l = Z_STRLEN_P(str);
  2240. }
  2241. }
  2242. if (Z_TYPE_P(str) == IS_STRING) {
  2243. if (
  2244. (argc == 3 && Z_TYPE_P(from) == IS_ARRAY) ||
  2245. (argc == 4 && Z_TYPE_P(from) != Z_TYPE_P(len))
  2246. ) {
  2247. php_error_docref(NULL, E_WARNING, "'start' and 'length' should be of same type - numerical or array ");
  2248. RETURN_STR_COPY(Z_STR_P(str));
  2249. }
  2250. if (argc == 4 && Z_TYPE_P(from) == IS_ARRAY) {
  2251. if (zend_hash_num_elements(Z_ARRVAL_P(from)) != zend_hash_num_elements(Z_ARRVAL_P(len))) {
  2252. php_error_docref(NULL, E_WARNING, "'start' and 'length' should have the same number of elements");
  2253. RETURN_STR_COPY(Z_STR_P(str));
  2254. }
  2255. }
  2256. }
  2257. if (Z_TYPE_P(str) != IS_ARRAY) {
  2258. if (Z_TYPE_P(from) != IS_ARRAY) {
  2259. zend_string *repl_str;
  2260. zend_string *tmp_repl_str = NULL;
  2261. f = Z_LVAL_P(from);
  2262. /* if "from" position is negative, count start position from the end
  2263. * of the string
  2264. */
  2265. if (f < 0) {
  2266. f = (zend_long)Z_STRLEN_P(str) + f;
  2267. if (f < 0) {
  2268. f = 0;
  2269. }
  2270. } else if ((size_t)f > Z_STRLEN_P(str)) {
  2271. f = Z_STRLEN_P(str);
  2272. }
  2273. /* if "length" position is negative, set it to the length
  2274. * needed to stop that many chars from the end of the string
  2275. */
  2276. if (l < 0) {
  2277. l = ((zend_long)Z_STRLEN_P(str) - f) + l;
  2278. if (l < 0) {
  2279. l = 0;
  2280. }
  2281. }
  2282. if ((size_t)l > Z_STRLEN_P(str) || (l < 0 && (size_t)(-l) > Z_STRLEN_P(str))) {
  2283. l = Z_STRLEN_P(str);
  2284. }
  2285. if ((f + l) > (zend_long)Z_STRLEN_P(str)) {
  2286. l = Z_STRLEN_P(str) - f;
  2287. }
  2288. if (Z_TYPE_P(repl) == IS_ARRAY) {
  2289. repl_idx = 0;
  2290. while (repl_idx < Z_ARRVAL_P(repl)->nNumUsed) {
  2291. tmp_repl = &Z_ARRVAL_P(repl)->arData[repl_idx].val;
  2292. if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
  2293. break;
  2294. }
  2295. repl_idx++;
  2296. }
  2297. if (repl_idx < Z_ARRVAL_P(repl)->nNumUsed) {
  2298. repl_str = zval_get_tmp_string(tmp_repl, &tmp_repl_str);
  2299. } else {
  2300. repl_str = STR_EMPTY_ALLOC();
  2301. }
  2302. } else {
  2303. repl_str = Z_STR_P(repl);
  2304. }
  2305. result = zend_string_safe_alloc(1, Z_STRLEN_P(str) - l + ZSTR_LEN(repl_str), 0, 0);
  2306. memcpy(ZSTR_VAL(result), Z_STRVAL_P(str), f);
  2307. if (ZSTR_LEN(repl_str)) {
  2308. memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
  2309. }
  2310. memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), Z_STRVAL_P(str) + f + l, Z_STRLEN_P(str) - f - l);
  2311. ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
  2312. zend_tmp_string_release(tmp_repl_str);
  2313. RETURN_NEW_STR(result);
  2314. } else {
  2315. php_error_docref(NULL, E_WARNING, "Functionality of 'start' and 'length' as arrays is not implemented");
  2316. RETURN_STR_COPY(Z_STR_P(str));
  2317. }
  2318. } else { /* str is array of strings */
  2319. zend_string *str_index = NULL;
  2320. size_t result_len;
  2321. zend_ulong num_index;
  2322. array_init(return_value);
  2323. from_idx = len_idx = repl_idx = 0;
  2324. ZEND_HASH_FOREACH_KEY_VAL_IND(Z_ARRVAL_P(str), num_index, str_index, tmp_str) {
  2325. zend_string *tmp_orig_str;
  2326. zend_string *orig_str = zval_get_tmp_string(tmp_str, &tmp_orig_str);
  2327. if (Z_TYPE_P(from) == IS_ARRAY) {
  2328. while (from_idx < Z_ARRVAL_P(from)->nNumUsed) {
  2329. tmp_from = &Z_ARRVAL_P(from)->arData[from_idx].val;
  2330. if (Z_TYPE_P(tmp_from) != IS_UNDEF) {
  2331. break;
  2332. }
  2333. from_idx++;
  2334. }
  2335. if (from_idx < Z_ARRVAL_P(from)->nNumUsed) {
  2336. f = zval_get_long(tmp_from);
  2337. if (f < 0) {
  2338. f = (zend_long)ZSTR_LEN(orig_str) + f;
  2339. if (f < 0) {
  2340. f = 0;
  2341. }
  2342. } else if (f > (zend_long)ZSTR_LEN(orig_str)) {
  2343. f = ZSTR_LEN(orig_str);
  2344. }
  2345. from_idx++;
  2346. } else {
  2347. f = 0;
  2348. }
  2349. } else {
  2350. f = Z_LVAL_P(from);
  2351. if (f < 0) {
  2352. f = (zend_long)ZSTR_LEN(orig_str) + f;
  2353. if (f < 0) {
  2354. f = 0;
  2355. }
  2356. } else if (f > (zend_long)ZSTR_LEN(orig_str)) {
  2357. f = ZSTR_LEN(orig_str);
  2358. }
  2359. }
  2360. if (argc > 3 && Z_TYPE_P(len) == IS_ARRAY) {
  2361. while (len_idx < Z_ARRVAL_P(len)->nNumUsed) {
  2362. tmp_len = &Z_ARRVAL_P(len)->arData[len_idx].val;
  2363. if (Z_TYPE_P(tmp_len) != IS_UNDEF) {
  2364. break;
  2365. }
  2366. len_idx++;
  2367. }
  2368. if (len_idx < Z_ARRVAL_P(len)->nNumUsed) {
  2369. l = zval_get_long(tmp_len);
  2370. len_idx++;
  2371. } else {
  2372. l = ZSTR_LEN(orig_str);
  2373. }
  2374. } else if (argc > 3) {
  2375. l = Z_LVAL_P(len);
  2376. } else {
  2377. l = ZSTR_LEN(orig_str);
  2378. }
  2379. if (l < 0) {
  2380. l = (ZSTR_LEN(orig_str) - f) + l;
  2381. if (l < 0) {
  2382. l = 0;
  2383. }
  2384. }
  2385. if ((f + l) > (zend_long)ZSTR_LEN(orig_str)) {
  2386. l = ZSTR_LEN(orig_str) - f;
  2387. }
  2388. result_len = ZSTR_LEN(orig_str) - l;
  2389. if (Z_TYPE_P(repl) == IS_ARRAY) {
  2390. while (repl_idx < Z_ARRVAL_P(repl)->nNumUsed) {
  2391. tmp_repl = &Z_ARRVAL_P(repl)->arData[repl_idx].val;
  2392. if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
  2393. break;
  2394. }
  2395. repl_idx++;
  2396. }
  2397. if (repl_idx < Z_ARRVAL_P(repl)->nNumUsed) {
  2398. zend_string *tmp_repl_str;
  2399. zend_string *repl_str = zval_get_tmp_string(tmp_repl, &tmp_repl_str);
  2400. result_len += ZSTR_LEN(repl_str);
  2401. repl_idx++;
  2402. result = zend_string_safe_alloc(1, result_len, 0, 0);
  2403. memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
  2404. memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
  2405. memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
  2406. zend_tmp_string_release(tmp_repl_str);
  2407. } else {
  2408. result = zend_string_safe_alloc(1, result_len, 0, 0);
  2409. memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
  2410. memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
  2411. }
  2412. } else {
  2413. result_len += Z_STRLEN_P(repl);
  2414. result = zend_string_safe_alloc(1, result_len, 0, 0);
  2415. memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
  2416. memcpy((ZSTR_VAL(result) + f), Z_STRVAL_P(repl), Z_STRLEN_P(repl));
  2417. memcpy((ZSTR_VAL(result) + f + Z_STRLEN_P(repl)), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
  2418. }
  2419. ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
  2420. if (str_index) {
  2421. zval tmp;
  2422. ZVAL_NEW_STR(&tmp, result);
  2423. zend_symtable_update(Z_ARRVAL_P(return_value), str_index, &tmp);
  2424. } else {
  2425. add_index_str(return_value, num_index, result);
  2426. }
  2427. zend_tmp_string_release(tmp_orig_str);
  2428. } ZEND_HASH_FOREACH_END();
  2429. } /* if */
  2430. }
  2431. /* }}} */
  2432. /* {{{ proto string quotemeta(string str)
  2433. Quotes meta characters */
  2434. PHP_FUNCTION(quotemeta)
  2435. {
  2436. zend_string *old;
  2437. const char *old_end, *p;
  2438. char *q;
  2439. char c;
  2440. zend_string *str;
  2441. ZEND_PARSE_PARAMETERS_START(1, 1)
  2442. Z_PARAM_STR(old)
  2443. ZEND_PARSE_PARAMETERS_END();
  2444. old_end = ZSTR_VAL(old) + ZSTR_LEN(old);
  2445. if (ZSTR_VAL(old) == old_end) {
  2446. RETURN_FALSE;
  2447. }
  2448. str = zend_string_safe_alloc(2, ZSTR_LEN(old), 0, 0);
  2449. for (p = ZSTR_VAL(old), q = ZSTR_VAL(str); p != old_end; p++) {
  2450. c = *p;
  2451. switch (c) {
  2452. case '.':
  2453. case '\\':
  2454. case '+':
  2455. case '*':
  2456. case '?':
  2457. case '[':
  2458. case '^':
  2459. case ']':
  2460. case '$':
  2461. case '(':
  2462. case ')':
  2463. *q++ = '\\';
  2464. /* break is missing _intentionally_ */
  2465. default:
  2466. *q++ = c;
  2467. }
  2468. }
  2469. *q = '\0';
  2470. RETURN_NEW_STR(zend_string_truncate(str, q - ZSTR_VAL(str), 0));
  2471. }
  2472. /* }}} */
  2473. /* {{{ proto int ord(string character)
  2474. Returns ASCII value of character
  2475. Warning: This function is special-cased by zend_compile.c and so is bypassed for constant string argument */
  2476. PHP_FUNCTION(ord)
  2477. {
  2478. zend_string *str;
  2479. ZEND_PARSE_PARAMETERS_START(1, 1)
  2480. Z_PARAM_STR(str)
  2481. ZEND_PARSE_PARAMETERS_END();
  2482. RETURN_LONG((unsigned char) ZSTR_VAL(str)[0]);
  2483. }
  2484. /* }}} */
  2485. /* {{{ proto string chr(int ascii)
  2486. Converts ASCII code to a character
  2487. Warning: This function is special-cased by zend_compile.c and so is bypassed for constant integer argument */
  2488. PHP_FUNCTION(chr)
  2489. {
  2490. zend_long c;
  2491. if (ZEND_NUM_ARGS() != 1) {
  2492. WRONG_PARAM_COUNT;
  2493. }
  2494. ZEND_PARSE_PARAMETERS_START_EX(ZEND_PARSE_PARAMS_QUIET, 1, 1)
  2495. Z_PARAM_LONG(c)
  2496. ZEND_PARSE_PARAMETERS_END_EX(c = 0);
  2497. c &= 0xff;
  2498. ZVAL_INTERNED_STR(return_value, ZSTR_CHAR(c));
  2499. }
  2500. /* }}} */
  2501. /* {{{ php_ucfirst
  2502. Uppercase the first character of the word in a native string */
  2503. static zend_string* php_ucfirst(zend_string *str)
  2504. {
  2505. const unsigned char ch = ZSTR_VAL(str)[0];
  2506. unsigned char r = toupper(ch);
  2507. if (r == ch) {
  2508. return zend_string_copy(str);
  2509. } else {
  2510. zend_string *s = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
  2511. ZSTR_VAL(s)[0] = r;
  2512. return s;
  2513. }
  2514. }
  2515. /* }}} */
  2516. /* {{{ proto string ucfirst(string str)
  2517. Makes a string's first character uppercase */
  2518. PHP_FUNCTION(ucfirst)
  2519. {
  2520. zend_string *str;
  2521. ZEND_PARSE_PARAMETERS_START(1, 1)
  2522. Z_PARAM_STR(str)
  2523. ZEND_PARSE_PARAMETERS_END();
  2524. if (!ZSTR_LEN(str)) {
  2525. RETURN_EMPTY_STRING();
  2526. }
  2527. RETURN_STR(php_ucfirst(str));
  2528. }
  2529. /* }}} */
  2530. /* {{{
  2531. Lowercase the first character of the word in a native string */
  2532. static zend_string* php_lcfirst(zend_string *str)
  2533. {
  2534. unsigned char r = tolower(ZSTR_VAL(str)[0]);
  2535. if (r == ZSTR_VAL(str)[0]) {
  2536. return zend_string_copy(str);
  2537. } else {
  2538. zend_string *s = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
  2539. ZSTR_VAL(s)[0] = r;
  2540. return s;
  2541. }
  2542. }
  2543. /* }}} */
  2544. /* {{{ proto string lcfirst(string str)
  2545. Make a string's first character lowercase */
  2546. PHP_FUNCTION(lcfirst)
  2547. {
  2548. zend_string *str;
  2549. ZEND_PARSE_PARAMETERS_START(1, 1)
  2550. Z_PARAM_STR(str)
  2551. ZEND_PARSE_PARAMETERS_END();
  2552. if (!ZSTR_LEN(str)) {
  2553. RETURN_EMPTY_STRING();
  2554. }
  2555. RETURN_STR(php_lcfirst(str));
  2556. }
  2557. /* }}} */
  2558. /* {{{ proto string ucwords(string str [, string delims])
  2559. Uppercase the first character of every word in a string */
  2560. PHP_FUNCTION(ucwords)
  2561. {
  2562. zend_string *str;
  2563. char *delims = " \t\r\n\f\v";
  2564. register char *r;
  2565. register const char *r_end;
  2566. size_t delims_len = 6;
  2567. char mask[256];
  2568. ZEND_PARSE_PARAMETERS_START(1, 2)
  2569. Z_PARAM_STR(str)
  2570. Z_PARAM_OPTIONAL
  2571. Z_PARAM_STRING(delims, delims_len)
  2572. ZEND_PARSE_PARAMETERS_END();
  2573. if (!ZSTR_LEN(str)) {
  2574. RETURN_EMPTY_STRING();
  2575. }
  2576. php_charmask((unsigned char *)delims, delims_len, mask);
  2577. ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
  2578. r = Z_STRVAL_P(return_value);
  2579. *r = toupper((unsigned char) *r);
  2580. for (r_end = r + Z_STRLEN_P(return_value) - 1; r < r_end; ) {
  2581. if (mask[(unsigned char)*r++]) {
  2582. *r = toupper((unsigned char) *r);
  2583. }
  2584. }
  2585. }
  2586. /* }}} */
  2587. /* {{{ php_strtr
  2588. */
  2589. PHPAPI char *php_strtr(char *str, size_t len, const char *str_from, const char *str_to, size_t trlen)
  2590. {
  2591. size_t i;
  2592. if (UNEXPECTED(trlen < 1)) {
  2593. return str;
  2594. } else if (trlen == 1) {
  2595. char ch_from = *str_from;
  2596. char ch_to = *str_to;
  2597. for (i = 0; i < len; i++) {
  2598. if (str[i] == ch_from) {
  2599. str[i] = ch_to;
  2600. }
  2601. }
  2602. } else {
  2603. unsigned char xlat[256], j = 0;
  2604. do { xlat[j] = j; } while (++j != 0);
  2605. for (i = 0; i < trlen; i++) {
  2606. xlat[(size_t)(unsigned char) str_from[i]] = str_to[i];
  2607. }
  2608. for (i = 0; i < len; i++) {
  2609. str[i] = xlat[(size_t)(unsigned char) str[i]];
  2610. }
  2611. }
  2612. return str;
  2613. }
  2614. /* }}} */
  2615. /* {{{ php_strtr_ex
  2616. */
  2617. static zend_string *php_strtr_ex(zend_string *str, const char *str_from, const char *str_to, size_t trlen)
  2618. {
  2619. zend_string *new_str = NULL;
  2620. size_t i;
  2621. if (UNEXPECTED(trlen < 1)) {
  2622. return zend_string_copy(str);
  2623. } else if (trlen == 1) {
  2624. char ch_from = *str_from;
  2625. char ch_to = *str_to;
  2626. for (i = 0; i < ZSTR_LEN(str); i++) {
  2627. if (ZSTR_VAL(str)[i] == ch_from) {
  2628. new_str = zend_string_alloc(ZSTR_LEN(str), 0);
  2629. memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), i);
  2630. ZSTR_VAL(new_str)[i] = ch_to;
  2631. break;
  2632. }
  2633. }
  2634. for (; i < ZSTR_LEN(str); i++) {
  2635. ZSTR_VAL(new_str)[i] = (ZSTR_VAL(str)[i] != ch_from) ? ZSTR_VAL(str)[i] : ch_to;
  2636. }
  2637. } else {
  2638. unsigned char xlat[256], j = 0;
  2639. do { xlat[j] = j; } while (++j != 0);
  2640. for (i = 0; i < trlen; i++) {
  2641. xlat[(size_t)(unsigned char) str_from[i]] = str_to[i];
  2642. }
  2643. for (i = 0; i < ZSTR_LEN(str); i++) {
  2644. if (ZSTR_VAL(str)[i] != xlat[(size_t)(unsigned char) ZSTR_VAL(str)[i]]) {
  2645. new_str = zend_string_alloc(ZSTR_LEN(str), 0);
  2646. memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), i);
  2647. ZSTR_VAL(new_str)[i] = xlat[(size_t)(unsigned char) ZSTR_VAL(str)[i]];
  2648. break;
  2649. }
  2650. }
  2651. for (;i < ZSTR_LEN(str); i++) {
  2652. ZSTR_VAL(new_str)[i] = xlat[(size_t)(unsigned char) ZSTR_VAL(str)[i]];
  2653. }
  2654. }
  2655. if (!new_str) {
  2656. return zend_string_copy(str);
  2657. }
  2658. ZSTR_VAL(new_str)[ZSTR_LEN(new_str)] = 0;
  2659. return new_str;
  2660. }
  2661. /* }}} */
  2662. /* {{{ php_strtr_array */
  2663. static void php_strtr_array(zval *return_value, zend_string *input, HashTable *pats)
  2664. {
  2665. const char *str = ZSTR_VAL(input);
  2666. size_t slen = ZSTR_LEN(input);
  2667. zend_ulong num_key;
  2668. zend_string *str_key;
  2669. size_t len, pos, old_pos;
  2670. int num_keys = 0;
  2671. size_t minlen = 128*1024;
  2672. size_t maxlen = 0;
  2673. HashTable str_hash;
  2674. zval *entry;
  2675. const char *key;
  2676. smart_str result = {0};
  2677. zend_ulong bitset[256/sizeof(zend_ulong)];
  2678. zend_ulong *num_bitset;
  2679. /* we will collect all possible key lengths */
  2680. num_bitset = ecalloc((slen + sizeof(zend_ulong)) / sizeof(zend_ulong), sizeof(zend_ulong));
  2681. memset(bitset, 0, sizeof(bitset));
  2682. /* check if original array has numeric keys */
  2683. ZEND_HASH_FOREACH_STR_KEY(pats, str_key) {
  2684. if (UNEXPECTED(!str_key)) {
  2685. num_keys = 1;
  2686. } else {
  2687. len = ZSTR_LEN(str_key);
  2688. if (UNEXPECTED(len < 1)) {
  2689. efree(num_bitset);
  2690. RETURN_FALSE;
  2691. } else if (UNEXPECTED(len > slen)) {
  2692. /* skip long patterns */
  2693. continue;
  2694. }
  2695. if (len > maxlen) {
  2696. maxlen = len;
  2697. }
  2698. if (len < minlen) {
  2699. minlen = len;
  2700. }
  2701. /* remember possible key length */
  2702. num_bitset[len / sizeof(zend_ulong)] |= Z_UL(1) << (len % sizeof(zend_ulong));
  2703. bitset[((unsigned char)ZSTR_VAL(str_key)[0]) / sizeof(zend_ulong)] |= Z_UL(1) << (((unsigned char)ZSTR_VAL(str_key)[0]) % sizeof(zend_ulong));
  2704. }
  2705. } ZEND_HASH_FOREACH_END();
  2706. if (UNEXPECTED(num_keys)) {
  2707. zend_string *key_used;
  2708. /* we have to rebuild HashTable with numeric keys */
  2709. zend_hash_init(&str_hash, zend_hash_num_elements(pats), NULL, NULL, 0);
  2710. ZEND_HASH_FOREACH_KEY_VAL_IND(pats, num_key, str_key, entry) {
  2711. if (UNEXPECTED(!str_key)) {
  2712. key_used = zend_long_to_str(num_key);
  2713. len = ZSTR_LEN(key_used);
  2714. if (UNEXPECTED(len > slen)) {
  2715. /* skip long patterns */
  2716. zend_string_release(key_used);
  2717. continue;
  2718. }
  2719. if (len > maxlen) {
  2720. maxlen = len;
  2721. }
  2722. if (len < minlen) {
  2723. minlen = len;
  2724. }
  2725. /* remember possible key length */
  2726. num_bitset[len / sizeof(zend_ulong)] |= Z_UL(1) << (len % sizeof(zend_ulong));
  2727. bitset[((unsigned char)ZSTR_VAL(key_used)[0]) / sizeof(zend_ulong)] |= Z_UL(1) << (((unsigned char)ZSTR_VAL(key_used)[0]) % sizeof(zend_ulong));
  2728. } else {
  2729. key_used = str_key;
  2730. len = ZSTR_LEN(key_used);
  2731. if (UNEXPECTED(len > slen)) {
  2732. /* skip long patterns */
  2733. continue;
  2734. }
  2735. }
  2736. zend_hash_add(&str_hash, key_used, entry);
  2737. if (UNEXPECTED(!str_key)) {
  2738. zend_string_release_ex(key_used, 0);
  2739. }
  2740. } ZEND_HASH_FOREACH_END();
  2741. pats = &str_hash;
  2742. }
  2743. if (UNEXPECTED(minlen > maxlen)) {
  2744. /* return the original string */
  2745. if (pats == &str_hash) {
  2746. zend_hash_destroy(&str_hash);
  2747. }
  2748. efree(num_bitset);
  2749. RETURN_STR_COPY(input);
  2750. }
  2751. old_pos = pos = 0;
  2752. while (pos <= slen - minlen) {
  2753. key = str + pos;
  2754. if (bitset[((unsigned char)key[0]) / sizeof(zend_ulong)] & (Z_UL(1) << (((unsigned char)key[0]) % sizeof(zend_ulong)))) {
  2755. len = maxlen;
  2756. if (len > slen - pos) {
  2757. len = slen - pos;
  2758. }
  2759. while (len >= minlen) {
  2760. if ((num_bitset[len / sizeof(zend_ulong)] & (Z_UL(1) << (len % sizeof(zend_ulong))))) {
  2761. entry = zend_hash_str_find(pats, key, len);
  2762. if (entry != NULL) {
  2763. zend_string *tmp;
  2764. zend_string *s = zval_get_tmp_string(entry, &tmp);
  2765. smart_str_appendl(&result, str + old_pos, pos - old_pos);
  2766. smart_str_append(&result, s);
  2767. old_pos = pos + len;
  2768. pos = old_pos - 1;
  2769. zend_tmp_string_release(tmp);
  2770. break;
  2771. }
  2772. }
  2773. len--;
  2774. }
  2775. }
  2776. pos++;
  2777. }
  2778. if (result.s) {
  2779. smart_str_appendl(&result, str + old_pos, slen - old_pos);
  2780. smart_str_0(&result);
  2781. RETVAL_NEW_STR(result.s);
  2782. } else {
  2783. smart_str_free(&result);
  2784. RETVAL_STR_COPY(input);
  2785. }
  2786. if (pats == &str_hash) {
  2787. zend_hash_destroy(&str_hash);
  2788. }
  2789. efree(num_bitset);
  2790. }
  2791. /* }}} */
  2792. /* {{{ php_char_to_str_ex
  2793. */
  2794. static zend_string* php_char_to_str_ex(zend_string *str, char from, char *to, size_t to_len, int case_sensitivity, zend_long *replace_count)
  2795. {
  2796. zend_string *result;
  2797. size_t char_count = 0;
  2798. int lc_from = 0;
  2799. const char *source, *source_end= ZSTR_VAL(str) + ZSTR_LEN(str);
  2800. char *target;
  2801. if (case_sensitivity) {
  2802. char *p = ZSTR_VAL(str), *e = p + ZSTR_LEN(str);
  2803. while ((p = memchr(p, from, (e - p)))) {
  2804. char_count++;
  2805. p++;
  2806. }
  2807. } else {
  2808. lc_from = tolower(from);
  2809. for (source = ZSTR_VAL(str); source < source_end; source++) {
  2810. if (tolower(*source) == lc_from) {
  2811. char_count++;
  2812. }
  2813. }
  2814. }
  2815. if (char_count == 0) {
  2816. return zend_string_copy(str);
  2817. }
  2818. if (to_len > 0) {
  2819. result = zend_string_safe_alloc(char_count, to_len - 1, ZSTR_LEN(str), 0);
  2820. } else {
  2821. result = zend_string_alloc(ZSTR_LEN(str) - char_count, 0);
  2822. }
  2823. target = ZSTR_VAL(result);
  2824. if (case_sensitivity) {
  2825. char *p = ZSTR_VAL(str), *e = p + ZSTR_LEN(str), *s = ZSTR_VAL(str);
  2826. while ((p = memchr(p, from, (e - p)))) {
  2827. memcpy(target, s, (p - s));
  2828. target += p - s;
  2829. memcpy(target, to, to_len);
  2830. target += to_len;
  2831. p++;
  2832. s = p;
  2833. if (replace_count) {
  2834. *replace_count += 1;
  2835. }
  2836. }
  2837. if (s < e) {
  2838. memcpy(target, s, (e - s));
  2839. target += e - s;
  2840. }
  2841. } else {
  2842. for (source = ZSTR_VAL(str); source < source_end; source++) {
  2843. if (tolower(*source) == lc_from) {
  2844. if (replace_count) {
  2845. *replace_count += 1;
  2846. }
  2847. memcpy(target, to, to_len);
  2848. target += to_len;
  2849. } else {
  2850. *target = *source;
  2851. target++;
  2852. }
  2853. }
  2854. }
  2855. *target = 0;
  2856. return result;
  2857. }
  2858. /* }}} */
  2859. /* {{{ php_str_to_str_ex
  2860. */
  2861. static zend_string *php_str_to_str_ex(zend_string *haystack,
  2862. const char *needle, size_t needle_len, const char *str, size_t str_len, zend_long *replace_count)
  2863. {
  2864. zend_string *new_str;
  2865. if (needle_len < ZSTR_LEN(haystack)) {
  2866. const char *end;
  2867. const char *p, *r;
  2868. char *e;
  2869. if (needle_len == str_len) {
  2870. new_str = NULL;
  2871. end = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
  2872. for (p = ZSTR_VAL(haystack); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
  2873. if (!new_str) {
  2874. new_str = zend_string_init(ZSTR_VAL(haystack), ZSTR_LEN(haystack), 0);
  2875. }
  2876. memcpy(ZSTR_VAL(new_str) + (r - ZSTR_VAL(haystack)), str, str_len);
  2877. (*replace_count)++;
  2878. }
  2879. if (!new_str) {
  2880. goto nothing_todo;
  2881. }
  2882. return new_str;
  2883. } else {
  2884. size_t count = 0;
  2885. const char *o = ZSTR_VAL(haystack);
  2886. const char *n = needle;
  2887. const char *endp = o + ZSTR_LEN(haystack);
  2888. while ((o = (char*)php_memnstr(o, n, needle_len, endp))) {
  2889. o += needle_len;
  2890. count++;
  2891. }
  2892. if (count == 0) {
  2893. /* Needle doesn't occur, shortcircuit the actual replacement. */
  2894. goto nothing_todo;
  2895. }
  2896. if (str_len > needle_len) {
  2897. new_str = zend_string_safe_alloc(count, str_len - needle_len, ZSTR_LEN(haystack), 0);
  2898. } else {
  2899. new_str = zend_string_alloc(count * (str_len - needle_len) + ZSTR_LEN(haystack), 0);
  2900. }
  2901. e = ZSTR_VAL(new_str);
  2902. end = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
  2903. for (p = ZSTR_VAL(haystack); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
  2904. memcpy(e, p, r - p);
  2905. e += r - p;
  2906. memcpy(e, str, str_len);
  2907. e += str_len;
  2908. (*replace_count)++;
  2909. }
  2910. if (p < end) {
  2911. memcpy(e, p, end - p);
  2912. e += end - p;
  2913. }
  2914. *e = '\0';
  2915. return new_str;
  2916. }
  2917. } else if (needle_len > ZSTR_LEN(haystack) || memcmp(ZSTR_VAL(haystack), needle, ZSTR_LEN(haystack))) {
  2918. nothing_todo:
  2919. return zend_string_copy(haystack);
  2920. } else {
  2921. if (str_len == 0) {
  2922. new_str = ZSTR_EMPTY_ALLOC();
  2923. } else if (str_len == 1) {
  2924. new_str = ZSTR_CHAR((zend_uchar)(*str));
  2925. } else {
  2926. new_str = zend_string_init(str, str_len, 0);
  2927. }
  2928. (*replace_count)++;
  2929. return new_str;
  2930. }
  2931. }
  2932. /* }}} */
  2933. /* {{{ php_str_to_str_i_ex
  2934. */
  2935. static zend_string *php_str_to_str_i_ex(zend_string *haystack, const char *lc_haystack,
  2936. zend_string *needle, const char *str, size_t str_len, zend_long *replace_count)
  2937. {
  2938. zend_string *new_str = NULL;
  2939. zend_string *lc_needle;
  2940. if (ZSTR_LEN(needle) < ZSTR_LEN(haystack)) {
  2941. const char *end;
  2942. const char *p, *r;
  2943. char *e;
  2944. if (ZSTR_LEN(needle) == str_len) {
  2945. lc_needle = php_string_tolower(needle);
  2946. end = lc_haystack + ZSTR_LEN(haystack);
  2947. for (p = lc_haystack; (r = (char*)php_memnstr(p, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle), end)); p = r + ZSTR_LEN(lc_needle)) {
  2948. if (!new_str) {
  2949. new_str = zend_string_init(ZSTR_VAL(haystack), ZSTR_LEN(haystack), 0);
  2950. }
  2951. memcpy(ZSTR_VAL(new_str) + (r - lc_haystack), str, str_len);
  2952. (*replace_count)++;
  2953. }
  2954. zend_string_release_ex(lc_needle, 0);
  2955. if (!new_str) {
  2956. goto nothing_todo;
  2957. }
  2958. return new_str;
  2959. } else {
  2960. size_t count = 0;
  2961. const char *o = lc_haystack;
  2962. const char *n;
  2963. const char *endp = o + ZSTR_LEN(haystack);
  2964. lc_needle = php_string_tolower(needle);
  2965. n = ZSTR_VAL(lc_needle);
  2966. while ((o = (char*)php_memnstr(o, n, ZSTR_LEN(lc_needle), endp))) {
  2967. o += ZSTR_LEN(lc_needle);
  2968. count++;
  2969. }
  2970. if (count == 0) {
  2971. /* Needle doesn't occur, shortcircuit the actual replacement. */
  2972. zend_string_release_ex(lc_needle, 0);
  2973. goto nothing_todo;
  2974. }
  2975. if (str_len > ZSTR_LEN(lc_needle)) {
  2976. new_str = zend_string_safe_alloc(count, str_len - ZSTR_LEN(lc_needle), ZSTR_LEN(haystack), 0);
  2977. } else {
  2978. new_str = zend_string_alloc(count * (str_len - ZSTR_LEN(lc_needle)) + ZSTR_LEN(haystack), 0);
  2979. }
  2980. e = ZSTR_VAL(new_str);
  2981. end = lc_haystack + ZSTR_LEN(haystack);
  2982. for (p = lc_haystack; (r = (char*)php_memnstr(p, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle), end)); p = r + ZSTR_LEN(lc_needle)) {
  2983. memcpy(e, ZSTR_VAL(haystack) + (p - lc_haystack), r - p);
  2984. e += r - p;
  2985. memcpy(e, str, str_len);
  2986. e += str_len;
  2987. (*replace_count)++;
  2988. }
  2989. if (p < end) {
  2990. memcpy(e, ZSTR_VAL(haystack) + (p - lc_haystack), end - p);
  2991. e += end - p;
  2992. }
  2993. *e = '\0';
  2994. zend_string_release_ex(lc_needle, 0);
  2995. return new_str;
  2996. }
  2997. } else if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
  2998. nothing_todo:
  2999. return zend_string_copy(haystack);
  3000. } else {
  3001. lc_needle = php_string_tolower(needle);
  3002. if (memcmp(lc_haystack, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle))) {
  3003. zend_string_release_ex(lc_needle, 0);
  3004. goto nothing_todo;
  3005. }
  3006. zend_string_release_ex(lc_needle, 0);
  3007. new_str = zend_string_init(str, str_len, 0);
  3008. (*replace_count)++;
  3009. return new_str;
  3010. }
  3011. }
  3012. /* }}} */
  3013. /* {{{ php_str_to_str
  3014. */
  3015. PHPAPI zend_string *php_str_to_str(const char *haystack, size_t length, const char *needle, size_t needle_len, const char *str, size_t str_len)
  3016. {
  3017. zend_string *new_str;
  3018. if (needle_len < length) {
  3019. const char *end;
  3020. const char *s, *p;
  3021. char *e, *r;
  3022. if (needle_len == str_len) {
  3023. new_str = zend_string_init(haystack, length, 0);
  3024. end = ZSTR_VAL(new_str) + length;
  3025. for (p = ZSTR_VAL(new_str); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
  3026. memcpy(r, str, str_len);
  3027. }
  3028. return new_str;
  3029. } else {
  3030. if (str_len < needle_len) {
  3031. new_str = zend_string_alloc(length, 0);
  3032. } else {
  3033. size_t count = 0;
  3034. const char *o = haystack;
  3035. const char *n = needle;
  3036. const char *endp = o + length;
  3037. while ((o = (char*)php_memnstr(o, n, needle_len, endp))) {
  3038. o += needle_len;
  3039. count++;
  3040. }
  3041. if (count == 0) {
  3042. /* Needle doesn't occur, shortcircuit the actual replacement. */
  3043. new_str = zend_string_init(haystack, length, 0);
  3044. return new_str;
  3045. } else {
  3046. if (str_len > needle_len) {
  3047. new_str = zend_string_safe_alloc(count, str_len - needle_len, length, 0);
  3048. } else {
  3049. new_str = zend_string_alloc(count * (str_len - needle_len) + length, 0);
  3050. }
  3051. }
  3052. }
  3053. s = e = ZSTR_VAL(new_str);
  3054. end = haystack + length;
  3055. for (p = haystack; (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
  3056. memcpy(e, p, r - p);
  3057. e += r - p;
  3058. memcpy(e, str, str_len);
  3059. e += str_len;
  3060. }
  3061. if (p < end) {
  3062. memcpy(e, p, end - p);
  3063. e += end - p;
  3064. }
  3065. *e = '\0';
  3066. new_str = zend_string_truncate(new_str, e - s, 0);
  3067. return new_str;
  3068. }
  3069. } else if (needle_len > length || memcmp(haystack, needle, length)) {
  3070. new_str = zend_string_init(haystack, length, 0);
  3071. return new_str;
  3072. } else {
  3073. new_str = zend_string_init(str, str_len, 0);
  3074. return new_str;
  3075. }
  3076. }
  3077. /* }}} */
  3078. /* {{{ proto string strtr(string str, string from[, string to])
  3079. Translates characters in str using given translation tables */
  3080. PHP_FUNCTION(strtr)
  3081. {
  3082. zval *from;
  3083. zend_string *str;
  3084. char *to = NULL;
  3085. size_t to_len = 0;
  3086. int ac = ZEND_NUM_ARGS();
  3087. ZEND_PARSE_PARAMETERS_START(2, 3)
  3088. Z_PARAM_STR(str)
  3089. Z_PARAM_ZVAL(from)
  3090. Z_PARAM_OPTIONAL
  3091. Z_PARAM_STRING(to, to_len)
  3092. ZEND_PARSE_PARAMETERS_END();
  3093. if (ac == 2 && Z_TYPE_P(from) != IS_ARRAY) {
  3094. php_error_docref(NULL, E_WARNING, "The second argument is not an array");
  3095. RETURN_FALSE;
  3096. }
  3097. /* shortcut for empty string */
  3098. if (ZSTR_LEN(str) == 0) {
  3099. RETURN_EMPTY_STRING();
  3100. }
  3101. if (ac == 2) {
  3102. HashTable *pats = Z_ARRVAL_P(from);
  3103. if (zend_hash_num_elements(pats) < 1) {
  3104. RETURN_STR_COPY(str);
  3105. } else if (zend_hash_num_elements(pats) == 1) {
  3106. zend_long num_key;
  3107. zend_string *str_key, *tmp_str, *replace, *tmp_replace;
  3108. zval *entry;
  3109. ZEND_HASH_FOREACH_KEY_VAL_IND(pats, num_key, str_key, entry) {
  3110. tmp_str = NULL;
  3111. if (UNEXPECTED(!str_key)) {
  3112. str_key = tmp_str = zend_long_to_str(num_key);
  3113. }
  3114. replace = zval_get_tmp_string(entry, &tmp_replace);
  3115. if (ZSTR_LEN(str_key) < 1) {
  3116. RETVAL_STR_COPY(str);
  3117. } else if (ZSTR_LEN(str_key) == 1) {
  3118. RETVAL_STR(php_char_to_str_ex(str,
  3119. ZSTR_VAL(str_key)[0],
  3120. ZSTR_VAL(replace),
  3121. ZSTR_LEN(replace),
  3122. 1,
  3123. NULL));
  3124. } else {
  3125. zend_long dummy;
  3126. RETVAL_STR(php_str_to_str_ex(str,
  3127. ZSTR_VAL(str_key), ZSTR_LEN(str_key),
  3128. ZSTR_VAL(replace), ZSTR_LEN(replace), &dummy));
  3129. }
  3130. zend_tmp_string_release(tmp_str);
  3131. zend_tmp_string_release(tmp_replace);
  3132. return;
  3133. } ZEND_HASH_FOREACH_END();
  3134. } else {
  3135. php_strtr_array(return_value, str, pats);
  3136. }
  3137. } else {
  3138. convert_to_string_ex(from);
  3139. RETURN_STR(php_strtr_ex(str,
  3140. Z_STRVAL_P(from),
  3141. to,
  3142. MIN(Z_STRLEN_P(from), to_len)));
  3143. }
  3144. }
  3145. /* }}} */
  3146. /* {{{ proto string strrev(string str)
  3147. Reverse a string */
  3148. #if ZEND_INTRIN_SSSE3_NATIVE
  3149. #include <tmmintrin.h>
  3150. #endif
  3151. PHP_FUNCTION(strrev)
  3152. {
  3153. zend_string *str;
  3154. const char *s, *e;
  3155. char *p;
  3156. zend_string *n;
  3157. ZEND_PARSE_PARAMETERS_START(1, 1)
  3158. Z_PARAM_STR(str)
  3159. ZEND_PARSE_PARAMETERS_END();
  3160. n = zend_string_alloc(ZSTR_LEN(str), 0);
  3161. p = ZSTR_VAL(n);
  3162. s = ZSTR_VAL(str);
  3163. e = s + ZSTR_LEN(str);
  3164. --e;
  3165. #if ZEND_INTRIN_SSSE3_NATIVE
  3166. if (e - s > 15) {
  3167. const __m128i map = _mm_set_epi8(
  3168. 0, 1, 2, 3,
  3169. 4, 5, 6, 7,
  3170. 8, 9, 10, 11,
  3171. 12, 13, 14, 15);
  3172. do {
  3173. const __m128i str = _mm_loadu_si128((__m128i *)(e - 15));
  3174. _mm_storeu_si128((__m128i *)p, _mm_shuffle_epi8(str, map));
  3175. p += 16;
  3176. e -= 16;
  3177. } while (e - s > 15);
  3178. }
  3179. #endif
  3180. while (e >= s) {
  3181. *p++ = *e--;
  3182. }
  3183. *p = '\0';
  3184. RETVAL_NEW_STR(n);
  3185. }
  3186. /* }}} */
  /* {{{ php_similar_str
   *
   * Finds the longest run of bytes common to txt1 (len1 bytes) and txt2
   * (len2 bytes) by trying every pair of start offsets.
   *
   * On return *max is the length of that run and *count is the number of
   * times the best length was raised during the search. *pos1 / *pos2 hold
   * the start offsets of the first run that reached *max; they are left
   * untouched when no byte matches, so callers must initialise them.
   */
  static void php_similar_str(const char *txt1, size_t len1, const char *txt2, size_t len2, size_t *pos1, size_t *pos2, size_t *max, size_t *count)
  {
      size_t i, j;

      *max = 0;
      *count = 0;

      for (i = 0; i < len1; i++) {
          for (j = 0; j < len2; j++) {
              size_t run = 0;

              /* Extend the match while both strings have bytes left and agree. */
              while (i + run < len1 && j + run < len2 && txt1[i + run] == txt2[j + run]) {
                  run++;
              }
              if (run > *max) {
                  *max = run;
                  *count += 1;
                  *pos1 = i;
                  *pos2 = j;
              }
          }
      }
  }
  3209. /* }}} */
  /* {{{ php_similar_char
   *
   * Returns the number of matching bytes between txt1 and txt2: the length of
   * their longest common run plus, recursively, the result for the parts to
   * the left and to the right of that run.
   */
  static size_t php_similar_char(const char *txt1, size_t len1, const char *txt2, size_t len2)
  {
      size_t pos1 = 0, pos2 = 0, max, count;
      size_t total;

      php_similar_str(txt1, len1, txt2, len2, &pos1, &pos2, &max, &count);
      if (max == 0) {
          /* Nothing in common at all. */
          return 0;
      }

      total = max;

      /* Left side: only examined when both prefixes are non-empty and the
       * search raised its best length more than once. */
      if (pos1 && pos2 && count > 1) {
          total += php_similar_char(txt1, pos1, txt2, pos2);
      }

      /* Right side: only examined when both strings continue past the run. */
      if ((pos1 + max < len1) && (pos2 + max < len2)) {
          total += php_similar_char(txt1 + pos1 + max, len1 - pos1 - max,
                                    txt2 + pos2 + max, len2 - pos2 - max);
      }

      return total;
  }
  3229. /* }}} */
  3230. /* {{{ proto int similar_text(string str1, string str2 [, float percent])
  3231. Calculates the similarity between two strings */
  3232. PHP_FUNCTION(similar_text)
  3233. {
  3234. zend_string *t1, *t2;
  3235. zval *percent = NULL;
  3236. int ac = ZEND_NUM_ARGS();
  3237. size_t sim;
  3238. ZEND_PARSE_PARAMETERS_START(2, 3)
  3239. Z_PARAM_STR(t1)
  3240. Z_PARAM_STR(t2)
  3241. Z_PARAM_OPTIONAL
  3242. Z_PARAM_ZVAL_DEREF(percent)
  3243. ZEND_PARSE_PARAMETERS_END();
  3244. if (ac > 2) {
  3245. convert_to_double_ex(percent);
  3246. }
  3247. if (ZSTR_LEN(t1) + ZSTR_LEN(t2) == 0) {
  3248. if (ac > 2) {
  3249. Z_DVAL_P(percent) = 0;
  3250. }
  3251. RETURN_LONG(0);
  3252. }
  3253. sim = php_similar_char(ZSTR_VAL(t1), ZSTR_LEN(t1), ZSTR_VAL(t2), ZSTR_LEN(t2));
  3254. if (ac > 2) {
  3255. Z_DVAL_P(percent) = sim * 200.0 / (ZSTR_LEN(t1) + ZSTR_LEN(t2));
  3256. }
  3257. RETURN_LONG(sim);
  3258. }
  3259. /* }}} */
  3260. /* {{{ proto string addcslashes(string str, string charlist)
  3261. Escapes all chars mentioned in charlist with backslash. It creates octal representations if asked to backslash characters with 8th bit set or with ASCII<32 (except '\n', '\r', '\t' etc...) */
  3262. PHP_FUNCTION(addcslashes)
  3263. {
  3264. zend_string *str, *what;
  3265. ZEND_PARSE_PARAMETERS_START(2, 2)
  3266. Z_PARAM_STR(str)
  3267. Z_PARAM_STR(what)
  3268. ZEND_PARSE_PARAMETERS_END();
  3269. if (ZSTR_LEN(str) == 0) {
  3270. RETURN_EMPTY_STRING();
  3271. }
  3272. if (ZSTR_LEN(what) == 0) {
  3273. RETURN_STR_COPY(str);
  3274. }
  3275. RETURN_STR(php_addcslashes_str(ZSTR_VAL(str), ZSTR_LEN(str), ZSTR_VAL(what), ZSTR_LEN(what)));
  3276. }
  3277. /* }}} */
  3278. /* {{{ proto string addslashes(string str)
  3279. Escapes single quote, double quotes and backslash characters in a string with backslashes */
  3280. PHP_FUNCTION(addslashes)
  3281. {
  3282. zend_string *str;
  3283. ZEND_PARSE_PARAMETERS_START(1, 1)
  3284. Z_PARAM_STR(str)
  3285. ZEND_PARSE_PARAMETERS_END();
  3286. if (ZSTR_LEN(str) == 0) {
  3287. RETURN_EMPTY_STRING();
  3288. }
  3289. RETURN_STR(php_addslashes(str));
  3290. }
  3291. /* }}} */
  3292. /* {{{ proto string stripcslashes(string str)
  3293. Strips backslashes from a string. Uses C-style conventions */
  3294. PHP_FUNCTION(stripcslashes)
  3295. {
  3296. zend_string *str;
  3297. ZEND_PARSE_PARAMETERS_START(1, 1)
  3298. Z_PARAM_STR(str)
  3299. ZEND_PARSE_PARAMETERS_END();
  3300. ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
  3301. php_stripcslashes(Z_STR_P(return_value));
  3302. }
  3303. /* }}} */
  3304. /* {{{ proto string stripslashes(string str)
  3305. Strips backslashes from a string */
  3306. PHP_FUNCTION(stripslashes)
  3307. {
  3308. zend_string *str;
  3309. ZEND_PARSE_PARAMETERS_START(1, 1)
  3310. Z_PARAM_STR(str)
  3311. ZEND_PARSE_PARAMETERS_END();
  3312. ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
  3313. php_stripslashes(Z_STR_P(return_value));
  3314. }
  3315. /* }}} */
  3316. #ifndef HAVE_STRERROR
  3317. /* {{{ php_strerror
  3318. */
  3319. char *php_strerror(int errnum)
  3320. {
  3321. extern int sys_nerr;
  3322. extern char *sys_errlist[];
  3323. if ((unsigned int) errnum < sys_nerr) {
  3324. return(sys_errlist[errnum]);
  3325. }
  3326. (void) snprintf(BG(str_ebuf), sizeof(php_basic_globals.str_ebuf), "Unknown error: %d", errnum);
  3327. return(BG(str_ebuf));
  3328. }
  3329. /* }}} */
  3330. #endif
  3331. /* {{{ php_stripcslashes
  3332. */
  3333. PHPAPI void php_stripcslashes(zend_string *str)
  3334. {
  3335. const char *source, *end;
  3336. char *target;
  3337. size_t nlen = ZSTR_LEN(str), i;
  3338. char numtmp[4];
  3339. for (source = (char*)ZSTR_VAL(str), end = source + ZSTR_LEN(str), target = ZSTR_VAL(str); source < end; source++) {
  3340. if (*source == '\\' && source + 1 < end) {
  3341. source++;
  3342. switch (*source) {
  3343. case 'n': *target++='\n'; nlen--; break;
  3344. case 'r': *target++='\r'; nlen--; break;
  3345. case 'a': *target++='\a'; nlen--; break;
  3346. case 't': *target++='\t'; nlen--; break;
  3347. case 'v': *target++='\v'; nlen--; break;
  3348. case 'b': *target++='\b'; nlen--; break;
  3349. case 'f': *target++='\f'; nlen--; break;
  3350. case '\\': *target++='\\'; nlen--; break;
  3351. case 'x':
  3352. if (source+1 < end && isxdigit((int)(*(source+1)))) {
  3353. numtmp[0] = *++source;
  3354. if (source+1 < end && isxdigit((int)(*(source+1)))) {
  3355. numtmp[1] = *++source;
  3356. numtmp[2] = '\0';
  3357. nlen-=3;
  3358. } else {
  3359. numtmp[1] = '\0';
  3360. nlen-=2;
  3361. }
  3362. *target++=(char)strtol(numtmp, NULL, 16);
  3363. break;
  3364. }
  3365. /* break is left intentionally */
  3366. default:
  3367. i=0;
  3368. while (source < end && *source >= '0' && *source <= '7' && i<3) {
  3369. numtmp[i++] = *source++;
  3370. }
  3371. if (i) {
  3372. numtmp[i]='\0';
  3373. *target++=(char)strtol(numtmp, NULL, 8);
  3374. nlen-=i;
  3375. source--;
  3376. } else {
  3377. *target++=*source;
  3378. nlen--;
  3379. }
  3380. }
  3381. } else {
  3382. *target++=*source;
  3383. }
  3384. }
  3385. if (nlen != 0) {
  3386. *target='\0';
  3387. }
  3388. ZSTR_LEN(str) = nlen;
  3389. }
  3390. /* }}} */
  3391. /* {{{ php_addcslashes_str
  3392. */
  3393. PHPAPI zend_string *php_addcslashes_str(const char *str, size_t len, char *what, size_t wlength)
  3394. {
  3395. char flags[256];
  3396. char *target;
  3397. const char *source, *end;
  3398. char c;
  3399. size_t newlen;
  3400. zend_string *new_str = zend_string_safe_alloc(4, len, 0, 0);
  3401. php_charmask((unsigned char *)what, wlength, flags);
  3402. for (source = str, end = source + len, target = ZSTR_VAL(new_str); source < end; source++) {
  3403. c = *source;
  3404. if (flags[(unsigned char)c]) {
  3405. if ((unsigned char) c < 32 || (unsigned char) c > 126) {
  3406. *target++ = '\\';
  3407. switch (c) {
  3408. case '\n': *target++ = 'n'; break;
  3409. case '\t': *target++ = 't'; break;
  3410. case '\r': *target++ = 'r'; break;
  3411. case '\a': *target++ = 'a'; break;
  3412. case '\v': *target++ = 'v'; break;
  3413. case '\b': *target++ = 'b'; break;
  3414. case '\f': *target++ = 'f'; break;
  3415. default: target += sprintf(target, "%03o", (unsigned char) c);
  3416. }
  3417. continue;
  3418. }
  3419. *target++ = '\\';
  3420. }
  3421. *target++ = c;
  3422. }
  3423. *target = 0;
  3424. newlen = target - ZSTR_VAL(new_str);
  3425. if (newlen < len * 4) {
  3426. new_str = zend_string_truncate(new_str, newlen, 0);
  3427. }
  3428. return new_str;
  3429. }
  3430. /* }}} */
  3431. /* {{{ php_addcslashes
  3432. */
  3433. PHPAPI zend_string *php_addcslashes(zend_string *str, char *what, size_t wlength)
  3434. {
  3435. return php_addcslashes_str(ZSTR_VAL(str), ZSTR_LEN(str), what, wlength);
  3436. }
  3437. /* }}} */
  3438. /* {{{ php_addslashes */
  3439. #if ZEND_INTRIN_SSE4_2_NATIVE
  3440. # include <nmmintrin.h>
  3441. # include "Zend/zend_bitset.h"
  3442. #elif ZEND_INTRIN_SSE4_2_RESOLVER
  3443. # include <nmmintrin.h>
  3444. # include "Zend/zend_bitset.h"
  3445. # include "Zend/zend_cpuinfo.h"
  3446. ZEND_INTRIN_SSE4_2_FUNC_DECL(zend_string *php_addslashes_sse42(zend_string *str));
  3447. zend_string *php_addslashes_default(zend_string *str);
  3448. ZEND_INTRIN_SSE4_2_FUNC_DECL(void php_stripslashes_sse42(zend_string *str));
  3449. void php_stripslashes_default(zend_string *str);
  3450. # if ZEND_INTRIN_SSE4_2_FUNC_PROTO
  3451. PHPAPI zend_string *php_addslashes(zend_string *str) __attribute__((ifunc("resolve_addslashes")));
  3452. PHPAPI void php_stripslashes(zend_string *str) __attribute__((ifunc("resolve_stripslashes")));
  3453. ZEND_NO_SANITIZE_ADDRESS
  3454. static void *resolve_addslashes() {
  3455. if (zend_cpu_supports_sse42()) {
  3456. return php_addslashes_sse42;
  3457. }
  3458. return php_addslashes_default;
  3459. }
  3460. ZEND_NO_SANITIZE_ADDRESS
  3461. static void *resolve_stripslashes() {
  3462. if (zend_cpu_supports_sse42()) {
  3463. return php_stripslashes_sse42;
  3464. }
  3465. return php_stripslashes_default;
  3466. }
  3467. # else /* ZEND_INTRIN_SSE4_2_FUNC_PTR */
  3468. PHPAPI zend_string *(*php_addslashes)(zend_string *str) = NULL;
  3469. PHPAPI void (*php_stripslashes)(zend_string *str) = NULL;
  3470. /* {{{ PHP_MINIT_FUNCTION
  3471. */
  3472. PHP_MINIT_FUNCTION(string_intrin)
  3473. {
  3474. if (zend_cpu_supports(ZEND_CPU_FEATURE_SSE42)) {
  3475. php_addslashes = php_addslashes_sse42;
  3476. php_stripslashes = php_stripslashes_sse42;
  3477. } else {
  3478. php_addslashes = php_addslashes_default;
  3479. php_stripslashes = php_stripslashes_default;
  3480. }
  3481. return SUCCESS;
  3482. }
  3483. /* }}} */
  3484. # endif
  3485. #endif
  /*
   * SSE4.2 implementation of addslashes. It is compiled as php_addslashes
   * itself when ZEND_INTRIN_SSE4_2_NATIVE is set, or as php_addslashes_sse42
   * when ZEND_INTRIN_SSE4_2_RESOLVER is set.
   *
   * Returns a string in which ', " and \ are preceded by a backslash and a
   * NUL byte is written as the two characters \0. When no byte needs
   * escaping, zend_string_copy(str) is returned; a NULL str yields
   * ZSTR_EMPTY_ALLOC().
   */
  3486. #if ZEND_INTRIN_SSE4_2_NATIVE || ZEND_INTRIN_SSE4_2_RESOLVER
  3487. # if ZEND_INTRIN_SSE4_2_NATIVE
  3488. PHPAPI zend_string *php_addslashes(zend_string *str) /* {{{ */
  3489. # elif ZEND_INTRIN_SSE4_2_RESOLVER
  3490. zend_string *php_addslashes_sse42(zend_string *str)
  3491. # endif
  3492. {
  /* The four bytes searched for: ' " \ and NUL (compared with length 4 below). */
  3493. ZEND_SET_ALIGNED(16, static const char slashchars[16]) = "\'\"\\\0";
  3494. __m128i w128, s128;
  /* res stays 0 unless a 16-byte compare found something; it is used below
     as a bit mask in which bit i stands for byte i of the current chunk. */
  3495. uint32_t res = 0;
  3496. /* maximum string length, worst case situation */
  3497. char *target;
  3498. const char *source, *end;
  3499. size_t offset;
  3500. zend_string *new_str;
  3501. if (!str) {
  3502. return ZSTR_EMPTY_ALLOC();
  3503. }
  3504. source = ZSTR_VAL(str);
  3505. end = source + ZSTR_LEN(str);
  /* Phase 1: look for the first byte that needs escaping, 16 bytes at a
     time while more than 15 bytes remain ... */
  3506. if (ZSTR_LEN(str) > 15) {
  3507. w128 = _mm_load_si128((__m128i *)slashchars);
  3508. do {
  3509. s128 = _mm_loadu_si128((__m128i *)source);
  3510. res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
  3511. if (res) {
  3512. goto do_escape;
  3513. }
  3514. source += 16;
  3515. } while ((end - source) > 15);
  3516. }
  /* ... then byte by byte for the remaining tail. */
  3517. while (source < end) {
  3518. switch (*source) {
  3519. case '\0':
  3520. case '\'':
  3521. case '\"':
  3522. case '\\':
  3523. goto do_escape;
  3524. default:
  3525. source++;
  3526. break;
  3527. }
  3528. }
  /* Nothing needs escaping. */
  3529. return zend_string_copy(str);
  3530. do_escape:
  /* Phase 2: `offset` bytes are already known to be clean and are copied
     as they are. NOTE(review): the allocation presumably reserves
     offset + 2 * (remaining bytes), i.e. room for escaping every remaining
     byte - confirm against zend_string_safe_alloc. */
  3531. offset = source - (char *)ZSTR_VAL(str);
  3532. new_str = zend_string_safe_alloc(2, ZSTR_LEN(str) - offset, offset, 0);
  3533. memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
  3534. target = ZSTR_VAL(new_str) + offset;
  /* res != 0 means the jump came from the 16-byte scan: finish the chunk
     at `source` using the mask that was already computed for it. */
  3535. if (res) {
  3536. int pos = 0;
  3537. do {
  /* n = number of clean bytes before the next flagged byte. */
  3538. int i, n = zend_ulong_ntz(res);
  3539. for (i = 0; i < n; i++) {
  3540. *target++ = source[pos + i];
  3541. }
  3542. pos += n;
  3543. *target++ = '\\';
  3544. if (source[pos] == '\0') {
  3545. *target++ = '0';
  3546. } else {
  3547. *target++ = source[pos];
  3548. }
  3549. pos++;
  /* Drop the bits that have been handled. */
  3550. res = res >> (n + 1);
  3551. } while (res);
  /* Copy the rest of the chunk, which has no flagged byte left. */
  3552. for (; pos < 16; pos++) {
  3553. *target++ = source[pos];
  3554. }
  3555. source += 16;
  3556. } else if (end - source > 15) {
  3557. w128 = _mm_load_si128((__m128i *)slashchars);
  3558. }
  /* Remaining full 16-byte chunks: escape via the mask, or store the chunk
     unchanged when the mask is empty. */
  3559. for (; end - source > 15; source += 16) {
  3560. int pos = 0;
  3561. s128 = _mm_loadu_si128((__m128i *)source);
  3562. res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
  3563. if (res) {
  3564. do {
  3565. int i, n = zend_ulong_ntz(res);
  3566. for (i = 0; i < n; i++) {
  3567. *target++ = source[pos + i];
  3568. }
  3569. pos += n;
  3570. *target++ = '\\';
  3571. if (source[pos] == '\0') {
  3572. *target++ = '0';
  3573. } else {
  3574. *target++ = source[pos];
  3575. }
  3576. pos++;
  3577. res = res >> (n + 1);
  3578. } while (res);
  3579. for (; pos < 16; pos++) {
  3580. *target++ = source[pos];
  3581. }
  3582. } else {
  3583. _mm_storeu_si128((__m128i*)target, s128);
  3584. target += 16;
  3585. }
  3586. }
  /* Tail of fewer than 16 bytes, handled one byte at a time. */
  3587. while (source < end) {
  3588. switch (*source) {
  3589. case '\0':
  3590. *target++ = '\\';
  3591. *target++ = '0';
  3592. break;
  3593. case '\'':
  3594. case '\"':
  3595. case '\\':
  3596. *target++ = '\\';
  3597. /* break is missing *intentionally* */
  3598. default:
  3599. *target++ = *source;
  3600. break;
  3601. }
  3602. source++;
  3603. }
  3604. *target = '\0';
  /* Shrink the allocation only when more than 16 bytes would be left
     unused; otherwise just record the real length. */
  3605. if (ZSTR_LEN(new_str) - (target - ZSTR_VAL(new_str)) > 16) {
  3606. new_str = zend_string_truncate(new_str, target - ZSTR_VAL(new_str), 0);
  3607. } else {
  3608. ZSTR_LEN(new_str) = target - ZSTR_VAL(new_str);
  3609. }
  3610. return new_str;
  3611. }
  3612. /* }}} */
  3613. #endif
  3614. #if !ZEND_INTRIN_SSE4_2_NATIVE
  3615. # if ZEND_INTRIN_SSE4_2_RESOLVER
  3616. zend_string *php_addslashes_default(zend_string *str) /* {{{ */
  3617. # else
  3618. PHPAPI zend_string *php_addslashes(zend_string *str)
  3619. # endif
  3620. {
  3621. /* maximum string length, worst case situation */
  3622. char *target;
  3623. const char *source, *end;
  3624. size_t offset;
  3625. zend_string *new_str;
  3626. if (!str) {
  3627. return ZSTR_EMPTY_ALLOC();
  3628. }
  3629. source = ZSTR_VAL(str);
  3630. end = source + ZSTR_LEN(str);
  3631. while (source < end) {
  3632. switch (*source) {
  3633. case '\0':
  3634. case '\'':
  3635. case '\"':
  3636. case '\\':
  3637. goto do_escape;
  3638. default:
  3639. source++;
  3640. break;
  3641. }
  3642. }
  3643. return zend_string_copy(str);
  3644. do_escape:
  3645. offset = source - (char *)ZSTR_VAL(str);
  3646. new_str = zend_string_safe_alloc(2, ZSTR_LEN(str) - offset, offset, 0);
  3647. memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
  3648. target = ZSTR_VAL(new_str) + offset;
  3649. while (source < end) {
  3650. switch (*source) {
  3651. case '\0':
  3652. *target++ = '\\';
  3653. *target++ = '0';
  3654. break;
  3655. case '\'':
  3656. case '\"':
  3657. case '\\':
  3658. *target++ = '\\';
  3659. /* break is missing *intentionally* */
  3660. default:
  3661. *target++ = *source;
  3662. break;
  3663. }
  3664. source++;
  3665. }
  3666. *target = '\0';
  3667. if (ZSTR_LEN(new_str) - (target - ZSTR_VAL(new_str)) > 16) {
  3668. new_str = zend_string_truncate(new_str, target - ZSTR_VAL(new_str), 0);
  3669. } else {
  3670. ZSTR_LEN(new_str) = target - ZSTR_VAL(new_str);
  3671. }
  3672. return new_str;
  3673. }
  3674. #endif
  3675. /* }}} */
  3676. /* }}} */
  3677. /* {{{ php_stripslashes
  3678. *
  3679. * be careful, this edits the string in-place */
  3680. static zend_always_inline char *php_stripslashes_impl(const char *str, char *out, size_t len)
  3681. {
  3682. while (len > 0) {
  3683. if (*str == '\\') {
  3684. str++; /* skip the slash */
  3685. len--;
  3686. if (len > 0) {
  3687. if (*str == '0') {
  3688. *out++='\0';
  3689. str++;
  3690. } else {
  3691. *out++ = *str++; /* preserve the next character */
  3692. }
  3693. len--;
  3694. }
  3695. } else {
  3696. *out++ = *str++;
  3697. len--;
  3698. }
  3699. }
  3700. return out;
  3701. }
  /*
   * SSE4.2-build implementation of stripslashes, compiled as php_stripslashes
   * itself when ZEND_INTRIN_SSE4_2_NATIVE is set, or as
   * php_stripslashes_sse42 when ZEND_INTRIN_SSE4_2_RESOLVER is set.
   *
   * Works in place on str: s is the read position, t the write position and
   * l the number of input bytes still to be read. The length and terminator
   * of str are updated at the end when the string got shorter.
   */
  3702. #if ZEND_INTRIN_SSE4_2_NATIVE || ZEND_INTRIN_SSE4_2_RESOLVER
  3703. # if ZEND_INTRIN_SSE4_2_NATIVE
  3704. PHPAPI void php_stripslashes(zend_string *str)
  3705. # elif ZEND_INTRIN_SSE4_2_RESOLVER
  3706. void php_stripslashes_sse42(zend_string *str)
  3707. # endif
  3708. {
  3709. const char *s = ZSTR_VAL(str);
  3710. char *t = ZSTR_VAL(str);
  3711. size_t l = ZSTR_LEN(str);
  /* Process 16 bytes at a time while at least 16 bytes remain. */
  3712. if (l > 15) {
  3713. const __m128i slash = _mm_set1_epi8('\\');
  3714. do {
  3715. __m128i in = _mm_loadu_si128((__m128i *)s);
  3716. __m128i any_slash = _mm_cmpeq_epi8(in, slash);
  /* res is non-zero when the chunk contains at least one backslash. */
  3717. uint32_t res = _mm_movemask_epi8(any_slash);
  3718. if (res) {
  /* n bytes precede the first backslash and are copied unchanged. */
  3719. int i, n = zend_ulong_ntz(res);
  /* e marks the last byte of this chunk (index 15). */
  3720. const char *e = s + 15;
  3721. l -= n;
  3722. for (i = 0; i < n; i++) {
  3723. *t++ = *s++;
  3724. }
  /* Rest of the chunk, byte by byte: a backslash is skipped and the
     byte after it is kept, with "\0" turned into a NUL byte. */
  3725. for (; s < e; s++) {
  3726. if (*s == '\\') {
  3727. s++;
  3728. l--;
  3729. if (*s == '0') {
  3730. *t = '\0';
  3731. } else {
  3732. *t = *s;
  3733. }
  3734. } else {
  3735. *t = *s;
  3736. }
  3737. t++;
  3738. l--;
  3739. }
  3740. } else {
  /* No backslash in this chunk: move all 16 bytes at once. */
  3741. _mm_storeu_si128((__m128i *)t, in);
  3742. s += 16;
  3743. t += 16;
  3744. l -= 16;
  3745. }
  3746. } while (l > 15);
  3747. }
  /* Whatever is left (fewer than 16 bytes) goes through the scalar worker. */
  3748. t = php_stripslashes_impl(s, t, l);
  /* Only touch length and terminator when something was removed. */
  3749. if (t != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
  3750. ZSTR_LEN(str) = t - ZSTR_VAL(str);
  3751. ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
  3752. }
  3753. }
  3754. #endif
  3755. #if !ZEND_INTRIN_SSE4_2_NATIVE
  3756. # if ZEND_INTRIN_SSE4_2_RESOLVER
  3757. void php_stripslashes_default(zend_string *str) /* {{{ */
  3758. # else
  3759. PHPAPI void php_stripslashes(zend_string *str)
  3760. # endif
  3761. {
  3762. const char *t = php_stripslashes_impl(ZSTR_VAL(str), ZSTR_VAL(str), ZSTR_LEN(str));
  3763. if (t != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
  3764. ZSTR_LEN(str) = t - ZSTR_VAL(str);
  3765. ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
  3766. }
  3767. }
  3768. /* }}} */
  3769. #endif
  3770. /* }}} */
  /* Block type tags: 1 for English, 2 for Hebrew.
   * NOTE(review): their users lie outside this part of the file. */
  #define _HEB_BLOCK_TYPE_ENG 1
  #define _HEB_BLOCK_TYPE_HEB 2
  /* Byte classifiers yielding 1 or 0. The argument is parenthesised before
   * the cast so that an expression such as `a - b` is converted as a whole;
   * note that it is still evaluated twice, so avoid side effects.
   *   isheb      - byte value in the range 224..250
   *   _isblank   - space or tab
   *   _isnewline - line feed or carriage return */
  #define isheb(c) (((((unsigned char)(c)) >= 224) && (((unsigned char)(c)) <= 250)) ? 1 : 0)
  #define _isblank(c) (((((unsigned char)(c)) == ' ' || ((unsigned char)(c)) == '\t')) ? 1 : 0)
  #define _isnewline(c) (((((unsigned char)(c)) == '\n' || ((unsigned char)(c)) == '\r')) ? 1 : 0)
  3776. /* {{{ php_str_replace_in_subject
  3777. */
  3778. static zend_long php_str_replace_in_subject(zval *search, zval *replace, zval *subject, zval *result, int case_sensitivity)
  3779. {
  3780. zval *search_entry;
  3781. zend_string *tmp_result,
  3782. *tmp_subject_str;
  3783. char *replace_value = NULL;
  3784. size_t replace_len = 0;
  3785. zend_long replace_count = 0;
  3786. zend_string *subject_str;
  3787. zend_string *lc_subject_str = NULL;
  3788. uint32_t replace_idx;
  3789. /* Make sure we're dealing with strings. */
  3790. subject_str = zval_get_tmp_string(subject, &tmp_subject_str);
  3791. if (ZSTR_LEN(subject_str) == 0) {
  3792. zend_tmp_string_release(tmp_subject_str);
  3793. ZVAL_EMPTY_STRING(result);
  3794. return 0;
  3795. }
  3796. /* If search is an array */
  3797. if (Z_TYPE_P(search) == IS_ARRAY) {
  3798. /* Duplicate subject string for repeated replacement */
  3799. zend_string_addref(subject_str);
  3800. if (Z_TYPE_P(replace) == IS_ARRAY) {
  3801. replace_idx = 0;
  3802. } else {
  3803. /* Set replacement value to the passed one */
  3804. replace_value = Z_STRVAL_P(replace);
  3805. replace_len = Z_STRLEN_P(replace);
  3806. }
  3807. /* For each entry in the search array, get the entry */
  3808. ZEND_HASH_FOREACH_VAL_IND(Z_ARRVAL_P(search), search_entry) {
  3809. /* Make sure we're dealing with strings. */
  3810. zend_string *tmp_search_str;
  3811. zend_string *search_str = zval_get_tmp_string(search_entry, &tmp_search_str);
  3812. zend_string *replace_entry_str, *tmp_replace_entry_str = NULL;
  3813. /* If replace is an array. */
  3814. if (Z_TYPE_P(replace) == IS_ARRAY) {
  3815. /* Get current entry */
  3816. zval *replace_entry = NULL;
  3817. while (replace_idx < Z_ARRVAL_P(replace)->nNumUsed) {
  3818. replace_entry = &Z_ARRVAL_P(replace)->arData[replace_idx].val;
  3819. if (Z_TYPE_P(replace_entry) != IS_UNDEF) {
  3820. break;
  3821. }
  3822. replace_idx++;
  3823. }
  3824. if (replace_idx < Z_ARRVAL_P(replace)->nNumUsed) {
  3825. /* Make sure we're dealing with strings. */
  3826. replace_entry_str = zval_get_tmp_string(replace_entry, &tmp_replace_entry_str);
  3827. /* Set replacement value to the one we got from array */
  3828. replace_value = ZSTR_VAL(replace_entry_str);
  3829. replace_len = ZSTR_LEN(replace_entry_str);
  3830. replace_idx++;
  3831. } else {
  3832. /* We've run out of replacement strings, so use an empty one. */
  3833. replace_value = "";
  3834. replace_len = 0;
  3835. }
  3836. }
  3837. if (ZSTR_LEN(search_str) == 1) {
  3838. zend_long old_replace_count = replace_count;
  3839. tmp_result = php_char_to_str_ex(subject_str,
  3840. ZSTR_VAL(search_str)[0],
  3841. replace_value,
  3842. replace_len,
  3843. case_sensitivity,
  3844. &replace_count);
  3845. if (lc_subject_str && replace_count != old_replace_count) {
  3846. zend_string_release_ex(lc_subject_str, 0);
  3847. lc_subject_str = NULL;
  3848. }
  3849. } else if (ZSTR_LEN(search_str) > 1) {
  3850. if (case_sensitivity) {
  3851. tmp_result = php_str_to_str_ex(subject_str,
  3852. ZSTR_VAL(search_str), ZSTR_LEN(search_str),
  3853. replace_value, replace_len, &replace_count);
  3854. } else {
  3855. zend_long old_replace_count = replace_count;
  3856. if (!lc_subject_str) {
  3857. lc_subject_str = php_string_tolower(subject_str);
  3858. }
  3859. tmp_result = php_str_to_str_i_ex(subject_str, ZSTR_VAL(lc_subject_str),
  3860. search_str, replace_value, replace_len, &replace_count);
  3861. if (replace_count != old_replace_count) {
  3862. zend_string_release_ex(lc_subject_str, 0);
  3863. lc_subject_str = NULL;
  3864. }
  3865. }
  3866. } else {
  3867. zend_tmp_string_release(tmp_search_str);
  3868. zend_tmp_string_release(tmp_replace_entry_str);
  3869. continue;
  3870. }
  3871. zend_tmp_string_release(tmp_search_str);
  3872. zend_tmp_string_release(tmp_replace_entry_str);
  3873. if (subject_str == tmp_result) {
  3874. zend_string_delref(subject_str);
  3875. } else {
  3876. zend_string_release_ex(subject_str, 0);
  3877. subject_str = tmp_result;
  3878. if (ZSTR_LEN(subject_str) == 0) {
  3879. zend_string_release_ex(subject_str, 0);
  3880. ZVAL_EMPTY_STRING(result);
  3881. if (lc_subject_str) {
  3882. zend_string_release_ex(lc_subject_str, 0);
  3883. }
  3884. zend_tmp_string_release(tmp_subject_str);
  3885. return replace_count;
  3886. }
  3887. }
  3888. } ZEND_HASH_FOREACH_END();
  3889. ZVAL_STR(result, subject_str);
  3890. if (lc_subject_str) {
  3891. zend_string_release_ex(lc_subject_str, 0);
  3892. }
  3893. } else {
  3894. ZEND_ASSERT(Z_TYPE_P(search) == IS_STRING);
  3895. if (Z_STRLEN_P(search) == 1) {
  3896. ZVAL_STR(result,
  3897. php_char_to_str_ex(subject_str,
  3898. Z_STRVAL_P(search)[0],
  3899. Z_STRVAL_P(replace),
  3900. Z_STRLEN_P(replace),
  3901. case_sensitivity,
  3902. &replace_count));
  3903. } else if (Z_STRLEN_P(search) > 1) {
  3904. if (case_sensitivity) {
  3905. ZVAL_STR(result, php_str_to_str_ex(subject_str,
  3906. Z_STRVAL_P(search), Z_STRLEN_P(search),
  3907. Z_STRVAL_P(replace), Z_STRLEN_P(replace), &replace_count));
  3908. } else {
  3909. lc_subject_str = php_string_tolower(subject_str);
  3910. ZVAL_STR(result, php_str_to_str_i_ex(subject_str, ZSTR_VAL(lc_subject_str),
  3911. Z_STR_P(search),
  3912. Z_STRVAL_P(replace), Z_STRLEN_P(replace), &replace_count));
  3913. zend_string_release_ex(lc_subject_str, 0);
  3914. }
  3915. } else {
  3916. ZVAL_STR_COPY(result, subject_str);
  3917. }
  3918. }
  3919. zend_tmp_string_release(tmp_subject_str);
  3920. return replace_count;
  3921. }
  3922. /* }}} */
  3923. /* {{{ php_str_replace_common
  3924. */
  3925. static void php_str_replace_common(INTERNAL_FUNCTION_PARAMETERS, int case_sensitivity)
  3926. {
  3927. zval *subject, *search, *replace, *subject_entry, *zcount = NULL;
  3928. zval result;
  3929. zend_string *string_key;
  3930. zend_ulong num_key;
  3931. zend_long count = 0;
  3932. int argc = ZEND_NUM_ARGS();
  3933. ZEND_PARSE_PARAMETERS_START(3, 4)
  3934. Z_PARAM_ZVAL(search)
  3935. Z_PARAM_ZVAL(replace)
  3936. Z_PARAM_ZVAL(subject)
  3937. Z_PARAM_OPTIONAL
  3938. Z_PARAM_ZVAL_DEREF(zcount)
  3939. ZEND_PARSE_PARAMETERS_END();
  3940. /* Make sure we're dealing with strings and do the replacement. */
  3941. if (Z_TYPE_P(search) != IS_ARRAY) {
  3942. convert_to_string_ex(search);
  3943. if (Z_TYPE_P(replace) != IS_STRING) {
  3944. convert_to_string_ex(replace);
  3945. }
  3946. } else if (Z_TYPE_P(replace) != IS_ARRAY) {
  3947. convert_to_string_ex(replace);
  3948. }
  3949. /* if subject is an array */
  3950. if (Z_TYPE_P(subject) == IS_ARRAY) {
  3951. array_init(return_value);
  3952. /* For each subject entry, convert it to string, then perform replacement
  3953. and add the result to the return_value array. */
  3954. ZEND_HASH_FOREACH_KEY_VAL_IND(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
  3955. ZVAL_DEREF(subject_entry);
  3956. if (Z_TYPE_P(subject_entry) != IS_ARRAY && Z_TYPE_P(subject_entry) != IS_OBJECT) {
  3957. count += php_str_replace_in_subject(search, replace, subject_entry, &result, case_sensitivity);
  3958. } else {
  3959. ZVAL_COPY(&result, subject_entry);
  3960. }
  3961. /* Add to return array */
  3962. if (string_key) {
  3963. zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &result);
  3964. } else {
  3965. zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &result);
  3966. }
  3967. } ZEND_HASH_FOREACH_END();
  3968. } else { /* if subject is not an array */
  3969. count = php_str_replace_in_subject(search, replace, subject, return_value, case_sensitivity);
  3970. }
  3971. if (argc > 3) {
  3972. zval_ptr_dtor(zcount);
  3973. ZVAL_LONG(zcount, count);
  3974. }
  3975. }
  3976. /* }}} */
  3977. /* {{{ proto mixed str_replace(mixed search, mixed replace, mixed subject [, int &replace_count])
  3978. Replaces all occurrences of search in haystack with replace */
  3979. PHP_FUNCTION(str_replace)
  3980. {
  3981. php_str_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  3982. }
  3983. /* }}} */
  3984. /* {{{ proto mixed str_ireplace(mixed search, mixed replace, mixed subject [, int &replace_count])
  3985. Replaces all occurrences of search in haystack with replace / case-insensitive */
  3986. PHP_FUNCTION(str_ireplace)
  3987. {
  3988. php_str_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  3989. }
  3990. /* }}} */
  3991. /* {{{ php_hebrev
  3992. *
  3993. * Converts Logical Hebrew text (Hebrew Windows style) to Visual text
  3994. * Cheers/complaints/flames - Zeev Suraski <zeev@php.net>
  3995. */
  3996. static void php_hebrev(INTERNAL_FUNCTION_PARAMETERS, int convert_newlines)
  3997. {
  3998. char *str, *heb_str, *target;
  3999. const char *tmp;
  4000. size_t block_start, block_end, block_type, block_length, i;
  4001. zend_long max_chars=0, char_count;
  4002. size_t begin, end, orig_begin;
  4003. size_t str_len;
  4004. zend_string *broken_str;
  4005. ZEND_PARSE_PARAMETERS_START(1, 2)
  4006. Z_PARAM_STRING(str, str_len)
  4007. Z_PARAM_OPTIONAL
  4008. Z_PARAM_LONG(max_chars)
  4009. ZEND_PARSE_PARAMETERS_END();
  4010. if (str_len == 0) {
  4011. RETURN_FALSE;
  4012. }
  4013. tmp = str;
  4014. block_start=block_end=0;
  4015. heb_str = (char *) emalloc(str_len+1);
  4016. target = heb_str+str_len;
  4017. *target = 0;
  4018. target--;
  4019. block_length=0;
  4020. if (isheb(*tmp)) {
  4021. block_type = _HEB_BLOCK_TYPE_HEB;
  4022. } else {
  4023. block_type = _HEB_BLOCK_TYPE_ENG;
  4024. }
  4025. do {
  4026. if (block_type == _HEB_BLOCK_TYPE_HEB) {
  4027. while ((isheb((int)*(tmp+1)) || _isblank((int)*(tmp+1)) || ispunct((int)*(tmp+1)) || (int)*(tmp+1)=='\n' ) && block_end<str_len-1) {
  4028. tmp++;
  4029. block_end++;
  4030. block_length++;
  4031. }
  4032. for (i = block_start+1; i<= block_end+1; i++) {
  4033. *target = str[i-1];
  4034. switch (*target) {
  4035. case '(':
  4036. *target = ')';
  4037. break;
  4038. case ')':
  4039. *target = '(';
  4040. break;
  4041. case '[':
  4042. *target = ']';
  4043. break;
  4044. case ']':
  4045. *target = '[';
  4046. break;
  4047. case '{':
  4048. *target = '}';
  4049. break;
  4050. case '}':
  4051. *target = '{';
  4052. break;
  4053. case '<':
  4054. *target = '>';
  4055. break;
  4056. case '>':
  4057. *target = '<';
  4058. break;
  4059. case '\\':
  4060. *target = '/';
  4061. break;
  4062. case '/':
  4063. *target = '\\';
  4064. break;
  4065. default:
  4066. break;
  4067. }
  4068. target--;
  4069. }
  4070. block_type = _HEB_BLOCK_TYPE_ENG;
  4071. } else {
  4072. while (!isheb(*(tmp+1)) && (int)*(tmp+1)!='\n' && block_end < str_len-1) {
  4073. tmp++;
  4074. block_end++;
  4075. block_length++;
  4076. }
  4077. while ((_isblank((int)*tmp) || ispunct((int)*tmp)) && *tmp!='/' && *tmp!='-' && block_end > block_start) {
  4078. tmp--;
  4079. block_end--;
  4080. }
  4081. for (i = block_end+1; i >= block_start+1; i--) {
  4082. *target = str[i-1];
  4083. target--;
  4084. }
  4085. block_type = _HEB_BLOCK_TYPE_HEB;
  4086. }
  4087. block_start=block_end+1;
  4088. } while (block_end < str_len-1);
  4089. broken_str = zend_string_alloc(str_len, 0);
  4090. begin = end = str_len-1;
  4091. target = ZSTR_VAL(broken_str);
  4092. while (1) {
  4093. char_count=0;
  4094. while ((!max_chars || (max_chars > 0 && char_count < max_chars)) && begin > 0) {
  4095. char_count++;
  4096. begin--;
  4097. if (_isnewline(heb_str[begin])) {
  4098. while (begin > 0 && _isnewline(heb_str[begin-1])) {
  4099. begin--;
  4100. char_count++;
  4101. }
  4102. break;
  4103. }
  4104. }
  4105. if (max_chars >= 0 && char_count == max_chars) { /* try to avoid breaking words */
  4106. size_t new_char_count=char_count, new_begin=begin;
  4107. while (new_char_count > 0) {
  4108. if (_isblank(heb_str[new_begin]) || _isnewline(heb_str[new_begin])) {
  4109. break;
  4110. }
  4111. new_begin++;
  4112. new_char_count--;
  4113. }
  4114. if (new_char_count > 0) {
  4115. begin=new_begin;
  4116. }
  4117. }
  4118. orig_begin=begin;
  4119. if (_isblank(heb_str[begin])) {
  4120. heb_str[begin]='\n';
  4121. }
  4122. while (begin <= end && _isnewline(heb_str[begin])) { /* skip leading newlines */
  4123. begin++;
  4124. }
  4125. for (i = begin; i <= end; i++) { /* copy content */
  4126. *target = heb_str[i];
  4127. target++;
  4128. }
  4129. for (i = orig_begin; i <= end && _isnewline(heb_str[i]); i++) {
  4130. *target = heb_str[i];
  4131. target++;
  4132. }
  4133. begin=orig_begin;
  4134. if (begin == 0) {
  4135. *target = 0;
  4136. break;
  4137. }
  4138. begin--;
  4139. end=begin;
  4140. }
  4141. efree(heb_str);
  4142. if (convert_newlines) {
  4143. RETVAL_STR(php_char_to_str_ex(broken_str, '\n', "<br />\n", 7, 1, NULL));
  4144. zend_string_release_ex(broken_str, 0);
  4145. } else {
  4146. RETURN_NEW_STR(broken_str);
  4147. }
  4148. }
  4149. /* }}} */
  4150. /* {{{ proto string hebrev(string str [, int max_chars_per_line])
  4151. Converts logical Hebrew text to visual text */
  4152. PHP_FUNCTION(hebrev)
  4153. {
  4154. php_hebrev(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  4155. }
  4156. /* }}} */
  4157. /* {{{ proto string hebrevc(string str [, int max_chars_per_line])
  4158. Converts logical Hebrew text to visual text with newline conversion */
  4159. PHP_FUNCTION(hebrevc)
  4160. {
  4161. php_hebrev(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  4162. }
  4163. /* }}} */
  4164. /* {{{ proto string nl2br(string str [, bool is_xhtml])
  4165. Converts newlines to HTML line breaks */
  4166. PHP_FUNCTION(nl2br)
  4167. {
  4168. /* in brief this inserts <br /> or <br> before matched regexp \n\r?|\r\n? */
  4169. const char *tmp, *end;
  4170. zend_string *str;
  4171. char *target;
  4172. size_t repl_cnt = 0;
  4173. zend_bool is_xhtml = 1;
  4174. zend_string *result;
  4175. ZEND_PARSE_PARAMETERS_START(1, 2)
  4176. Z_PARAM_STR(str)
  4177. Z_PARAM_OPTIONAL
  4178. Z_PARAM_BOOL(is_xhtml)
  4179. ZEND_PARSE_PARAMETERS_END();
  4180. tmp = ZSTR_VAL(str);
  4181. end = ZSTR_VAL(str) + ZSTR_LEN(str);
  4182. /* it is really faster to scan twice and allocate mem once instead of scanning once
  4183. and constantly reallocing */
  4184. while (tmp < end) {
  4185. if (*tmp == '\r') {
  4186. if (*(tmp+1) == '\n') {
  4187. tmp++;
  4188. }
  4189. repl_cnt++;
  4190. } else if (*tmp == '\n') {
  4191. if (*(tmp+1) == '\r') {
  4192. tmp++;
  4193. }
  4194. repl_cnt++;
  4195. }
  4196. tmp++;
  4197. }
  4198. if (repl_cnt == 0) {
  4199. RETURN_STR_COPY(str);
  4200. }
  4201. {
  4202. size_t repl_len = is_xhtml ? (sizeof("<br />") - 1) : (sizeof("<br>") - 1);
  4203. result = zend_string_safe_alloc(repl_cnt, repl_len, ZSTR_LEN(str), 0);
  4204. target = ZSTR_VAL(result);
  4205. }
  4206. tmp = ZSTR_VAL(str);
  4207. while (tmp < end) {
  4208. switch (*tmp) {
  4209. case '\r':
  4210. case '\n':
  4211. *target++ = '<';
  4212. *target++ = 'b';
  4213. *target++ = 'r';
  4214. if (is_xhtml) {
  4215. *target++ = ' ';
  4216. *target++ = '/';
  4217. }
  4218. *target++ = '>';
  4219. if ((*tmp == '\r' && *(tmp+1) == '\n') || (*tmp == '\n' && *(tmp+1) == '\r')) {
  4220. *target++ = *tmp++;
  4221. }
  4222. /* lack of a break; is intentional */
  4223. default:
  4224. *target++ = *tmp;
  4225. }
  4226. tmp++;
  4227. }
  4228. *target = '\0';
  4229. RETURN_NEW_STR(result);
  4230. }
  4231. /* }}} */
  4232. /* {{{ proto string strip_tags(string str [, string allowable_tags])
  4233. Strips HTML and PHP tags from a string */
  4234. PHP_FUNCTION(strip_tags)
  4235. {
  4236. zend_string *buf;
  4237. zend_string *str;
  4238. zval *allow=NULL;
  4239. const char *allowed_tags=NULL;
  4240. size_t allowed_tags_len=0;
  4241. ZEND_PARSE_PARAMETERS_START(1, 2)
  4242. Z_PARAM_STR(str)
  4243. Z_PARAM_OPTIONAL
  4244. Z_PARAM_ZVAL(allow)
  4245. ZEND_PARSE_PARAMETERS_END();
  4246. /* To maintain a certain BC, we allow anything for the second parameter and return original string */
  4247. if (allow) {
  4248. convert_to_string(allow);
  4249. allowed_tags = Z_STRVAL_P(allow);
  4250. allowed_tags_len = Z_STRLEN_P(allow);
  4251. }
  4252. buf = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
  4253. ZSTR_LEN(buf) = php_strip_tags_ex(ZSTR_VAL(buf), ZSTR_LEN(str), NULL, allowed_tags, allowed_tags_len, 0);
  4254. RETURN_NEW_STR(buf);
  4255. }
  4256. /* }}} */
  4257. /* {{{ proto string setlocale(mixed category, string locale [, string ...])
  4258. Set locale information */
  4259. PHP_FUNCTION(setlocale)
  4260. {
  4261. zval *args = NULL;
  4262. zval *plocale;
  4263. zend_string *loc;
  4264. const char *retval;
  4265. zend_long cat;
  4266. int num_args, i = 0;
  4267. uint32_t idx;
  4268. ZEND_PARSE_PARAMETERS_START(2, -1)
  4269. Z_PARAM_LONG(cat)
  4270. Z_PARAM_VARIADIC('+', args, num_args)
  4271. ZEND_PARSE_PARAMETERS_END();
  4272. #ifdef HAVE_SETLOCALE
  4273. idx = 0;
  4274. while (1) {
  4275. if (Z_TYPE(args[0]) == IS_ARRAY) {
  4276. while (idx < Z_ARRVAL(args[0])->nNumUsed) {
  4277. plocale = &Z_ARRVAL(args[0])->arData[idx].val;
  4278. if (Z_TYPE_P(plocale) != IS_UNDEF) {
  4279. break;
  4280. }
  4281. idx++;
  4282. }
  4283. if (idx >= Z_ARRVAL(args[0])->nNumUsed) {
  4284. break;
  4285. }
  4286. } else {
  4287. plocale = &args[i];
  4288. }
  4289. loc = zval_get_string(plocale);
  4290. if (!strcmp("0", ZSTR_VAL(loc))) {
  4291. zend_string_release_ex(loc, 0);
  4292. loc = NULL;
  4293. } else {
  4294. if (ZSTR_LEN(loc) >= 255) {
  4295. php_error_docref(NULL, E_WARNING, "Specified locale name is too long");
  4296. zend_string_release_ex(loc, 0);
  4297. break;
  4298. }
  4299. }
  4300. # ifndef PHP_WIN32
  4301. retval = php_my_setlocale(cat, loc ? ZSTR_VAL(loc) : NULL);
  4302. # else
  4303. if (loc) {
  4304. /* BC: don't try /^[a-z]{2}_[A-Z]{2}($|\..*)/ except for /^u[ks]_U[KS]$/ */
  4305. char *locp = ZSTR_VAL(loc);
  4306. if (ZSTR_LEN(loc) >= 5 && locp[2] == '_'
  4307. && locp[0] >= 'a' && locp[0] <= 'z' && locp[1] >= 'a' && locp[1] <= 'z'
  4308. && locp[3] >= 'A' && locp[3] <= 'Z' && locp[4] >= 'A' && locp[4] <= 'Z'
  4309. && (locp[5] == '\0' || locp[5] == '.')
  4310. && !(locp[0] == 'u' && (locp[1] == 'k' || locp[1] == 's')
  4311. && locp[3] == 'U' && (locp[4] == 'K' || locp[4] == 'S')
  4312. && locp[5] == '\0')
  4313. ) {
  4314. retval = NULL;
  4315. } else {
  4316. retval = php_my_setlocale(cat, ZSTR_VAL(loc));
  4317. }
  4318. } else {
  4319. retval = php_my_setlocale(cat, NULL);
  4320. }
  4321. # endif
  4322. zend_update_current_locale();
  4323. if (retval) {
  4324. if (loc) {
  4325. /* Remember if locale was changed */
  4326. size_t len = strlen(retval);
  4327. BG(locale_changed) = 1;
  4328. if (cat == LC_CTYPE || cat == LC_ALL) {
  4329. if (BG(locale_string)) {
  4330. zend_string_release_ex(BG(locale_string), 0);
  4331. }
  4332. if (len == ZSTR_LEN(loc) && !memcmp(ZSTR_VAL(loc), retval, len)) {
  4333. BG(locale_string) = zend_string_copy(loc);
  4334. RETURN_STR(BG(locale_string));
  4335. } else {
  4336. BG(locale_string) = zend_string_init(retval, len, 0);
  4337. zend_string_release_ex(loc, 0);
  4338. RETURN_STR_COPY(BG(locale_string));
  4339. }
  4340. } else if (len == ZSTR_LEN(loc) && !memcmp(ZSTR_VAL(loc), retval, len)) {
  4341. RETURN_STR(loc);
  4342. }
  4343. zend_string_release_ex(loc, 0);
  4344. }
  4345. RETURN_STRING(retval);
  4346. }
  4347. if (loc) {
  4348. zend_string_release_ex(loc, 0);
  4349. }
  4350. if (Z_TYPE(args[0]) == IS_ARRAY) {
  4351. idx++;
  4352. } else {
  4353. if (++i >= num_args) break;
  4354. }
  4355. }
  4356. #endif
  4357. RETURN_FALSE;
  4358. }
  4359. /* }}} */
  4360. /* {{{ proto void parse_str(string encoded_string [, array &result])
  4361. Parses GET/POST/COOKIE data and sets global variables */
  4362. PHP_FUNCTION(parse_str)
  4363. {
  4364. char *arg;
  4365. zval *arrayArg = NULL;
  4366. char *res = NULL;
  4367. size_t arglen;
  4368. ZEND_PARSE_PARAMETERS_START(1, 2)
  4369. Z_PARAM_STRING(arg, arglen)
  4370. Z_PARAM_OPTIONAL
  4371. Z_PARAM_ZVAL_DEREF(arrayArg)
  4372. ZEND_PARSE_PARAMETERS_END();
  4373. res = estrndup(arg, arglen);
  4374. if (arrayArg == NULL) {
  4375. zval tmp;
  4376. zend_array *symbol_table;
  4377. if (zend_forbid_dynamic_call("parse_str() with a single argument") == FAILURE) {
  4378. efree(res);
  4379. return;
  4380. }
  4381. php_error_docref(NULL, E_DEPRECATED, "Calling parse_str() without the result argument is deprecated");
  4382. symbol_table = zend_rebuild_symbol_table();
  4383. ZVAL_ARR(&tmp, symbol_table);
  4384. sapi_module.treat_data(PARSE_STRING, res, &tmp);
  4385. if (UNEXPECTED(zend_hash_del(symbol_table, ZSTR_KNOWN(ZEND_STR_THIS)) == SUCCESS)) {
  4386. zend_throw_error(NULL, "Cannot re-assign $this");
  4387. }
  4388. } else {
  4389. /* Clear out the array that was passed in. */
  4390. zval_ptr_dtor(arrayArg);
  4391. array_init(arrayArg);
  4392. sapi_module.treat_data(PARSE_STRING, res, arrayArg);
  4393. }
  4394. }
  4395. /* }}} */
  /* Growth step (in bytes) of the buffer php_strip_tags_ex() collects a tag in. */
  #define PHP_TAG_BUF_SIZE 1023

  /* {{{ php_tag_find
   *
   * Check if tag is in a set of tags
   *
   * tag  - the tag text as collected from the input, e.g. "<a href=x>"
   * len  - number of bytes of tag to examine
   * set  - NUL-terminated list of allowed tags, searched with strstr()
   *
   * Returns 1 when the normalized tag occurs in set, 0 otherwise (including
   * for len == 0).
   *
   * states:
   *
   * 0 start tag
   * 1 first non-whitespace char seen
   */
  int php_tag_find(char *tag, size_t len, const char *set) {
      const char *t, *end;
      char c, *n;
      int state=0, done=0;
      char *norm;

      if (len == 0) {
          return 0;
      }

      /* Every input byte produces at most one output byte; two more bytes are
       * needed for the closing '>' and the terminating NUL. */
      norm = emalloc(len+2);

      n = norm;
      end = tag + len;
      /*
         normalize the tag removing leading and trailing whitespace
         and turn any <a whatever...> into just <a> and any </tag>
         into <tag>
      */
      for (t = tag; !done && t < end; t++) {
          /* <ctype.h> functions require an unsigned char value; a negative
           * plain char (byte >= 0x80) would be undefined behaviour. */
          c = (char) tolower((unsigned char) *t);

          switch (c) {
              case '<':
                  *(n++) = c;
                  break;
              case '>':
                  done =1;
                  break;
              default:
                  if (!isspace((unsigned char) c)) {
                      if (state == 0) {
                          state=1;
                      }
                      /* A '/' is dropped when it directly follows '<' or
                       * directly precedes '>'; the neighbours are only read
                       * when they lie inside [tag, tag+len). */
                      if (c != '/'
                          || ((t == tag || *(t-1) != '<')
                              && (t+1 >= end || *(t+1) != '>'))) {
                          *(n++) = c;
                      }
                  } else {
                      /* Whitespace after the tag name ends the name. */
                      if (state == 1)
                          done=1;
                  }
                  break;
          }
      }
      *(n++) = '>';
      *n = '\0';

      if (strstr(set, norm)) {
          done=1;
      } else {
          done=0;
      }
      efree(norm);
      return done;
  }
  /* }}} */
  4458. PHPAPI size_t php_strip_tags(char *rbuf, size_t len, uint8_t *stateptr, const char *allow, size_t allow_len) /* {{{ */
  4459. {
  4460. return php_strip_tags_ex(rbuf, len, stateptr, allow, allow_len, 0);
  4461. }
  4462. /* }}} */
  4463. /* {{{ php_strip_tags
  4464. A simple little state-machine to strip out html and php tags
  4465. State 0 is the output state, State 1 means we are inside a
  4466. normal html tag and state 2 means we are inside a php tag.
  4467. The state variable is passed in to allow a function like fgetss
  4468. to maintain state across calls to the function.
  4469. lc holds the last significant character read and br is a bracket
  4470. counter.
  4471. When an allow string is passed in we keep track of the string
  4472. in state 1 and when the tag is closed check it against the
  4473. allow string to see if we should allow it.
  4474. swm: Added ability to strip <?xml tags without assuming it PHP
  4475. code.
  4476. */
  4477. PHPAPI size_t php_strip_tags_ex(char *rbuf, size_t len, uint8_t *stateptr, const char *allow, size_t allow_len, zend_bool allow_tag_spaces)
  4478. {
  4479. char *tbuf, *tp, *rp, c, lc;
  4480. const char *buf, *p, *end;
  4481. int br, depth=0, in_q = 0;
  4482. uint8_t state = 0;
  4483. size_t pos;
  4484. char *allow_free = NULL;
  4485. const char *allow_actual;
  4486. char is_xml = 0;
  4487. buf = estrndup(rbuf, len);
  4488. end = buf + len;
  4489. lc = '\0';
  4490. p = buf;
  4491. rp = rbuf;
  4492. br = 0;
  4493. if (allow) {
  4494. allow_free = zend_str_tolower_dup_ex(allow, allow_len);
  4495. allow_actual = allow_free ? allow_free : allow;
  4496. tbuf = emalloc(PHP_TAG_BUF_SIZE + 1);
  4497. tp = tbuf;
  4498. } else {
  4499. tbuf = tp = NULL;
  4500. }
  4501. if (stateptr) {
  4502. state = *stateptr;
  4503. switch (state) {
  4504. case 1: goto state_1;
  4505. case 2: goto state_2;
  4506. case 3: goto state_3;
  4507. case 4: goto state_4;
  4508. default:
  4509. break;
  4510. }
  4511. }
  4512. state_0:
  4513. if (p >= end) {
  4514. goto finish;
  4515. }
  4516. c = *p;
  4517. switch (c) {
  4518. case '\0':
  4519. break;
  4520. case '<':
  4521. if (in_q) {
  4522. break;
  4523. }
  4524. if (isspace(*(p + 1)) && !allow_tag_spaces) {
  4525. *(rp++) = c;
  4526. break;
  4527. }
  4528. lc = '<';
  4529. state = 1;
  4530. if (allow) {
  4531. if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
  4532. pos = tp - tbuf;
  4533. tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
  4534. tp = tbuf + pos;
  4535. }
  4536. *(tp++) = '<';
  4537. }
  4538. p++;
  4539. goto state_1;
  4540. case '>':
  4541. if (depth) {
  4542. depth--;
  4543. break;
  4544. }
  4545. if (in_q) {
  4546. break;
  4547. }
  4548. *(rp++) = c;
  4549. break;
  4550. default:
  4551. *(rp++) = c;
  4552. break;
  4553. }
  4554. p++;
  4555. goto state_0;
  4556. state_1:
  4557. if (p >= end) {
  4558. goto finish;
  4559. }
  4560. c = *p;
  4561. switch (c) {
  4562. case '\0':
  4563. break;
  4564. case '<':
  4565. if (in_q) {
  4566. break;
  4567. }
  4568. if (isspace(*(p + 1)) && !allow_tag_spaces) {
  4569. goto reg_char_1;
  4570. }
  4571. depth++;
  4572. break;
  4573. case '>':
  4574. if (depth) {
  4575. depth--;
  4576. break;
  4577. }
  4578. if (in_q) {
  4579. break;
  4580. }
  4581. lc = '>';
  4582. if (is_xml && p >= buf + 1 && *(p -1) == '-') {
  4583. break;
  4584. }
  4585. in_q = state = is_xml = 0;
  4586. if (allow) {
  4587. if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
  4588. pos = tp - tbuf;
  4589. tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
  4590. tp = tbuf + pos;
  4591. }
  4592. *(tp++) = '>';
  4593. *tp='\0';
  4594. if (php_tag_find(tbuf, tp-tbuf, allow_actual)) {
  4595. memcpy(rp, tbuf, tp-tbuf);
  4596. rp += tp-tbuf;
  4597. }
  4598. tp = tbuf;
  4599. }
  4600. p++;
  4601. goto state_0;
  4602. case '"':
  4603. case '\'':
  4604. if (p != buf && (!in_q || *p == in_q)) {
  4605. if (in_q) {
  4606. in_q = 0;
  4607. } else {
  4608. in_q = *p;
  4609. }
  4610. }
  4611. goto reg_char_1;
  4612. case '!':
  4613. /* JavaScript & Other HTML scripting languages */
  4614. if (p >= buf + 1 && *(p-1) == '<') {
  4615. state = 3;
  4616. lc = c;
  4617. p++;
  4618. goto state_3;
  4619. } else {
  4620. goto reg_char_1;
  4621. }
  4622. break;
  4623. case '?':
  4624. if (p >= buf + 1 && *(p-1) == '<') {
  4625. br=0;
  4626. state = 2;
  4627. p++;
  4628. goto state_2;
  4629. } else {
  4630. goto reg_char_1;
  4631. }
  4632. break;
  4633. default:
  4634. reg_char_1:
  4635. if (allow) {
  4636. if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
  4637. pos = tp - tbuf;
  4638. tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
  4639. tp = tbuf + pos;
  4640. }
  4641. *(tp++) = c;
  4642. }
  4643. break;
  4644. }
  4645. p++;
  4646. goto state_1;
  4647. state_2:
  4648. if (p >= end) {
  4649. goto finish;
  4650. }
  4651. c = *p;
  4652. switch (c) {
  4653. case '(':
  4654. if (lc != '"' && lc != '\'') {
  4655. lc = '(';
  4656. br++;
  4657. }
  4658. break;
  4659. case ')':
  4660. if (lc != '"' && lc != '\'') {
  4661. lc = ')';
  4662. br--;
  4663. }
  4664. break;
  4665. case '>':
  4666. if (depth) {
  4667. depth--;
  4668. break;
  4669. }
  4670. if (in_q) {
  4671. break;
  4672. }
  4673. if (!br && p >= buf + 1 && lc != '\"' && *(p-1) == '?') {
  4674. in_q = state = 0;
  4675. tp = tbuf;
  4676. p++;
  4677. goto state_0;
  4678. }
  4679. break;
  4680. case '"':
  4681. case '\'':
  4682. if (p >= buf + 1 && *(p-1) != '\\') {
  4683. if (lc == c) {
  4684. lc = '\0';
  4685. } else if (lc != '\\') {
  4686. lc = c;
  4687. }
  4688. if (p != buf && (!in_q || *p == in_q)) {
  4689. if (in_q) {
  4690. in_q = 0;
  4691. } else {
  4692. in_q = *p;
  4693. }
  4694. }
  4695. }
  4696. break;
  4697. case 'l':
  4698. case 'L':
  4699. /* swm: If we encounter '<?xml' then we shouldn't be in
  4700. * state == 2 (PHP). Switch back to HTML.
  4701. */
  4702. if (state == 2 && p > buf+4
  4703. && (*(p-1) == 'm' || *(p-1) == 'M')
  4704. && (*(p-2) == 'x' || *(p-2) == 'X')
  4705. && *(p-3) == '?'
  4706. && *(p-4) == '<') {
  4707. state = 1; is_xml=1;
  4708. p++;
  4709. goto state_1;
  4710. }
  4711. break;
  4712. default:
  4713. break;
  4714. }
  4715. p++;
  4716. goto state_2;
  4717. state_3:
  4718. if (p >= end) {
  4719. goto finish;
  4720. }
  4721. c = *p;
  4722. switch (c) {
  4723. case '>':
  4724. if (depth) {
  4725. depth--;
  4726. break;
  4727. }
  4728. if (in_q) {
  4729. break;
  4730. }
  4731. in_q = state = 0;
  4732. tp = tbuf;
  4733. p++;
  4734. goto state_0;
  4735. case '"':
  4736. case '\'':
  4737. if (p != buf && *(p-1) != '\\' && (!in_q || *p == in_q)) {
  4738. if (in_q) {
  4739. in_q = 0;
  4740. } else {
  4741. in_q = *p;
  4742. }
  4743. }
  4744. break;
  4745. case '-':
  4746. if (p >= buf + 2 && *(p-1) == '-' && *(p-2) == '!') {
  4747. state = 4;
  4748. p++;
  4749. goto state_4;
  4750. }
  4751. break;
  4752. case 'E':
  4753. case 'e':
  4754. /* !DOCTYPE exception */
  4755. if (p > buf+6
  4756. && (*(p-1) == 'p' || *(p-1) == 'P')
  4757. && (*(p-2) == 'y' || *(p-2) == 'Y')
  4758. && (*(p-3) == 't' || *(p-3) == 'T')
  4759. && (*(p-4) == 'c' || *(p-4) == 'C')
  4760. && (*(p-5) == 'o' || *(p-5) == 'O')
  4761. && (*(p-6) == 'd' || *(p-6) == 'D')) {
  4762. state = 1;
  4763. p++;
  4764. goto state_1;
  4765. }
  4766. break;
  4767. default:
  4768. break;
  4769. }
  4770. p++;
  4771. goto state_3;
  4772. state_4:
  4773. while (p < end) {
  4774. c = *p;
  4775. if (c == '>' && !in_q) {
  4776. if (p >= buf + 2 && *(p-1) == '-' && *(p-2) == '-') {
  4777. in_q = state = 0;
  4778. tp = tbuf;
  4779. p++;
  4780. goto state_0;
  4781. }
  4782. }
  4783. p++;
  4784. }
  4785. finish:
  4786. if (rp < rbuf + len) {
  4787. *rp = '\0';
  4788. }
  4789. efree((void *)buf);
  4790. if (allow) {
  4791. efree(tbuf);
  4792. if (allow_free) {
  4793. efree(allow_free);
  4794. }
  4795. }
  4796. if (stateptr)
  4797. *stateptr = state;
  4798. return (size_t)(rp - rbuf);
  4799. }
  4800. /* }}} */
  4801. /* {{{ proto array str_getcsv(string input[, string delimiter[, string enclosure[, string escape]]])
  4802. Parse a CSV string into an array */
  4803. PHP_FUNCTION(str_getcsv)
  4804. {
  4805. zend_string *str;
  4806. char delim = ',', enc = '"', esc = '\\';
  4807. char *delim_str = NULL, *enc_str = NULL, *esc_str = NULL;
  4808. size_t delim_len = 0, enc_len = 0, esc_len = 0;
  4809. ZEND_PARSE_PARAMETERS_START(1, 4)
  4810. Z_PARAM_STR(str)
  4811. Z_PARAM_OPTIONAL
  4812. Z_PARAM_STRING(delim_str, delim_len)
  4813. Z_PARAM_STRING(enc_str, enc_len)
  4814. Z_PARAM_STRING(esc_str, esc_len)
  4815. ZEND_PARSE_PARAMETERS_END();
  4816. delim = delim_len ? delim_str[0] : delim;
  4817. enc = enc_len ? enc_str[0] : enc;
  4818. esc = esc_len ? esc_str[0] : esc;
  4819. php_fgetcsv(NULL, delim, enc, esc, ZSTR_LEN(str), ZSTR_VAL(str), return_value);
  4820. }
  4821. /* }}} */
  4822. /* {{{ proto string str_repeat(string input, int mult)
  4823. Returns the input string repeat mult times */
  4824. PHP_FUNCTION(str_repeat)
  4825. {
  4826. zend_string *input_str; /* Input string */
  4827. zend_long mult; /* Multiplier */
  4828. zend_string *result; /* Resulting string */
  4829. size_t result_len; /* Length of the resulting string */
  4830. ZEND_PARSE_PARAMETERS_START(2, 2)
  4831. Z_PARAM_STR(input_str)
  4832. Z_PARAM_LONG(mult)
  4833. ZEND_PARSE_PARAMETERS_END();
  4834. if (mult < 0) {
  4835. php_error_docref(NULL, E_WARNING, "Second argument has to be greater than or equal to 0");
  4836. return;
  4837. }
  4838. /* Don't waste our time if it's empty */
  4839. /* ... or if the multiplier is zero */
  4840. if (ZSTR_LEN(input_str) == 0 || mult == 0)
  4841. RETURN_EMPTY_STRING();
  4842. /* Initialize the result string */
  4843. result = zend_string_safe_alloc(ZSTR_LEN(input_str), mult, 0, 0);
  4844. result_len = ZSTR_LEN(input_str) * mult;
  4845. /* Heavy optimization for situations where input string is 1 byte long */
  4846. if (ZSTR_LEN(input_str) == 1) {
  4847. memset(ZSTR_VAL(result), *ZSTR_VAL(input_str), mult);
  4848. } else {
  4849. const char *s, *ee;
  4850. char *e;
  4851. ptrdiff_t l=0;
  4852. memcpy(ZSTR_VAL(result), ZSTR_VAL(input_str), ZSTR_LEN(input_str));
  4853. s = ZSTR_VAL(result);
  4854. e = ZSTR_VAL(result) + ZSTR_LEN(input_str);
  4855. ee = ZSTR_VAL(result) + result_len;
  4856. while (e<ee) {
  4857. l = (e-s) < (ee-e) ? (e-s) : (ee-e);
  4858. memmove(e, s, l);
  4859. e += l;
  4860. }
  4861. }
  4862. ZSTR_VAL(result)[result_len] = '\0';
  4863. RETURN_NEW_STR(result);
  4864. }
  4865. /* }}} */
  4866. /* {{{ proto mixed count_chars(string input [, int mode])
  4867. Returns info about what characters are used in input */
  4868. PHP_FUNCTION(count_chars)
  4869. {
  4870. zend_string *input;
  4871. int chars[256];
  4872. zend_long mymode=0;
  4873. const unsigned char *buf;
  4874. int inx;
  4875. char retstr[256];
  4876. size_t retlen=0;
  4877. size_t tmp = 0;
  4878. ZEND_PARSE_PARAMETERS_START(1, 2)
  4879. Z_PARAM_STR(input)
  4880. Z_PARAM_OPTIONAL
  4881. Z_PARAM_LONG(mymode)
  4882. ZEND_PARSE_PARAMETERS_END();
  4883. if (mymode < 0 || mymode > 4) {
  4884. php_error_docref(NULL, E_WARNING, "Unknown mode");
  4885. RETURN_FALSE;
  4886. }
  4887. buf = (const unsigned char *) ZSTR_VAL(input);
  4888. memset((void*) chars, 0, sizeof(chars));
  4889. while (tmp < ZSTR_LEN(input)) {
  4890. chars[*buf]++;
  4891. buf++;
  4892. tmp++;
  4893. }
  4894. if (mymode < 3) {
  4895. array_init(return_value);
  4896. }
  4897. for (inx = 0; inx < 256; inx++) {
  4898. switch (mymode) {
  4899. case 0:
  4900. add_index_long(return_value, inx, chars[inx]);
  4901. break;
  4902. case 1:
  4903. if (chars[inx] != 0) {
  4904. add_index_long(return_value, inx, chars[inx]);
  4905. }
  4906. break;
  4907. case 2:
  4908. if (chars[inx] == 0) {
  4909. add_index_long(return_value, inx, chars[inx]);
  4910. }
  4911. break;
  4912. case 3:
  4913. if (chars[inx] != 0) {
  4914. retstr[retlen++] = inx;
  4915. }
  4916. break;
  4917. case 4:
  4918. if (chars[inx] == 0) {
  4919. retstr[retlen++] = inx;
  4920. }
  4921. break;
  4922. }
  4923. }
  4924. if (mymode >= 3 && mymode <= 4) {
  4925. RETURN_STRINGL(retstr, retlen);
  4926. }
  4927. }
  4928. /* }}} */
  4929. /* {{{ php_strnatcmp
  4930. */
  4931. static void php_strnatcmp(INTERNAL_FUNCTION_PARAMETERS, int fold_case)
  4932. {
  4933. zend_string *s1, *s2;
  4934. ZEND_PARSE_PARAMETERS_START(2, 2)
  4935. Z_PARAM_STR(s1)
  4936. Z_PARAM_STR(s2)
  4937. ZEND_PARSE_PARAMETERS_END();
  4938. RETURN_LONG(strnatcmp_ex(ZSTR_VAL(s1), ZSTR_LEN(s1),
  4939. ZSTR_VAL(s2), ZSTR_LEN(s2),
  4940. fold_case));
  4941. }
  4942. /* }}} */
  4943. PHPAPI int string_natural_compare_function_ex(zval *result, zval *op1, zval *op2, zend_bool case_insensitive) /* {{{ */
  4944. {
  4945. zend_string *tmp_str1, *tmp_str2;
  4946. zend_string *str1 = zval_get_tmp_string(op1, &tmp_str1);
  4947. zend_string *str2 = zval_get_tmp_string(op2, &tmp_str2);
  4948. ZVAL_LONG(result, strnatcmp_ex(ZSTR_VAL(str1), ZSTR_LEN(str1), ZSTR_VAL(str2), ZSTR_LEN(str2), case_insensitive));
  4949. zend_tmp_string_release(tmp_str1);
  4950. zend_tmp_string_release(tmp_str2);
  4951. return SUCCESS;
  4952. }
  4953. /* }}} */
  4954. PHPAPI int string_natural_case_compare_function(zval *result, zval *op1, zval *op2) /* {{{ */
  4955. {
  4956. return string_natural_compare_function_ex(result, op1, op2, 1);
  4957. }
  4958. /* }}} */
  4959. PHPAPI int string_natural_compare_function(zval *result, zval *op1, zval *op2) /* {{{ */
  4960. {
  4961. return string_natural_compare_function_ex(result, op1, op2, 0);
  4962. }
  4963. /* }}} */
  4964. /* {{{ proto int strnatcmp(string s1, string s2)
  4965. Returns the result of string comparison using 'natural' algorithm */
  4966. PHP_FUNCTION(strnatcmp)
  4967. {
  4968. php_strnatcmp(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  4969. }
  4970. /* }}} */
  4971. /* {{{ proto array localeconv(void)
  4972. Returns numeric formatting information based on the current locale */
  4973. PHP_FUNCTION(localeconv)
  4974. {
  4975. zval grouping, mon_grouping;
  4976. int len, i;
  4977. /* We don't need no stinkin' parameters... */
  4978. if (zend_parse_parameters_none() == FAILURE) {
  4979. return;
  4980. }
  4981. array_init(return_value);
  4982. array_init(&grouping);
  4983. array_init(&mon_grouping);
  4984. #ifdef HAVE_LOCALECONV
  4985. {
  4986. struct lconv currlocdata;
  4987. localeconv_r( &currlocdata );
  4988. /* Grab the grouping data out of the array */
  4989. len = (int)strlen(currlocdata.grouping);
  4990. for (i = 0; i < len; i++) {
  4991. add_index_long(&grouping, i, currlocdata.grouping[i]);
  4992. }
  4993. /* Grab the monetary grouping data out of the array */
  4994. len = (int)strlen(currlocdata.mon_grouping);
  4995. for (i = 0; i < len; i++) {
  4996. add_index_long(&mon_grouping, i, currlocdata.mon_grouping[i]);
  4997. }
  4998. add_assoc_string(return_value, "decimal_point", currlocdata.decimal_point);
  4999. add_assoc_string(return_value, "thousands_sep", currlocdata.thousands_sep);
  5000. add_assoc_string(return_value, "int_curr_symbol", currlocdata.int_curr_symbol);
  5001. add_assoc_string(return_value, "currency_symbol", currlocdata.currency_symbol);
  5002. add_assoc_string(return_value, "mon_decimal_point", currlocdata.mon_decimal_point);
  5003. add_assoc_string(return_value, "mon_thousands_sep", currlocdata.mon_thousands_sep);
  5004. add_assoc_string(return_value, "positive_sign", currlocdata.positive_sign);
  5005. add_assoc_string(return_value, "negative_sign", currlocdata.negative_sign);
  5006. add_assoc_long( return_value, "int_frac_digits", currlocdata.int_frac_digits);
  5007. add_assoc_long( return_value, "frac_digits", currlocdata.frac_digits);
  5008. add_assoc_long( return_value, "p_cs_precedes", currlocdata.p_cs_precedes);
  5009. add_assoc_long( return_value, "p_sep_by_space", currlocdata.p_sep_by_space);
  5010. add_assoc_long( return_value, "n_cs_precedes", currlocdata.n_cs_precedes);
  5011. add_assoc_long( return_value, "n_sep_by_space", currlocdata.n_sep_by_space);
  5012. add_assoc_long( return_value, "p_sign_posn", currlocdata.p_sign_posn);
  5013. add_assoc_long( return_value, "n_sign_posn", currlocdata.n_sign_posn);
  5014. }
  5015. #else
  5016. /* Ok, it doesn't look like we have locale info floating around, so I guess it
  5017. wouldn't hurt to just go ahead and return the POSIX locale information? */
  5018. add_index_long(&grouping, 0, -1);
  5019. add_index_long(&mon_grouping, 0, -1);
  5020. add_assoc_string(return_value, "decimal_point", "\x2E");
  5021. add_assoc_string(return_value, "thousands_sep", "");
  5022. add_assoc_string(return_value, "int_curr_symbol", "");
  5023. add_assoc_string(return_value, "currency_symbol", "");
  5024. add_assoc_string(return_value, "mon_decimal_point", "\x2E");
  5025. add_assoc_string(return_value, "mon_thousands_sep", "");
  5026. add_assoc_string(return_value, "positive_sign", "");
  5027. add_assoc_string(return_value, "negative_sign", "");
  5028. add_assoc_long( return_value, "int_frac_digits", CHAR_MAX);
  5029. add_assoc_long( return_value, "frac_digits", CHAR_MAX);
  5030. add_assoc_long( return_value, "p_cs_precedes", CHAR_MAX);
  5031. add_assoc_long( return_value, "p_sep_by_space", CHAR_MAX);
  5032. add_assoc_long( return_value, "n_cs_precedes", CHAR_MAX);
  5033. add_assoc_long( return_value, "n_sep_by_space", CHAR_MAX);
  5034. add_assoc_long( return_value, "p_sign_posn", CHAR_MAX);
  5035. add_assoc_long( return_value, "n_sign_posn", CHAR_MAX);
  5036. #endif
  5037. zend_hash_str_update(Z_ARRVAL_P(return_value), "grouping", sizeof("grouping")-1, &grouping);
  5038. zend_hash_str_update(Z_ARRVAL_P(return_value), "mon_grouping", sizeof("mon_grouping")-1, &mon_grouping);
  5039. }
  5040. /* }}} */
  5041. /* {{{ proto int strnatcasecmp(string s1, string s2)
  5042. Returns the result of case-insensitive string comparison using 'natural' algorithm */
  5043. PHP_FUNCTION(strnatcasecmp)
  5044. {
  5045. php_strnatcmp(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  5046. }
  5047. /* }}} */
  5048. /* {{{ proto int substr_count(string haystack, string needle [, int offset [, int length]])
  5049. Returns the number of times a substring occurs in the string */
  5050. PHP_FUNCTION(substr_count)
  5051. {
  5052. char *haystack, *needle;
  5053. zend_long offset = 0, length = 0;
  5054. int ac = ZEND_NUM_ARGS();
  5055. zend_long count = 0;
  5056. size_t haystack_len, needle_len;
  5057. const char *p, *endp;
  5058. char cmp;
  5059. ZEND_PARSE_PARAMETERS_START(2, 4)
  5060. Z_PARAM_STRING(haystack, haystack_len)
  5061. Z_PARAM_STRING(needle, needle_len)
  5062. Z_PARAM_OPTIONAL
  5063. Z_PARAM_LONG(offset)
  5064. Z_PARAM_LONG(length)
  5065. ZEND_PARSE_PARAMETERS_END();
  5066. if (needle_len == 0) {
  5067. php_error_docref(NULL, E_WARNING, "Empty substring");
  5068. RETURN_FALSE;
  5069. }
  5070. p = haystack;
  5071. endp = p + haystack_len;
  5072. if (offset < 0) {
  5073. offset += (zend_long)haystack_len;
  5074. }
  5075. if ((offset < 0) || ((size_t)offset > haystack_len)) {
  5076. php_error_docref(NULL, E_WARNING, "Offset not contained in string");
  5077. RETURN_FALSE;
  5078. }
  5079. p += offset;
  5080. if (ac == 4) {
  5081. if (length < 0) {
  5082. length += (haystack_len - offset);
  5083. }
  5084. if (length < 0 || ((size_t)length > (haystack_len - offset))) {
  5085. php_error_docref(NULL, E_WARNING, "Invalid length value");
  5086. RETURN_FALSE;
  5087. }
  5088. endp = p + length;
  5089. }
  5090. if (needle_len == 1) {
  5091. cmp = needle[0];
  5092. while ((p = memchr(p, cmp, endp - p))) {
  5093. count++;
  5094. p++;
  5095. }
  5096. } else {
  5097. while ((p = (char*)php_memnstr(p, needle, needle_len, endp))) {
  5098. p += needle_len;
  5099. count++;
  5100. }
  5101. }
  5102. RETURN_LONG(count);
  5103. }
  5104. /* }}} */
  5105. /* {{{ proto string str_pad(string input, int pad_length [, string pad_string [, int pad_type]])
  5106. Returns input string padded on the left or right to specified length with pad_string */
  5107. PHP_FUNCTION(str_pad)
  5108. {
  5109. /* Input arguments */
  5110. zend_string *input; /* Input string */
  5111. zend_long pad_length; /* Length to pad to */
  5112. /* Helper variables */
  5113. size_t num_pad_chars; /* Number of padding characters (total - input size) */
  5114. char *pad_str = " "; /* Pointer to padding string */
  5115. size_t pad_str_len = 1;
  5116. zend_long pad_type_val = STR_PAD_RIGHT; /* The padding type value */
  5117. size_t i, left_pad=0, right_pad=0;
  5118. zend_string *result = NULL; /* Resulting string */
  5119. ZEND_PARSE_PARAMETERS_START(2, 4)
  5120. Z_PARAM_STR(input)
  5121. Z_PARAM_LONG(pad_length)
  5122. Z_PARAM_OPTIONAL
  5123. Z_PARAM_STRING(pad_str, pad_str_len)
  5124. Z_PARAM_LONG(pad_type_val)
  5125. ZEND_PARSE_PARAMETERS_END();
  5126. /* If resulting string turns out to be shorter than input string,
  5127. we simply copy the input and return. */
  5128. if (pad_length < 0 || (size_t)pad_length <= ZSTR_LEN(input)) {
  5129. RETURN_STR_COPY(input);
  5130. }
  5131. if (pad_str_len == 0) {
  5132. php_error_docref(NULL, E_WARNING, "Padding string cannot be empty");
  5133. return;
  5134. }
  5135. if (pad_type_val < STR_PAD_LEFT || pad_type_val > STR_PAD_BOTH) {
  5136. php_error_docref(NULL, E_WARNING, "Padding type has to be STR_PAD_LEFT, STR_PAD_RIGHT, or STR_PAD_BOTH");
  5137. return;
  5138. }
  5139. num_pad_chars = pad_length - ZSTR_LEN(input);
  5140. if (num_pad_chars >= INT_MAX) {
  5141. php_error_docref(NULL, E_WARNING, "Padding length is too long");
  5142. return;
  5143. }
  5144. result = zend_string_safe_alloc(1, ZSTR_LEN(input), num_pad_chars, 0);
  5145. ZSTR_LEN(result) = 0;
  5146. /* We need to figure out the left/right padding lengths. */
  5147. switch (pad_type_val) {
  5148. case STR_PAD_RIGHT:
  5149. left_pad = 0;
  5150. right_pad = num_pad_chars;
  5151. break;
  5152. case STR_PAD_LEFT:
  5153. left_pad = num_pad_chars;
  5154. right_pad = 0;
  5155. break;
  5156. case STR_PAD_BOTH:
  5157. left_pad = num_pad_chars / 2;
  5158. right_pad = num_pad_chars - left_pad;
  5159. break;
  5160. }
  5161. /* First we pad on the left. */
  5162. for (i = 0; i < left_pad; i++)
  5163. ZSTR_VAL(result)[ZSTR_LEN(result)++] = pad_str[i % pad_str_len];
  5164. /* Then we copy the input string. */
  5165. memcpy(ZSTR_VAL(result) + ZSTR_LEN(result), ZSTR_VAL(input), ZSTR_LEN(input));
  5166. ZSTR_LEN(result) += ZSTR_LEN(input);
  5167. /* Finally, we pad on the right. */
  5168. for (i = 0; i < right_pad; i++)
  5169. ZSTR_VAL(result)[ZSTR_LEN(result)++] = pad_str[i % pad_str_len];
  5170. ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
  5171. RETURN_NEW_STR(result);
  5172. }
  5173. /* }}} */
  5174. /* {{{ proto mixed sscanf(string str, string format [, string ...])
  5175. Implements an ANSI C compatible sscanf */
  5176. PHP_FUNCTION(sscanf)
  5177. {
  5178. zval *args = NULL;
  5179. char *str, *format;
  5180. size_t str_len, format_len;
  5181. int result, num_args = 0;
  5182. ZEND_PARSE_PARAMETERS_START(2, -1)
  5183. Z_PARAM_STRING(str, str_len)
  5184. Z_PARAM_STRING(format, format_len)
  5185. Z_PARAM_VARIADIC('*', args, num_args)
  5186. ZEND_PARSE_PARAMETERS_END();
  5187. result = php_sscanf_internal(str, format, num_args, args, 0, return_value);
  5188. if (SCAN_ERROR_WRONG_PARAM_COUNT == result) {
  5189. WRONG_PARAM_COUNT;
  5190. }
  5191. }
  5192. /* }}} */
  5193. /* static zend_string *php_str_rot13(zend_string *str) {{{ */
  5194. #ifdef __SSE2__
  5195. #include <emmintrin.h>
  5196. #endif
  5197. static zend_string *php_str_rot13(zend_string *str)
  5198. {
  5199. zend_string *ret;
  5200. const char *p, *e;
  5201. char *target;
  5202. if (UNEXPECTED(ZSTR_LEN(str) == 0)) {
  5203. return ZSTR_EMPTY_ALLOC();
  5204. }
  5205. ret = zend_string_alloc(ZSTR_LEN(str), 0);
  5206. p = ZSTR_VAL(str);
  5207. e = p + ZSTR_LEN(str);
  5208. target = ZSTR_VAL(ret);
  5209. #ifdef __SSE2__
  5210. if (e - p > 15) {
  5211. const __m128i a_minus_1 = _mm_set1_epi8('a' - 1);
  5212. const __m128i m_plus_1 = _mm_set1_epi8('m' + 1);
  5213. const __m128i n_minus_1 = _mm_set1_epi8('n' - 1);
  5214. const __m128i z_plus_1 = _mm_set1_epi8('z' + 1);
  5215. const __m128i A_minus_1 = _mm_set1_epi8('A' - 1);
  5216. const __m128i M_plus_1 = _mm_set1_epi8('M' + 1);
  5217. const __m128i N_minus_1 = _mm_set1_epi8('N' - 1);
  5218. const __m128i Z_plus_1 = _mm_set1_epi8('Z' + 1);
  5219. const __m128i add = _mm_set1_epi8(13);
  5220. const __m128i sub = _mm_set1_epi8(-13);
  5221. do {
  5222. __m128i in, gt, lt, cmp, delta;
  5223. delta = _mm_setzero_si128();
  5224. in = _mm_loadu_si128((__m128i *)p);
  5225. gt = _mm_cmpgt_epi8(in, a_minus_1);
  5226. lt = _mm_cmplt_epi8(in, m_plus_1);
  5227. cmp = _mm_and_si128(lt, gt);
  5228. if (_mm_movemask_epi8(cmp)) {
  5229. cmp = _mm_and_si128(cmp, add);
  5230. delta = _mm_or_si128(delta, cmp);
  5231. }
  5232. gt = _mm_cmpgt_epi8(in, n_minus_1);
  5233. lt = _mm_cmplt_epi8(in, z_plus_1);
  5234. cmp = _mm_and_si128(lt, gt);
  5235. if (_mm_movemask_epi8(cmp)) {
  5236. cmp = _mm_and_si128(cmp, sub);
  5237. delta = _mm_or_si128(delta, cmp);
  5238. }
  5239. gt = _mm_cmpgt_epi8(in, A_minus_1);
  5240. lt = _mm_cmplt_epi8(in, M_plus_1);
  5241. cmp = _mm_and_si128(lt, gt);
  5242. if (_mm_movemask_epi8(cmp)) {
  5243. cmp = _mm_and_si128(cmp, add);
  5244. delta = _mm_or_si128(delta, cmp);
  5245. }
  5246. gt = _mm_cmpgt_epi8(in, N_minus_1);
  5247. lt = _mm_cmplt_epi8(in, Z_plus_1);
  5248. cmp = _mm_and_si128(lt, gt);
  5249. if (_mm_movemask_epi8(cmp)) {
  5250. cmp = _mm_and_si128(cmp, sub);
  5251. delta = _mm_or_si128(delta, cmp);
  5252. }
  5253. in = _mm_add_epi8(in, delta);
  5254. _mm_storeu_si128((__m128i *)target, in);
  5255. p += 16;
  5256. target += 16;
  5257. } while (e - p > 15);
  5258. }
  5259. #endif
  5260. while (p < e) {
  5261. if (*p >= 'a' && *p <= 'z') {
  5262. *target++ = 'a' + (((*p++ - 'a') + 13) % 26);
  5263. } else if (*p >= 'A' && *p <= 'Z') {
  5264. *target++ = 'A' + (((*p++ - 'A') + 13) % 26);
  5265. } else {
  5266. *target++ = *p++;
  5267. }
  5268. }
  5269. *target = '\0';
  5270. return ret;
  5271. }
  5272. /* }}} */
  5273. /* {{{ proto string str_rot13(string str)
  5274. Perform the rot13 transform on a string */
  5275. PHP_FUNCTION(str_rot13)
  5276. {
  5277. zend_string *arg;
  5278. ZEND_PARSE_PARAMETERS_START(1, 1)
  5279. Z_PARAM_STR(arg)
  5280. ZEND_PARSE_PARAMETERS_END();
  5281. RETURN_STR(php_str_rot13(arg));
  5282. }
  5283. /* }}} */
  5284. static void php_string_shuffle(char *str, zend_long len) /* {{{ */
  5285. {
  5286. zend_long n_elems, rnd_idx, n_left;
  5287. char temp;
  5288. /* The implementation is stolen from array_data_shuffle */
  5289. /* Thus the characteristics of the randomization are the same */
  5290. n_elems = len;
  5291. if (n_elems <= 1) {
  5292. return;
  5293. }
  5294. n_left = n_elems;
  5295. while (--n_left) {
  5296. rnd_idx = php_mt_rand_range(0, n_left);
  5297. if (rnd_idx != n_left) {
  5298. temp = str[n_left];
  5299. str[n_left] = str[rnd_idx];
  5300. str[rnd_idx] = temp;
  5301. }
  5302. }
  5303. }
  5304. /* }}} */
  5305. /* {{{ proto void str_shuffle(string str)
  5306. Shuffles string. One permutation of all possible is created */
  5307. PHP_FUNCTION(str_shuffle)
  5308. {
  5309. zend_string *arg;
  5310. ZEND_PARSE_PARAMETERS_START(1, 1)
  5311. Z_PARAM_STR(arg)
  5312. ZEND_PARSE_PARAMETERS_END();
  5313. RETVAL_STRINGL(ZSTR_VAL(arg), ZSTR_LEN(arg));
  5314. if (Z_STRLEN_P(return_value) > 1) {
  5315. php_string_shuffle(Z_STRVAL_P(return_value), (zend_long) Z_STRLEN_P(return_value));
  5316. }
  5317. }
  5318. /* }}} */
  5319. /* {{{ proto mixed str_word_count(string str, [int format [, string charlist]])
  5320. Counts the number of words inside a string. If format of 1 is specified,
  5321. then the function will return an array containing all the words
  5322. found inside the string. If format of 2 is specified, then the function
  5323. will return an associated array where the position of the word is the key
  5324. and the word itself is the value.
  5325. For the purpose of this function, 'word' is defined as a locale dependent
  5326. string containing alphabetic characters, which also may contain, but not start
  5327. with "'" and "-" characters.
  5328. */
  5329. PHP_FUNCTION(str_word_count)
  5330. {
  5331. zend_string *str;
  5332. char *char_list = NULL, ch[256];
  5333. const char *p, *e, *s;
  5334. size_t char_list_len = 0, word_count = 0;
  5335. zend_long type = 0;
  5336. ZEND_PARSE_PARAMETERS_START(1, 3)
  5337. Z_PARAM_STR(str)
  5338. Z_PARAM_OPTIONAL
  5339. Z_PARAM_LONG(type)
  5340. Z_PARAM_STRING(char_list, char_list_len)
  5341. ZEND_PARSE_PARAMETERS_END();
  5342. switch(type) {
  5343. case 1:
  5344. case 2:
  5345. array_init(return_value);
  5346. if (!ZSTR_LEN(str)) {
  5347. return;
  5348. }
  5349. break;
  5350. case 0:
  5351. if (!ZSTR_LEN(str)) {
  5352. RETURN_LONG(0);
  5353. }
  5354. /* nothing to be done */
  5355. break;
  5356. default:
  5357. php_error_docref(NULL, E_WARNING, "Invalid format value " ZEND_LONG_FMT, type);
  5358. RETURN_FALSE;
  5359. }
  5360. if (char_list) {
  5361. php_charmask((unsigned char *)char_list, char_list_len, ch);
  5362. }
  5363. p = ZSTR_VAL(str);
  5364. e = ZSTR_VAL(str) + ZSTR_LEN(str);
  5365. /* first character cannot be ' or -, unless explicitly allowed by the user */
  5366. if ((*p == '\'' && (!char_list || !ch['\''])) || (*p == '-' && (!char_list || !ch['-']))) {
  5367. p++;
  5368. }
  5369. /* last character cannot be -, unless explicitly allowed by the user */
  5370. if (*(e - 1) == '-' && (!char_list || !ch['-'])) {
  5371. e--;
  5372. }
  5373. while (p < e) {
  5374. s = p;
  5375. while (p < e && (isalpha((unsigned char)*p) || (char_list && ch[(unsigned char)*p]) || *p == '\'' || *p == '-')) {
  5376. p++;
  5377. }
  5378. if (p > s) {
  5379. switch (type)
  5380. {
  5381. case 1:
  5382. add_next_index_stringl(return_value, s, p - s);
  5383. break;
  5384. case 2:
  5385. add_index_stringl(return_value, (s - ZSTR_VAL(str)), s, p - s);
  5386. break;
  5387. default:
  5388. word_count++;
  5389. break;
  5390. }
  5391. }
  5392. p++;
  5393. }
  5394. if (!type) {
  5395. RETURN_LONG(word_count);
  5396. }
  5397. }
  5398. /* }}} */
  5399. #if HAVE_STRFMON
  5400. /* {{{ proto string money_format(string format , float value)
  5401. Convert monetary value(s) to string */
  5402. PHP_FUNCTION(money_format)
  5403. {
  5404. size_t format_len = 0;
  5405. char *format, *p, *e;
  5406. double value;
  5407. zend_bool check = 0;
  5408. zend_string *str;
  5409. ssize_t res_len;
  5410. ZEND_PARSE_PARAMETERS_START(2, 2)
  5411. Z_PARAM_STRING(format, format_len)
  5412. Z_PARAM_DOUBLE(value)
  5413. ZEND_PARSE_PARAMETERS_END();
  5414. p = format;
  5415. e = p + format_len;
  5416. while ((p = memchr(p, '%', (e - p)))) {
  5417. if (*(p + 1) == '%') {
  5418. p += 2;
  5419. } else if (!check) {
  5420. check = 1;
  5421. p++;
  5422. } else {
  5423. php_error_docref(NULL, E_WARNING, "Only a single %%i or %%n token can be used");
  5424. RETURN_FALSE;
  5425. }
  5426. }
  5427. str = zend_string_safe_alloc(format_len, 1, 1024, 0);
  5428. if ((res_len = strfmon(ZSTR_VAL(str), ZSTR_LEN(str), format, value)) < 0) {
  5429. zend_string_efree(str);
  5430. RETURN_FALSE;
  5431. }
  5432. #ifdef _AIX
  5433. /*
  5434. On AIX strfmon seems to include the terminating \0 in the length returned by strfmon,
  5435. despite the documentation indicating it is not included.
  5436. */
  5437. ZSTR_LEN(str) = strlen(ZSTR_VAL(str));
  5438. #else
  5439. ZSTR_LEN(str) = (size_t)res_len;
  5440. #endif
  5441. ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
  5442. RETURN_NEW_STR(zend_string_truncate(str, ZSTR_LEN(str), 0));
  5443. }
  5444. /* }}} */
  5445. #endif
  5446. /* {{{ proto array str_split(string str [, int split_length])
  5447. Convert a string to an array. If split_length is specified, break the string down into chunks each split_length characters long. */
  5448. PHP_FUNCTION(str_split)
  5449. {
  5450. zend_string *str;
  5451. zend_long split_length = 1;
  5452. const char *p;
  5453. size_t n_reg_segments;
  5454. ZEND_PARSE_PARAMETERS_START(1, 2)
  5455. Z_PARAM_STR(str)
  5456. Z_PARAM_OPTIONAL
  5457. Z_PARAM_LONG(split_length)
  5458. ZEND_PARSE_PARAMETERS_END();
  5459. if (split_length <= 0) {
  5460. php_error_docref(NULL, E_WARNING, "The length of each segment must be greater than zero");
  5461. RETURN_FALSE;
  5462. }
  5463. if (0 == ZSTR_LEN(str) || (size_t)split_length >= ZSTR_LEN(str)) {
  5464. array_init_size(return_value, 1);
  5465. add_next_index_stringl(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
  5466. return;
  5467. }
  5468. array_init_size(return_value, (uint32_t)(((ZSTR_LEN(str) - 1) / split_length) + 1));
  5469. n_reg_segments = ZSTR_LEN(str) / split_length;
  5470. p = ZSTR_VAL(str);
  5471. while (n_reg_segments-- > 0) {
  5472. add_next_index_stringl(return_value, p, split_length);
  5473. p += split_length;
  5474. }
  5475. if (p != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
  5476. add_next_index_stringl(return_value, p, (ZSTR_VAL(str) + ZSTR_LEN(str) - p));
  5477. }
  5478. }
  5479. /* }}} */
  5480. /* {{{ proto array strpbrk(string haystack, string char_list)
  5481. Search a string for any of a set of characters */
  5482. PHP_FUNCTION(strpbrk)
  5483. {
  5484. zend_string *haystack, *char_list;
  5485. const char *haystack_ptr, *cl_ptr;
  5486. ZEND_PARSE_PARAMETERS_START(2, 2)
  5487. Z_PARAM_STR(haystack)
  5488. Z_PARAM_STR(char_list)
  5489. ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
  5490. if (!ZSTR_LEN(char_list)) {
  5491. php_error_docref(NULL, E_WARNING, "The character list cannot be empty");
  5492. RETURN_FALSE;
  5493. }
  5494. for (haystack_ptr = ZSTR_VAL(haystack); haystack_ptr < (ZSTR_VAL(haystack) + ZSTR_LEN(haystack)); ++haystack_ptr) {
  5495. for (cl_ptr = ZSTR_VAL(char_list); cl_ptr < (ZSTR_VAL(char_list) + ZSTR_LEN(char_list)); ++cl_ptr) {
  5496. if (*cl_ptr == *haystack_ptr) {
  5497. RETURN_STRINGL(haystack_ptr, (ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - haystack_ptr));
  5498. }
  5499. }
  5500. }
  5501. RETURN_FALSE;
  5502. }
  5503. /* }}} */
  5504. /* {{{ proto int substr_compare(string main_str, string str, int offset [, int length [, bool case_sensitivity]])
  5505. Binary safe optionally case insensitive comparison of 2 strings from an offset, up to length characters */
  5506. PHP_FUNCTION(substr_compare)
  5507. {
  5508. zend_string *s1, *s2;
  5509. zend_long offset, len=0;
  5510. zend_bool len_is_default=1;
  5511. zend_bool cs=0;
  5512. size_t cmp_len;
  5513. ZEND_PARSE_PARAMETERS_START(3, 5)
  5514. Z_PARAM_STR(s1)
  5515. Z_PARAM_STR(s2)
  5516. Z_PARAM_LONG(offset)
  5517. Z_PARAM_OPTIONAL
  5518. Z_PARAM_LONG_EX(len, len_is_default, 1, 0)
  5519. Z_PARAM_BOOL(cs)
  5520. ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
  5521. if (!len_is_default && len <= 0) {
  5522. if (len == 0) {
  5523. RETURN_LONG(0L);
  5524. } else {
  5525. php_error_docref(NULL, E_WARNING, "The length must be greater than or equal to zero");
  5526. RETURN_FALSE;
  5527. }
  5528. }
  5529. if (offset < 0) {
  5530. offset = ZSTR_LEN(s1) + offset;
  5531. offset = (offset < 0) ? 0 : offset;
  5532. }
  5533. if ((size_t)offset > ZSTR_LEN(s1)) {
  5534. php_error_docref(NULL, E_WARNING, "The start position cannot exceed initial string length");
  5535. RETURN_FALSE;
  5536. }
  5537. cmp_len = len ? (size_t)len : MAX(ZSTR_LEN(s2), (ZSTR_LEN(s1) - offset));
  5538. if (!cs) {
  5539. RETURN_LONG(zend_binary_strncmp(ZSTR_VAL(s1) + offset, (ZSTR_LEN(s1) - offset), ZSTR_VAL(s2), ZSTR_LEN(s2), cmp_len));
  5540. } else {
  5541. RETURN_LONG(zend_binary_strncasecmp_l(ZSTR_VAL(s1) + offset, (ZSTR_LEN(s1) - offset), ZSTR_VAL(s2), ZSTR_LEN(s2), cmp_len));
  5542. }
  5543. }
  5544. /* }}} */
  5545. /* {{{ */
  5546. static zend_string *php_utf8_encode(const char *s, size_t len)
  5547. {
  5548. size_t pos = len;
  5549. zend_string *str;
  5550. unsigned char c;
  5551. str = zend_string_safe_alloc(len, 2, 0, 0);
  5552. ZSTR_LEN(str) = 0;
  5553. while (pos > 0) {
  5554. /* The lower 256 codepoints of Unicode are identical to Latin-1,
  5555. * so we don't need to do any mapping here. */
  5556. c = (unsigned char)(*s);
  5557. if (c < 0x80) {
  5558. ZSTR_VAL(str)[ZSTR_LEN(str)++] = (char) c;
  5559. /* We only account for the single-byte and two-byte cases because
  5560. * we're only dealing with the first 256 Unicode codepoints. */
  5561. } else {
  5562. ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0xc0 | (c >> 6));
  5563. ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0x80 | (c & 0x3f));
  5564. }
  5565. pos--;
  5566. s++;
  5567. }
  5568. ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
  5569. str = zend_string_truncate(str, ZSTR_LEN(str), 0);
  5570. return str;
  5571. }
  5572. /* }}} */
  5573. /* {{{ */
  5574. static zend_string *php_utf8_decode(const char *s, size_t len)
  5575. {
  5576. size_t pos = 0;
  5577. unsigned int c;
  5578. zend_string *str;
  5579. str = zend_string_alloc(len, 0);
  5580. ZSTR_LEN(str) = 0;
  5581. while (pos < len) {
  5582. int status = FAILURE;
  5583. c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status);
  5584. /* The lower 256 codepoints of Unicode are identical to Latin-1,
  5585. * so we don't need to do any mapping here beyond replacing non-Latin-1
  5586. * characters. */
  5587. if (status == FAILURE || c > 0xFFU) {
  5588. c = '?';
  5589. }
  5590. ZSTR_VAL(str)[ZSTR_LEN(str)++] = c;
  5591. }
  5592. ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
  5593. if (ZSTR_LEN(str) < len) {
  5594. str = zend_string_truncate(str, ZSTR_LEN(str), 0);
  5595. }
  5596. return str;
  5597. }
  5598. /* }}} */
  5599. /* {{{ proto string utf8_encode(string data)
  5600. Encodes an ISO-8859-1 string to UTF-8 */
  5601. PHP_FUNCTION(utf8_encode)
  5602. {
  5603. char *arg;
  5604. size_t arg_len;
  5605. ZEND_PARSE_PARAMETERS_START(1, 1)
  5606. Z_PARAM_STRING(arg, arg_len)
  5607. ZEND_PARSE_PARAMETERS_END();
  5608. RETURN_STR(php_utf8_encode(arg, arg_len));
  5609. }
  5610. /* }}} */
  5611. /* {{{ proto string utf8_decode(string data)
  5612. Converts a UTF-8 encoded string to ISO-8859-1 */
  5613. PHP_FUNCTION(utf8_decode)
  5614. {
  5615. char *arg;
  5616. size_t arg_len;
  5617. ZEND_PARSE_PARAMETERS_START(1, 1)
  5618. Z_PARAM_STRING(arg, arg_len)
  5619. ZEND_PARSE_PARAMETERS_END();
  5620. RETURN_STR(php_utf8_decode(arg, arg_len));
  5621. }
  5622. /* }}} */
  5623. /*
  5624. * Local variables:
  5625. * tab-width: 4
  5626. * c-basic-offset: 4
  5627. * End:
  5628. * vim600: noet sw=4 ts=4 fdm=marker
  5629. * vim<600: noet sw=4 ts=4
  5630. */