string.c 143 KB


  1. /*
  2. +----------------------------------------------------------------------+
  3. | Copyright (c) The PHP Group |
  4. +----------------------------------------------------------------------+
  5. | This source file is subject to version 3.01 of the PHP license, |
  6. | that is bundled with this package in the file LICENSE, and is |
  7. | available through the world-wide-web at the following url: |
  8. | https://www.php.net/license/3_01.txt |
  9. | If you did not receive a copy of the PHP license and are unable to |
  10. | obtain it through the world-wide-web, please send a note to |
  11. | license@php.net so we can mail you a copy immediately. |
  12. +----------------------------------------------------------------------+
  13. | Authors: Rasmus Lerdorf <rasmus@php.net> |
  14. | Stig Sæther Bakken <ssb@php.net> |
  15. | Zeev Suraski <zeev@php.net> |
  16. +----------------------------------------------------------------------+
  17. */
  18. #include <stdio.h>
  19. #include "php.h"
  20. #include "php_rand.h"
  21. #include "php_string.h"
  22. #include "php_variables.h"
  23. #include <locale.h>
  24. #ifdef HAVE_LANGINFO_H
  25. # include <langinfo.h>
  26. #endif
  27. #ifdef HAVE_LIBINTL
  28. # include <libintl.h> /* For LC_MESSAGES */
  29. #endif
  30. #include "scanf.h"
  31. #include "zend_API.h"
  32. #include "zend_execute.h"
  33. #include "php_globals.h"
  34. #include "basic_functions.h"
  35. #include "zend_smart_str.h"
  36. #include <Zend/zend_exceptions.h>
  37. #ifdef ZTS
  38. #include "TSRM.h"
  39. #endif
  40. /* For str_getcsv() support */
  41. #include "ext/standard/file.h"
  42. /* For php_next_utf8_char() */
  43. #include "ext/standard/html.h"
  44. #define STR_PAD_LEFT 0
  45. #define STR_PAD_RIGHT 1
  46. #define STR_PAD_BOTH 2
  47. #define PHP_PATHINFO_DIRNAME 1
  48. #define PHP_PATHINFO_BASENAME 2
  49. #define PHP_PATHINFO_EXTENSION 4
  50. #define PHP_PATHINFO_FILENAME 8
  51. #define PHP_PATHINFO_ALL (PHP_PATHINFO_DIRNAME | PHP_PATHINFO_BASENAME | PHP_PATHINFO_EXTENSION | PHP_PATHINFO_FILENAME)
  52. #define STR_STRSPN 0
  53. #define STR_STRCSPN 1
  54. /* {{{ register_string_constants */
  55. void register_string_constants(INIT_FUNC_ARGS)
  56. {
  57. REGISTER_LONG_CONSTANT("STR_PAD_LEFT", STR_PAD_LEFT, CONST_CS | CONST_PERSISTENT);
  58. REGISTER_LONG_CONSTANT("STR_PAD_RIGHT", STR_PAD_RIGHT, CONST_CS | CONST_PERSISTENT);
  59. REGISTER_LONG_CONSTANT("STR_PAD_BOTH", STR_PAD_BOTH, CONST_CS | CONST_PERSISTENT);
  60. REGISTER_LONG_CONSTANT("PATHINFO_DIRNAME", PHP_PATHINFO_DIRNAME, CONST_CS | CONST_PERSISTENT);
  61. REGISTER_LONG_CONSTANT("PATHINFO_BASENAME", PHP_PATHINFO_BASENAME, CONST_CS | CONST_PERSISTENT);
  62. REGISTER_LONG_CONSTANT("PATHINFO_EXTENSION", PHP_PATHINFO_EXTENSION, CONST_CS | CONST_PERSISTENT);
  63. REGISTER_LONG_CONSTANT("PATHINFO_FILENAME", PHP_PATHINFO_FILENAME, CONST_CS | CONST_PERSISTENT);
  64. REGISTER_LONG_CONSTANT("PATHINFO_ALL", PHP_PATHINFO_ALL, CONST_CS | CONST_PERSISTENT);
  65. /* If last members of struct lconv equal CHAR_MAX, no grouping is done */
  66. REGISTER_LONG_CONSTANT("CHAR_MAX", CHAR_MAX, CONST_CS | CONST_PERSISTENT);
  67. REGISTER_LONG_CONSTANT("LC_CTYPE", LC_CTYPE, CONST_CS | CONST_PERSISTENT);
  68. REGISTER_LONG_CONSTANT("LC_NUMERIC", LC_NUMERIC, CONST_CS | CONST_PERSISTENT);
  69. REGISTER_LONG_CONSTANT("LC_TIME", LC_TIME, CONST_CS | CONST_PERSISTENT);
  70. REGISTER_LONG_CONSTANT("LC_COLLATE", LC_COLLATE, CONST_CS | CONST_PERSISTENT);
  71. REGISTER_LONG_CONSTANT("LC_MONETARY", LC_MONETARY, CONST_CS | CONST_PERSISTENT);
  72. REGISTER_LONG_CONSTANT("LC_ALL", LC_ALL, CONST_CS | CONST_PERSISTENT);
  73. # ifdef LC_MESSAGES
  74. REGISTER_LONG_CONSTANT("LC_MESSAGES", LC_MESSAGES, CONST_CS | CONST_PERSISTENT);
  75. # endif
  76. }
  77. /* }}} */
  78. int php_tag_find(char *tag, size_t len, const char *set);
  79. /* Lowercase hex digits indexed by nibble value (used by php_bin2hex()); the table is const, so sharing it is safe */
  80. ZEND_SET_ALIGNED(16, static const char hexconvtab[]) = "0123456789abcdef";
  81. /* Mutex held by localeconv_r() around its localeconv() call; only defined when ZTS is set */
  82. #ifdef ZTS
  83. static MUTEX_T locale_mutex = NULL;
  84. #endif
  85. /* {{{ php_bin2hex */
  86. static zend_string *php_bin2hex(const unsigned char *old, const size_t oldlen)
  87. {
  88. zend_string *result;
  89. size_t i, j;
  90. result = zend_string_safe_alloc(oldlen, 2 * sizeof(char), 0, 0);
  91. for (i = j = 0; i < oldlen; i++) {
  92. ZSTR_VAL(result)[j++] = hexconvtab[old[i] >> 4];
  93. ZSTR_VAL(result)[j++] = hexconvtab[old[i] & 15];
  94. }
  95. ZSTR_VAL(result)[j] = '\0';
  96. return result;
  97. }
  98. /* }}} */
  99. /* {{{ php_hex2bin */
  100. static zend_string *php_hex2bin(const unsigned char *old, const size_t oldlen)
  101. {
  102. size_t target_length = oldlen >> 1;
  103. zend_string *str = zend_string_alloc(target_length, 0);
  104. unsigned char *ret = (unsigned char *)ZSTR_VAL(str);
  105. size_t i, j;
  106. for (i = j = 0; i < target_length; i++) {
  107. unsigned char c = old[j++];
  108. unsigned char l = c & ~0x20;
  109. int is_letter = ((unsigned int) ((l - 'A') ^ (l - 'F' - 1))) >> (8 * sizeof(unsigned int) - 1);
  110. unsigned char d;
  111. /* basically (c >= '0' && c <= '9') || (l >= 'A' && l <= 'F') */
  112. if (EXPECTED((((c ^ '0') - 10) >> (8 * sizeof(unsigned int) - 1)) | is_letter)) {
  113. d = (l - 0x10 - 0x27 * is_letter) << 4;
  114. } else {
  115. zend_string_efree(str);
  116. return NULL;
  117. }
  118. c = old[j++];
  119. l = c & ~0x20;
  120. is_letter = ((unsigned int) ((l - 'A') ^ (l - 'F' - 1))) >> (8 * sizeof(unsigned int) - 1);
  121. if (EXPECTED((((c ^ '0') - 10) >> (8 * sizeof(unsigned int) - 1)) | is_letter)) {
  122. d |= l - 0x10 - 0x27 * is_letter;
  123. } else {
  124. zend_string_efree(str);
  125. return NULL;
  126. }
  127. ret[i] = d;
  128. }
  129. ret[i] = '\0';
  130. return str;
  131. }
  132. /* }}} */
  133. /* {{{ localeconv_r
  134. * glibc's localeconv is not reentrant, so lets make it so ... sorta */
  135. PHPAPI struct lconv *localeconv_r(struct lconv *out)
  136. {
  137. #ifdef ZTS
  138. tsrm_mutex_lock( locale_mutex );
  139. #endif
  140. /* cur->locinfo is struct __crt_locale_info which implementation is
  141. hidden in vc14. TODO revisit this and check if a workaround available
  142. and needed. */
  143. #if defined(PHP_WIN32) && _MSC_VER < 1900 && defined(ZTS)
  144. {
  145. /* Even with the enabled per thread locale, localeconv
  146. won't check any locale change in the master thread. */
  147. _locale_t cur = _get_current_locale();
  148. *out = *cur->locinfo->lconv;
  149. _free_locale(cur);
  150. }
  151. #else
  152. /* localeconv doesn't return an error condition */
  153. *out = *localeconv();
  154. #endif
  155. #ifdef ZTS
  156. tsrm_mutex_unlock( locale_mutex );
  157. #endif
  158. return out;
  159. }
  160. /* }}} */
  161. #ifdef ZTS
  162. /* {{{ PHP_MINIT_FUNCTION */
  163. PHP_MINIT_FUNCTION(localeconv)
  164. {
  165. locale_mutex = tsrm_mutex_alloc();
  166. return SUCCESS;
  167. }
  168. /* }}} */
  169. /* {{{ PHP_MSHUTDOWN_FUNCTION */
  170. PHP_MSHUTDOWN_FUNCTION(localeconv)
  171. {
  172. tsrm_mutex_free( locale_mutex );
  173. locale_mutex = NULL;
  174. return SUCCESS;
  175. }
  176. /* }}} */
  177. #endif
  178. /* {{{ Converts the binary representation of data to hex */
  179. PHP_FUNCTION(bin2hex)
  180. {
  181. zend_string *result;
  182. zend_string *data;
  183. ZEND_PARSE_PARAMETERS_START(1, 1)
  184. Z_PARAM_STR(data)
  185. ZEND_PARSE_PARAMETERS_END();
  186. result = php_bin2hex((unsigned char *)ZSTR_VAL(data), ZSTR_LEN(data));
  187. RETURN_STR(result);
  188. }
  189. /* }}} */
  190. /* {{{ Converts the hex representation of data to binary */
  191. PHP_FUNCTION(hex2bin)
  192. {
  193. zend_string *result, *data;
  194. ZEND_PARSE_PARAMETERS_START(1, 1)
  195. Z_PARAM_STR(data)
  196. ZEND_PARSE_PARAMETERS_END();
  197. if (ZSTR_LEN(data) % 2 != 0) {
  198. php_error_docref(NULL, E_WARNING, "Hexadecimal input string must have an even length");
  199. RETURN_FALSE;
  200. }
  201. result = php_hex2bin((unsigned char *)ZSTR_VAL(data), ZSTR_LEN(data));
  202. if (!result) {
  203. php_error_docref(NULL, E_WARNING, "Input string must be hexadecimal string");
  204. RETURN_FALSE;
  205. }
  206. RETVAL_STR(result);
  207. }
  208. /* }}} */
  209. static void php_spn_common_handler(INTERNAL_FUNCTION_PARAMETERS, int behavior) /* {{{ */
  210. {
  211. zend_string *s11, *s22;
  212. zend_long start = 0, len = 0;
  213. bool len_is_null = 1;
  214. ZEND_PARSE_PARAMETERS_START(2, 4)
  215. Z_PARAM_STR(s11)
  216. Z_PARAM_STR(s22)
  217. Z_PARAM_OPTIONAL
  218. Z_PARAM_LONG(start)
  219. Z_PARAM_LONG_OR_NULL(len, len_is_null)
  220. ZEND_PARSE_PARAMETERS_END();
  221. size_t remain_len = ZSTR_LEN(s11);
  222. if (start < 0) {
  223. start += remain_len;
  224. if (start < 0) {
  225. start = 0;
  226. }
  227. } else if ((size_t) start > remain_len) {
  228. start = remain_len;
  229. }
  230. remain_len -= start;
  231. if (!len_is_null) {
  232. if (len < 0) {
  233. len += remain_len;
  234. if (len < 0) {
  235. len = 0;
  236. }
  237. } else if ((size_t) len > remain_len) {
  238. len = remain_len;
  239. }
  240. } else {
  241. len = remain_len;
  242. }
  243. if (len == 0) {
  244. RETURN_LONG(0);
  245. }
  246. if (behavior == STR_STRSPN) {
  247. RETURN_LONG(php_strspn(ZSTR_VAL(s11) + start /*str1_start*/,
  248. ZSTR_VAL(s22) /*str2_start*/,
  249. ZSTR_VAL(s11) + start + len /*str1_end*/,
  250. ZSTR_VAL(s22) + ZSTR_LEN(s22) /*str2_end*/));
  251. } else {
  252. ZEND_ASSERT(behavior == STR_STRCSPN);
  253. RETURN_LONG(php_strcspn(ZSTR_VAL(s11) + start /*str1_start*/,
  254. ZSTR_VAL(s22) /*str2_start*/,
  255. ZSTR_VAL(s11) + start + len /*str1_end*/,
  256. ZSTR_VAL(s22) + ZSTR_LEN(s22) /*str2_end*/));
  257. }
  258. }
  259. /* }}} */
  260. /* {{{ Finds length of initial segment consisting entirely of characters found in mask. If start or/and length is provided works like strspn(substr($s,$start,$len),$good_chars) */
  261. PHP_FUNCTION(strspn)
  262. {
  263. php_spn_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, STR_STRSPN);
  264. }
  265. /* }}} */
  266. /* {{{ Finds length of initial segment consisting entirely of characters not found in mask. If start or/and length is provide works like strcspn(substr($s,$start,$len),$bad_chars) */
  267. PHP_FUNCTION(strcspn)
  268. {
  269. php_spn_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, STR_STRCSPN);
  270. }
  271. /* }}} */
  272. /* {{{ PHP_MINIT_FUNCTION(nl_langinfo) */
  273. #if HAVE_NL_LANGINFO
  274. PHP_MINIT_FUNCTION(nl_langinfo)
  275. {
  276. #define REGISTER_NL_LANGINFO_CONSTANT(x) REGISTER_LONG_CONSTANT(#x, x, CONST_CS | CONST_PERSISTENT)
  277. #ifdef ABDAY_1
  278. REGISTER_NL_LANGINFO_CONSTANT(ABDAY_1);
  279. REGISTER_NL_LANGINFO_CONSTANT(ABDAY_2);
  280. REGISTER_NL_LANGINFO_CONSTANT(ABDAY_3);
  281. REGISTER_NL_LANGINFO_CONSTANT(ABDAY_4);
  282. REGISTER_NL_LANGINFO_CONSTANT(ABDAY_5);
  283. REGISTER_NL_LANGINFO_CONSTANT(ABDAY_6);
  284. REGISTER_NL_LANGINFO_CONSTANT(ABDAY_7);
  285. #endif
  286. #ifdef DAY_1
  287. REGISTER_NL_LANGINFO_CONSTANT(DAY_1);
  288. REGISTER_NL_LANGINFO_CONSTANT(DAY_2);
  289. REGISTER_NL_LANGINFO_CONSTANT(DAY_3);
  290. REGISTER_NL_LANGINFO_CONSTANT(DAY_4);
  291. REGISTER_NL_LANGINFO_CONSTANT(DAY_5);
  292. REGISTER_NL_LANGINFO_CONSTANT(DAY_6);
  293. REGISTER_NL_LANGINFO_CONSTANT(DAY_7);
  294. #endif
  295. #ifdef ABMON_1
  296. REGISTER_NL_LANGINFO_CONSTANT(ABMON_1);
  297. REGISTER_NL_LANGINFO_CONSTANT(ABMON_2);
  298. REGISTER_NL_LANGINFO_CONSTANT(ABMON_3);
  299. REGISTER_NL_LANGINFO_CONSTANT(ABMON_4);
  300. REGISTER_NL_LANGINFO_CONSTANT(ABMON_5);
  301. REGISTER_NL_LANGINFO_CONSTANT(ABMON_6);
  302. REGISTER_NL_LANGINFO_CONSTANT(ABMON_7);
  303. REGISTER_NL_LANGINFO_CONSTANT(ABMON_8);
  304. REGISTER_NL_LANGINFO_CONSTANT(ABMON_9);
  305. REGISTER_NL_LANGINFO_CONSTANT(ABMON_10);
  306. REGISTER_NL_LANGINFO_CONSTANT(ABMON_11);
  307. REGISTER_NL_LANGINFO_CONSTANT(ABMON_12);
  308. #endif
  309. #ifdef MON_1
  310. REGISTER_NL_LANGINFO_CONSTANT(MON_1);
  311. REGISTER_NL_LANGINFO_CONSTANT(MON_2);
  312. REGISTER_NL_LANGINFO_CONSTANT(MON_3);
  313. REGISTER_NL_LANGINFO_CONSTANT(MON_4);
  314. REGISTER_NL_LANGINFO_CONSTANT(MON_5);
  315. REGISTER_NL_LANGINFO_CONSTANT(MON_6);
  316. REGISTER_NL_LANGINFO_CONSTANT(MON_7);
  317. REGISTER_NL_LANGINFO_CONSTANT(MON_8);
  318. REGISTER_NL_LANGINFO_CONSTANT(MON_9);
  319. REGISTER_NL_LANGINFO_CONSTANT(MON_10);
  320. REGISTER_NL_LANGINFO_CONSTANT(MON_11);
  321. REGISTER_NL_LANGINFO_CONSTANT(MON_12);
  322. #endif
  323. #ifdef AM_STR
  324. REGISTER_NL_LANGINFO_CONSTANT(AM_STR);
  325. #endif
  326. #ifdef PM_STR
  327. REGISTER_NL_LANGINFO_CONSTANT(PM_STR);
  328. #endif
  329. #ifdef D_T_FMT
  330. REGISTER_NL_LANGINFO_CONSTANT(D_T_FMT);
  331. #endif
  332. #ifdef D_FMT
  333. REGISTER_NL_LANGINFO_CONSTANT(D_FMT);
  334. #endif
  335. #ifdef T_FMT
  336. REGISTER_NL_LANGINFO_CONSTANT(T_FMT);
  337. #endif
  338. #ifdef T_FMT_AMPM
  339. REGISTER_NL_LANGINFO_CONSTANT(T_FMT_AMPM);
  340. #endif
  341. #ifdef ERA
  342. REGISTER_NL_LANGINFO_CONSTANT(ERA);
  343. #endif
  344. #ifdef ERA_YEAR
  345. REGISTER_NL_LANGINFO_CONSTANT(ERA_YEAR);
  346. #endif
  347. #ifdef ERA_D_T_FMT
  348. REGISTER_NL_LANGINFO_CONSTANT(ERA_D_T_FMT);
  349. #endif
  350. #ifdef ERA_D_FMT
  351. REGISTER_NL_LANGINFO_CONSTANT(ERA_D_FMT);
  352. #endif
  353. #ifdef ERA_T_FMT
  354. REGISTER_NL_LANGINFO_CONSTANT(ERA_T_FMT);
  355. #endif
  356. #ifdef ALT_DIGITS
  357. REGISTER_NL_LANGINFO_CONSTANT(ALT_DIGITS);
  358. #endif
  359. #ifdef INT_CURR_SYMBOL
  360. REGISTER_NL_LANGINFO_CONSTANT(INT_CURR_SYMBOL);
  361. #endif
  362. #ifdef CURRENCY_SYMBOL
  363. REGISTER_NL_LANGINFO_CONSTANT(CURRENCY_SYMBOL);
  364. #endif
  365. #ifdef CRNCYSTR
  366. REGISTER_NL_LANGINFO_CONSTANT(CRNCYSTR);
  367. #endif
  368. #ifdef MON_DECIMAL_POINT
  369. REGISTER_NL_LANGINFO_CONSTANT(MON_DECIMAL_POINT);
  370. #endif
  371. #ifdef MON_THOUSANDS_SEP
  372. REGISTER_NL_LANGINFO_CONSTANT(MON_THOUSANDS_SEP);
  373. #endif
  374. #ifdef MON_GROUPING
  375. REGISTER_NL_LANGINFO_CONSTANT(MON_GROUPING);
  376. #endif
  377. #ifdef POSITIVE_SIGN
  378. REGISTER_NL_LANGINFO_CONSTANT(POSITIVE_SIGN);
  379. #endif
  380. #ifdef NEGATIVE_SIGN
  381. REGISTER_NL_LANGINFO_CONSTANT(NEGATIVE_SIGN);
  382. #endif
  383. #ifdef INT_FRAC_DIGITS
  384. REGISTER_NL_LANGINFO_CONSTANT(INT_FRAC_DIGITS);
  385. #endif
  386. #ifdef FRAC_DIGITS
  387. REGISTER_NL_LANGINFO_CONSTANT(FRAC_DIGITS);
  388. #endif
  389. #ifdef P_CS_PRECEDES
  390. REGISTER_NL_LANGINFO_CONSTANT(P_CS_PRECEDES);
  391. #endif
  392. #ifdef P_SEP_BY_SPACE
  393. REGISTER_NL_LANGINFO_CONSTANT(P_SEP_BY_SPACE);
  394. #endif
  395. #ifdef N_CS_PRECEDES
  396. REGISTER_NL_LANGINFO_CONSTANT(N_CS_PRECEDES);
  397. #endif
  398. #ifdef N_SEP_BY_SPACE
  399. REGISTER_NL_LANGINFO_CONSTANT(N_SEP_BY_SPACE);
  400. #endif
  401. #ifdef P_SIGN_POSN
  402. REGISTER_NL_LANGINFO_CONSTANT(P_SIGN_POSN);
  403. #endif
  404. #ifdef N_SIGN_POSN
  405. REGISTER_NL_LANGINFO_CONSTANT(N_SIGN_POSN);
  406. #endif
  407. #ifdef DECIMAL_POINT
  408. REGISTER_NL_LANGINFO_CONSTANT(DECIMAL_POINT);
  409. #endif
  410. #ifdef RADIXCHAR
  411. REGISTER_NL_LANGINFO_CONSTANT(RADIXCHAR);
  412. #endif
  413. #ifdef THOUSANDS_SEP
  414. REGISTER_NL_LANGINFO_CONSTANT(THOUSANDS_SEP);
  415. #endif
  416. #ifdef THOUSEP
  417. REGISTER_NL_LANGINFO_CONSTANT(THOUSEP);
  418. #endif
  419. #ifdef GROUPING
  420. REGISTER_NL_LANGINFO_CONSTANT(GROUPING);
  421. #endif
  422. #ifdef YESEXPR
  423. REGISTER_NL_LANGINFO_CONSTANT(YESEXPR);
  424. #endif
  425. #ifdef NOEXPR
  426. REGISTER_NL_LANGINFO_CONSTANT(NOEXPR);
  427. #endif
  428. #ifdef YESSTR
  429. REGISTER_NL_LANGINFO_CONSTANT(YESSTR);
  430. #endif
  431. #ifdef NOSTR
  432. REGISTER_NL_LANGINFO_CONSTANT(NOSTR);
  433. #endif
  434. #ifdef CODESET
  435. REGISTER_NL_LANGINFO_CONSTANT(CODESET);
  436. #endif
  437. #undef REGISTER_NL_LANGINFO_CONSTANT
  438. return SUCCESS;
  439. }
  440. /* }}} */
  441. /* {{{ Query language and locale information */
  442. PHP_FUNCTION(nl_langinfo)
  443. {
  444. zend_long item;
  445. char *value;
  446. ZEND_PARSE_PARAMETERS_START(1, 1)
  447. Z_PARAM_LONG(item)
  448. ZEND_PARSE_PARAMETERS_END();
  449. switch(item) { /* {{{ */
  450. #ifdef ABDAY_1
  451. case ABDAY_1:
  452. case ABDAY_2:
  453. case ABDAY_3:
  454. case ABDAY_4:
  455. case ABDAY_5:
  456. case ABDAY_6:
  457. case ABDAY_7:
  458. #endif
  459. #ifdef DAY_1
  460. case DAY_1:
  461. case DAY_2:
  462. case DAY_3:
  463. case DAY_4:
  464. case DAY_5:
  465. case DAY_6:
  466. case DAY_7:
  467. #endif
  468. #ifdef ABMON_1
  469. case ABMON_1:
  470. case ABMON_2:
  471. case ABMON_3:
  472. case ABMON_4:
  473. case ABMON_5:
  474. case ABMON_6:
  475. case ABMON_7:
  476. case ABMON_8:
  477. case ABMON_9:
  478. case ABMON_10:
  479. case ABMON_11:
  480. case ABMON_12:
  481. #endif
  482. #ifdef MON_1
  483. case MON_1:
  484. case MON_2:
  485. case MON_3:
  486. case MON_4:
  487. case MON_5:
  488. case MON_6:
  489. case MON_7:
  490. case MON_8:
  491. case MON_9:
  492. case MON_10:
  493. case MON_11:
  494. case MON_12:
  495. #endif
  496. #ifdef AM_STR
  497. case AM_STR:
  498. #endif
  499. #ifdef PM_STR
  500. case PM_STR:
  501. #endif
  502. #ifdef D_T_FMT
  503. case D_T_FMT:
  504. #endif
  505. #ifdef D_FMT
  506. case D_FMT:
  507. #endif
  508. #ifdef T_FMT
  509. case T_FMT:
  510. #endif
  511. #ifdef T_FMT_AMPM
  512. case T_FMT_AMPM:
  513. #endif
  514. #ifdef ERA
  515. case ERA:
  516. #endif
  517. #ifdef ERA_YEAR
  518. case ERA_YEAR:
  519. #endif
  520. #ifdef ERA_D_T_FMT
  521. case ERA_D_T_FMT:
  522. #endif
  523. #ifdef ERA_D_FMT
  524. case ERA_D_FMT:
  525. #endif
  526. #ifdef ERA_T_FMT
  527. case ERA_T_FMT:
  528. #endif
  529. #ifdef ALT_DIGITS
  530. case ALT_DIGITS:
  531. #endif
  532. #ifdef INT_CURR_SYMBOL
  533. case INT_CURR_SYMBOL:
  534. #endif
  535. #ifdef CURRENCY_SYMBOL
  536. case CURRENCY_SYMBOL:
  537. #endif
  538. #ifdef CRNCYSTR
  539. case CRNCYSTR:
  540. #endif
  541. #ifdef MON_DECIMAL_POINT
  542. case MON_DECIMAL_POINT:
  543. #endif
  544. #ifdef MON_THOUSANDS_SEP
  545. case MON_THOUSANDS_SEP:
  546. #endif
  547. #ifdef MON_GROUPING
  548. case MON_GROUPING:
  549. #endif
  550. #ifdef POSITIVE_SIGN
  551. case POSITIVE_SIGN:
  552. #endif
  553. #ifdef NEGATIVE_SIGN
  554. case NEGATIVE_SIGN:
  555. #endif
  556. #ifdef INT_FRAC_DIGITS
  557. case INT_FRAC_DIGITS:
  558. #endif
  559. #ifdef FRAC_DIGITS
  560. case FRAC_DIGITS:
  561. #endif
  562. #ifdef P_CS_PRECEDES
  563. case P_CS_PRECEDES:
  564. #endif
  565. #ifdef P_SEP_BY_SPACE
  566. case P_SEP_BY_SPACE:
  567. #endif
  568. #ifdef N_CS_PRECEDES
  569. case N_CS_PRECEDES:
  570. #endif
  571. #ifdef N_SEP_BY_SPACE
  572. case N_SEP_BY_SPACE:
  573. #endif
  574. #ifdef P_SIGN_POSN
  575. case P_SIGN_POSN:
  576. #endif
  577. #ifdef N_SIGN_POSN
  578. case N_SIGN_POSN:
  579. #endif
  580. #ifdef DECIMAL_POINT
  581. case DECIMAL_POINT:
  582. #elif defined(RADIXCHAR)
  583. case RADIXCHAR:
  584. #endif
  585. #ifdef THOUSANDS_SEP
  586. case THOUSANDS_SEP:
  587. #elif defined(THOUSEP)
  588. case THOUSEP:
  589. #endif
  590. #ifdef GROUPING
  591. case GROUPING:
  592. #endif
  593. #ifdef YESEXPR
  594. case YESEXPR:
  595. #endif
  596. #ifdef NOEXPR
  597. case NOEXPR:
  598. #endif
  599. #ifdef YESSTR
  600. case YESSTR:
  601. #endif
  602. #ifdef NOSTR
  603. case NOSTR:
  604. #endif
  605. #ifdef CODESET
  606. case CODESET:
  607. #endif
  608. break;
  609. default:
  610. php_error_docref(NULL, E_WARNING, "Item '" ZEND_LONG_FMT "' is not valid", item);
  611. RETURN_FALSE;
  612. }
  613. /* }}} */
  614. value = nl_langinfo(item);
  615. if (value == NULL) {
  616. RETURN_FALSE;
  617. } else {
  618. RETURN_STRING(value);
  619. }
  620. }
  621. #endif
  622. /* }}} */
  623. /* {{{ Compares two strings using the current locale */
  624. PHP_FUNCTION(strcoll)
  625. {
  626. zend_string *s1, *s2;
  627. ZEND_PARSE_PARAMETERS_START(2, 2)
  628. Z_PARAM_STR(s1)
  629. Z_PARAM_STR(s2)
  630. ZEND_PARSE_PARAMETERS_END();
  631. RETURN_LONG(strcoll((const char *) ZSTR_VAL(s1),
  632. (const char *) ZSTR_VAL(s2)));
  633. }
  634. /* }}} */
  635. /* {{{ php_charmask
  636. * Fills a 256-byte bytemask with input. You can specify a range like 'a..z',
  637. * it needs to be incrementing.
  638. * Returns: FAILURE/SUCCESS whether the input was correct (i.e. no range errors)
  639. */
  640. static inline int php_charmask(const unsigned char *input, size_t len, char *mask)
  641. {
  642. const unsigned char *end;
  643. unsigned char c;
  644. int result = SUCCESS;
  645. memset(mask, 0, 256);
  646. for (end = input+len; input < end; input++) {
  647. c=*input;
  648. if ((input+3 < end) && input[1] == '.' && input[2] == '.'
  649. && input[3] >= c) {
  650. memset(mask+c, 1, input[3] - c + 1);
  651. input+=3;
  652. } else if ((input+1 < end) && input[0] == '.' && input[1] == '.') {
  653. /* Error, try to be as helpful as possible:
  654. (a range ending/starting with '.' won't be captured here) */
  655. if (end-len >= input) { /* there was no 'left' char */
  656. php_error_docref(NULL, E_WARNING, "Invalid '..'-range, no character to the left of '..'");
  657. result = FAILURE;
  658. continue;
  659. }
  660. if (input+2 >= end) { /* there is no 'right' char */
  661. php_error_docref(NULL, E_WARNING, "Invalid '..'-range, no character to the right of '..'");
  662. result = FAILURE;
  663. continue;
  664. }
  665. if (input[-1] > input[2]) { /* wrong order */
  666. php_error_docref(NULL, E_WARNING, "Invalid '..'-range, '..'-range needs to be incrementing");
  667. result = FAILURE;
  668. continue;
  669. }
  670. /* FIXME: better error (a..b..c is the only left possibility?) */
  671. php_error_docref(NULL, E_WARNING, "Invalid '..'-range");
  672. result = FAILURE;
  673. continue;
  674. } else {
  675. mask[c]=1;
  676. }
  677. }
  678. return result;
  679. }
  680. /* }}} */
  681. /* {{{ php_trim_int()
  682. * mode 1 : trim left
  683. * mode 2 : trim right
  684. * mode 3 : trim left and right
  685. * what indicates which chars are to be trimmed. NULL->default (' \t\n\r\v\0')
  686. */
  687. static zend_always_inline zend_string *php_trim_int(zend_string *str, const char *what, size_t what_len, int mode)
  688. {
  689. const char *start = ZSTR_VAL(str);
  690. const char *end = start + ZSTR_LEN(str);
  691. char mask[256];
  692. if (what) {
  693. if (what_len == 1) {
  694. char p = *what;
  695. if (mode & 1) {
  696. while (start != end) {
  697. if (*start == p) {
  698. start++;
  699. } else {
  700. break;
  701. }
  702. }
  703. }
  704. if (mode & 2) {
  705. while (start != end) {
  706. if (*(end-1) == p) {
  707. end--;
  708. } else {
  709. break;
  710. }
  711. }
  712. }
  713. } else {
  714. php_charmask((const unsigned char *) what, what_len, mask);
  715. if (mode & 1) {
  716. while (start != end) {
  717. if (mask[(unsigned char)*start]) {
  718. start++;
  719. } else {
  720. break;
  721. }
  722. }
  723. }
  724. if (mode & 2) {
  725. while (start != end) {
  726. if (mask[(unsigned char)*(end-1)]) {
  727. end--;
  728. } else {
  729. break;
  730. }
  731. }
  732. }
  733. }
  734. } else {
  735. if (mode & 1) {
  736. while (start != end) {
  737. unsigned char c = (unsigned char)*start;
  738. if (c <= ' ' &&
  739. (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' || c == '\0')) {
  740. start++;
  741. } else {
  742. break;
  743. }
  744. }
  745. }
  746. if (mode & 2) {
  747. while (start != end) {
  748. unsigned char c = (unsigned char)*(end-1);
  749. if (c <= ' ' &&
  750. (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' || c == '\0')) {
  751. end--;
  752. } else {
  753. break;
  754. }
  755. }
  756. }
  757. }
  758. if (ZSTR_LEN(str) == end - start) {
  759. return zend_string_copy(str);
  760. } else if (end - start == 0) {
  761. return ZSTR_EMPTY_ALLOC();
  762. } else {
  763. return zend_string_init(start, end - start, 0);
  764. }
  765. }
  766. /* }}} */
  767. /* {{{ php_trim_int()
  768. * mode 1 : trim left
  769. * mode 2 : trim right
  770. * mode 3 : trim left and right
  771. * what indicates which chars are to be trimmed. NULL->default (' \t\n\r\v\0')
  772. */
  773. PHPAPI zend_string *php_trim(zend_string *str, const char *what, size_t what_len, int mode)
  774. {
  775. return php_trim_int(str, what, what_len, mode);
  776. }
  777. /* }}} */
  778. /* {{{ php_do_trim
  779. * Base for trim(), rtrim() and ltrim() functions.
  780. */
  781. static zend_always_inline void php_do_trim(INTERNAL_FUNCTION_PARAMETERS, int mode)
  782. {
  783. zend_string *str;
  784. zend_string *what = NULL;
  785. ZEND_PARSE_PARAMETERS_START(1, 2)
  786. Z_PARAM_STR(str)
  787. Z_PARAM_OPTIONAL
  788. Z_PARAM_STR(what)
  789. ZEND_PARSE_PARAMETERS_END();
  790. ZVAL_STR(return_value, php_trim_int(str, (what ? ZSTR_VAL(what) : NULL), (what ? ZSTR_LEN(what) : 0), mode));
  791. }
  792. /* }}} */
  793. /* {{{ Strips whitespace from the beginning and end of a string */
  794. PHP_FUNCTION(trim)
  795. {
  796. php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 3);
  797. }
  798. /* }}} */
  799. /* {{{ Removes trailing whitespace */
  800. PHP_FUNCTION(rtrim)
  801. {
  802. php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
  803. }
  804. /* }}} */
  805. /* {{{ Strips whitespace from the beginning of a string */
  806. PHP_FUNCTION(ltrim)
  807. {
  808. php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  809. }
  810. /* }}} */
  811. /* {{{ Wraps text at linelength characters (default 75) using breakchar (default "\n") as the line break; docut additionally allows words longer than linelength to be split. An empty breakchar, or docut together with a linelength of 0, raises an argument value error. */
  812. PHP_FUNCTION(wordwrap)
  813. {
  814. zend_string *text;
  815. char *breakchar = "\n";
  816. size_t newtextlen, chk, breakchar_len = 1; /* newtextlen: bytes written to newtext so far; chk: breaks still budgeted before newtext must grow */
  817. size_t alloced; /* running size passed to zend_string_extend() when the budget runs out */
  818. zend_long current = 0, laststart = 0, lastspace = 0; /* byte offsets into text: scan position, start of the current output line, last space seen */
  819. zend_long linelength = 75;
  820. bool docut = 0;
  821. zend_string *newtext;
  822. ZEND_PARSE_PARAMETERS_START(1, 4)
  823. Z_PARAM_STR(text)
  824. Z_PARAM_OPTIONAL
  825. Z_PARAM_LONG(linelength)
  826. Z_PARAM_STRING(breakchar, breakchar_len)
  827. Z_PARAM_BOOL(docut)
  828. ZEND_PARSE_PARAMETERS_END();
  829. if (ZSTR_LEN(text) == 0) { /* empty input returns "" before the argument checks below run */
  830. RETURN_EMPTY_STRING();
  831. }
  832. if (breakchar_len == 0) {
  833. zend_argument_value_error(3, "cannot be empty");
  834. RETURN_THROWS();
  835. }
  836. if (linelength == 0 && docut) {
  837. zend_argument_value_error(4, "cannot be true when argument #2 ($width) is 0");
  838. RETURN_THROWS();
  839. }
  840. /* Special case for a single-character break without forced cutting: it needs no
  841. additional storage space, because each break is written over a character in a copy of the input */
  842. if (breakchar_len == 1 && !docut) {
  843. newtext = zend_string_init(ZSTR_VAL(text), ZSTR_LEN(text), 0);
  844. laststart = lastspace = 0;
  845. for (current = 0; current < (zend_long)ZSTR_LEN(text); current++) {
  846. if (ZSTR_VAL(text)[current] == breakchar[0]) { /* existing break: the next line starts right after it */
  847. laststart = lastspace = current + 1;
  848. } else if (ZSTR_VAL(text)[current] == ' ') {
  849. if (current - laststart >= linelength) { /* the line is already full at this space: break here */
  850. ZSTR_VAL(newtext)[current] = breakchar[0];
  851. laststart = current + 1;
  852. }
  853. lastspace = current;
  854. } else if (current - laststart >= linelength && laststart != lastspace) { /* line is full mid-word: break at the last space seen on this line */
  855. ZSTR_VAL(newtext)[lastspace] = breakchar[0];
  856. laststart = lastspace + 1;
  857. }
  858. }
  859. RETURN_NEW_STR(newtext);
  860. } else {
  861. /* Multiple character line break or forced cut: breaks are inserted, so the result is assembled in a separately sized buffer */
  862. if (linelength > 0) {
  863. chk = (size_t)(ZSTR_LEN(text)/linelength + 1); /* initial budget: one break per linelength input bytes, plus one */
  864. newtext = zend_string_safe_alloc(chk, breakchar_len, ZSTR_LEN(text), 0);
  865. alloced = ZSTR_LEN(text) + chk * breakchar_len + 1;
  866. } else {
  867. chk = ZSTR_LEN(text); /* linelength <= 0: budget one break per input byte */
  868. alloced = ZSTR_LEN(text) * (breakchar_len + 1) + 1;
  869. newtext = zend_string_safe_alloc(ZSTR_LEN(text), breakchar_len + 1, 0, 0);
  870. }
  871. /* now keep track of the actual new text length */
  872. newtextlen = 0;
  873. laststart = lastspace = 0;
  874. for (current = 0; current < (zend_long)ZSTR_LEN(text); current++) {
  875. if (chk == 0) { /* break budget used up: grow newtext and compute a new budget for the rest of the input */
  876. alloced += (size_t) (((ZSTR_LEN(text) - current + 1)/linelength + 1) * breakchar_len) + 1;
  877. newtext = zend_string_extend(newtext, alloced, 0);
  878. chk = (size_t) ((ZSTR_LEN(text) - current)/linelength) + 1;
  879. }
  880. /* when we hit an existing break, copy to new buffer, and
  881. * fix up laststart and lastspace */
  882. if (ZSTR_VAL(text)[current] == breakchar[0]
  883. && current + breakchar_len < ZSTR_LEN(text) /* strict '<': a break sequence ending exactly at the end of text does not take this branch */
  884. && !strncmp(ZSTR_VAL(text) + current, breakchar, breakchar_len)) {
  885. memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart + breakchar_len);
  886. newtextlen += current - laststart + breakchar_len;
  887. current += breakchar_len - 1; /* skip the rest of the break; the loop's current++ steps past its last byte */
  888. laststart = lastspace = current + 1;
  889. chk--;
  890. }
  891. /* if it is a space, check if it is at the line boundary,
  892. * copy and insert a break, or just keep track of it */
  893. else if (ZSTR_VAL(text)[current] == ' ') {
  894. if (current - laststart >= linelength) {
  895. memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
  896. newtextlen += current - laststart;
  897. memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
  898. newtextlen += breakchar_len;
  899. laststart = current + 1; /* the space itself is dropped from the output */
  900. chk--;
  901. }
  902. lastspace = current;
  903. }
  904. /* if we are cutting, and we've accumulated enough
  905. * characters, and we haven't seen a space for this line,
  906. * copy and insert a break. */
  907. else if (current - laststart >= linelength
  908. && docut && laststart >= lastspace) {
  909. memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
  910. newtextlen += current - laststart;
  911. memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
  912. newtextlen += breakchar_len;
  913. laststart = lastspace = current; /* the current character starts the next line; nothing is dropped */
  914. chk--;
  915. }
  916. /* if the current word puts us over the linelength, copy
  917. * back up until the last space, insert a break, and move
  918. * up the laststart */
  919. else if (current - laststart >= linelength
  920. && laststart < lastspace) {
  921. memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, lastspace - laststart);
  922. newtextlen += lastspace - laststart;
  923. memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
  924. newtextlen += breakchar_len;
  925. laststart = lastspace = lastspace + 1;
  926. chk--;
  927. }
  928. }
  929. /* copy over any stragglers: the input from laststart to the end that has not been emitted yet */
  930. if (laststart != current) {
  931. memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
  932. newtextlen += current - laststart;
  933. }
  934. ZSTR_VAL(newtext)[newtextlen] = '\0';
  935. /* free unused memory by truncating the buffer to the bytes actually written */
  936. newtext = zend_string_truncate(newtext, newtextlen, 0);
  937. RETURN_NEW_STR(newtext);
  938. }
  939. }
  940. /* }}} */
  941. /* {{{ php_explode */
  942. PHPAPI void php_explode(const zend_string *delim, zend_string *str, zval *return_value, zend_long limit)
  943. {
  944. const char *p1 = ZSTR_VAL(str);
  945. const char *endp = ZSTR_VAL(str) + ZSTR_LEN(str);
  946. const char *p2 = php_memnstr(ZSTR_VAL(str), ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
  947. zval tmp;
  948. if (p2 == NULL) {
  949. ZVAL_STR_COPY(&tmp, str);
  950. zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
  951. } else {
  952. zend_hash_real_init_packed(Z_ARRVAL_P(return_value));
  953. ZEND_HASH_FILL_PACKED(Z_ARRVAL_P(return_value)) {
  954. do {
  955. ZEND_HASH_FILL_GROW();
  956. ZEND_HASH_FILL_SET_STR(zend_string_init_fast(p1, p2 - p1));
  957. ZEND_HASH_FILL_NEXT();
  958. p1 = p2 + ZSTR_LEN(delim);
  959. p2 = php_memnstr(p1, ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
  960. } while (p2 != NULL && --limit > 1);
  961. if (p1 <= endp) {
  962. ZEND_HASH_FILL_GROW();
  963. ZEND_HASH_FILL_SET_STR(zend_string_init_fast(p1, endp - p1));
  964. ZEND_HASH_FILL_NEXT();
  965. }
  966. } ZEND_HASH_FILL_END();
  967. }
  968. }
  969. /* }}} */
  970. /* {{{ php_explode_negative_limit */
  971. PHPAPI void php_explode_negative_limit(const zend_string *delim, zend_string *str, zval *return_value, zend_long limit)
  972. {
  973. #define EXPLODE_ALLOC_STEP 64
  974. const char *p1 = ZSTR_VAL(str);
  975. const char *endp = ZSTR_VAL(str) + ZSTR_LEN(str);
  976. const char *p2 = php_memnstr(ZSTR_VAL(str), ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
  977. zval tmp;
  978. if (p2 == NULL) {
  979. /*
  980. do nothing since limit <= -1, thus if only one chunk - 1 + (limit) <= 0
  981. by doing nothing we return empty array
  982. */
  983. } else {
  984. size_t allocated = EXPLODE_ALLOC_STEP, found = 0;
  985. zend_long i, to_return;
  986. const char **positions = emalloc(allocated * sizeof(char *));
  987. positions[found++] = p1;
  988. do {
  989. if (found >= allocated) {
  990. allocated = found + EXPLODE_ALLOC_STEP;/* make sure we have enough memory */
  991. positions = erealloc(ZEND_VOIDP(positions), allocated*sizeof(char *));
  992. }
  993. positions[found++] = p1 = p2 + ZSTR_LEN(delim);
  994. p2 = php_memnstr(p1, ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
  995. } while (p2 != NULL);
  996. to_return = limit + found;
  997. /* limit is at least -1 therefore no need of bounds checking : i will be always less than found */
  998. for (i = 0; i < to_return; i++) { /* this checks also for to_return > 0 */
  999. ZVAL_STRINGL(&tmp, positions[i], (positions[i+1] - ZSTR_LEN(delim)) - positions[i]);
  1000. zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
  1001. }
  1002. efree((void *)positions);
  1003. }
  1004. #undef EXPLODE_ALLOC_STEP
  1005. }
  1006. /* }}} */
  1007. /* {{{ Splits a string on string separator and return array of components. If limit is positive only limit number of components is returned. If limit is negative all components except the last abs(limit) are returned. */
  1008. PHP_FUNCTION(explode)
  1009. {
  1010. zend_string *str, *delim;
  1011. zend_long limit = ZEND_LONG_MAX; /* No limit */
  1012. zval tmp;
  1013. ZEND_PARSE_PARAMETERS_START(2, 3)
  1014. Z_PARAM_STR(delim)
  1015. Z_PARAM_STR(str)
  1016. Z_PARAM_OPTIONAL
  1017. Z_PARAM_LONG(limit)
  1018. ZEND_PARSE_PARAMETERS_END();
  1019. if (ZSTR_LEN(delim) == 0) {
  1020. zend_argument_value_error(1, "cannot be empty");
  1021. RETURN_THROWS();
  1022. }
  1023. array_init(return_value);
  1024. if (ZSTR_LEN(str) == 0) {
  1025. if (limit >= 0) {
  1026. ZVAL_EMPTY_STRING(&tmp);
  1027. zend_hash_index_add_new(Z_ARRVAL_P(return_value), 0, &tmp);
  1028. }
  1029. return;
  1030. }
  1031. if (limit > 1) {
  1032. php_explode(delim, str, return_value, limit);
  1033. } else if (limit < 0) {
  1034. php_explode_negative_limit(delim, str, return_value, limit);
  1035. } else {
  1036. ZVAL_STR_COPY(&tmp, str);
  1037. zend_hash_index_add_new(Z_ARRVAL_P(return_value), 0, &tmp);
  1038. }
  1039. }
  1040. /* }}} */
  1041. /* {{{ An alias for implode */
  1042. /* }}} */
  1043. /* {{{ php_implode */
  1044. PHPAPI void php_implode(const zend_string *glue, HashTable *pieces, zval *return_value)
  1045. {
  1046. zval *tmp;
  1047. int numelems;
  1048. zend_string *str;
  1049. char *cptr;
  1050. size_t len = 0;
  1051. struct {
  1052. zend_string *str;
  1053. zend_long lval;
  1054. } *strings, *ptr;
  1055. ALLOCA_FLAG(use_heap)
  1056. numelems = zend_hash_num_elements(pieces);
  1057. if (numelems == 0) {
  1058. RETURN_EMPTY_STRING();
  1059. } else if (numelems == 1) {
  1060. /* loop to search the first not undefined element... */
  1061. ZEND_HASH_FOREACH_VAL(pieces, tmp) {
  1062. RETURN_STR(zval_get_string(tmp));
  1063. } ZEND_HASH_FOREACH_END();
  1064. }
  1065. ptr = strings = do_alloca((sizeof(*strings)) * numelems, use_heap);
  1066. ZEND_HASH_FOREACH_VAL(pieces, tmp) {
  1067. if (EXPECTED(Z_TYPE_P(tmp) == IS_STRING)) {
  1068. ptr->str = Z_STR_P(tmp);
  1069. len += ZSTR_LEN(ptr->str);
  1070. ptr->lval = 0;
  1071. ptr++;
  1072. } else if (UNEXPECTED(Z_TYPE_P(tmp) == IS_LONG)) {
  1073. zend_long val = Z_LVAL_P(tmp);
  1074. ptr->str = NULL;
  1075. ptr->lval = val;
  1076. ptr++;
  1077. if (val <= 0) {
  1078. len++;
  1079. }
  1080. while (val) {
  1081. val /= 10;
  1082. len++;
  1083. }
  1084. } else {
  1085. ptr->str = zval_get_string_func(tmp);
  1086. len += ZSTR_LEN(ptr->str);
  1087. ptr->lval = 1;
  1088. ptr++;
  1089. }
  1090. } ZEND_HASH_FOREACH_END();
  1091. /* numelems can not be 0, we checked above */
  1092. str = zend_string_safe_alloc(numelems - 1, ZSTR_LEN(glue), len, 0);
  1093. cptr = ZSTR_VAL(str) + ZSTR_LEN(str);
  1094. *cptr = 0;
  1095. while (1) {
  1096. ptr--;
  1097. if (EXPECTED(ptr->str)) {
  1098. cptr -= ZSTR_LEN(ptr->str);
  1099. memcpy(cptr, ZSTR_VAL(ptr->str), ZSTR_LEN(ptr->str));
  1100. if (ptr->lval) {
  1101. zend_string_release_ex(ptr->str, 0);
  1102. }
  1103. } else {
  1104. char *oldPtr = cptr;
  1105. char oldVal = *cptr;
  1106. cptr = zend_print_long_to_buf(cptr, ptr->lval);
  1107. *oldPtr = oldVal;
  1108. }
  1109. if (ptr == strings) {
  1110. break;
  1111. }
  1112. cptr -= ZSTR_LEN(glue);
  1113. memcpy(cptr, ZSTR_VAL(glue), ZSTR_LEN(glue));
  1114. }
  1115. free_alloca(strings, use_heap);
  1116. RETURN_NEW_STR(str);
  1117. }
  1118. /* }}} */
  1119. /* {{{ Joins array elements placing glue string between items and return one string */
  1120. PHP_FUNCTION(implode)
  1121. {
  1122. zend_string *arg1_str = NULL;
  1123. HashTable *arg1_array = NULL;
  1124. zend_array *pieces = NULL;
  1125. ZEND_PARSE_PARAMETERS_START(1, 2)
  1126. Z_PARAM_ARRAY_HT_OR_STR(arg1_array, arg1_str)
  1127. Z_PARAM_OPTIONAL
  1128. Z_PARAM_ARRAY_HT_OR_NULL(pieces)
  1129. ZEND_PARSE_PARAMETERS_END();
  1130. if (pieces == NULL) {
  1131. if (arg1_array == NULL) {
  1132. zend_type_error("%s(): Argument #1 ($pieces) must be of type array, string given", get_active_function_name());
  1133. RETURN_THROWS();
  1134. }
  1135. arg1_str = ZSTR_EMPTY_ALLOC();
  1136. pieces = arg1_array;
  1137. } else {
  1138. if (arg1_str == NULL) {
  1139. zend_argument_type_error(1, "must be of type string, array given");
  1140. RETURN_THROWS();
  1141. }
  1142. }
  1143. php_implode(arg1_str, pieces, return_value);
  1144. }
  1145. /* }}} */
  1146. #define STRTOK_TABLE(p) BG(strtok_table)[(unsigned char) *p]
  1147. /* {{{ Tokenize a string */
  1148. PHP_FUNCTION(strtok)
  1149. {
  1150. zend_string *str, *tok = NULL;
  1151. char *token;
  1152. char *token_end;
  1153. char *p;
  1154. char *pe;
  1155. size_t skipped = 0;
  1156. ZEND_PARSE_PARAMETERS_START(1, 2)
  1157. Z_PARAM_STR(str)
  1158. Z_PARAM_OPTIONAL
  1159. Z_PARAM_STR_OR_NULL(tok)
  1160. ZEND_PARSE_PARAMETERS_END();
  1161. if (!tok) {
  1162. tok = str;
  1163. } else {
  1164. if (BG(strtok_string)) {
  1165. zend_string_release(BG(strtok_string));
  1166. }
  1167. BG(strtok_string) = zend_string_copy(str);
  1168. BG(strtok_last) = ZSTR_VAL(str);
  1169. BG(strtok_len) = ZSTR_LEN(str);
  1170. }
  1171. if (!BG(strtok_string)) {
  1172. /* String to tokenize not set. */
  1173. // TODO: Should this warn?
  1174. RETURN_FALSE;
  1175. }
  1176. p = BG(strtok_last); /* Where we start to search */
  1177. pe = ZSTR_VAL(BG(strtok_string)) + BG(strtok_len);
  1178. if (p >= pe) {
  1179. /* Reached the end of the string. */
  1180. RETURN_FALSE;
  1181. }
  1182. token = ZSTR_VAL(tok);
  1183. token_end = token + ZSTR_LEN(tok);
  1184. while (token < token_end) {
  1185. STRTOK_TABLE(token++) = 1;
  1186. }
  1187. /* Skip leading delimiters */
  1188. while (STRTOK_TABLE(p)) {
  1189. if (++p >= pe) {
  1190. /* no other chars left */
  1191. goto return_false;
  1192. }
  1193. skipped++;
  1194. }
  1195. /* We know at this place that *p is no delimiter, so skip it */
  1196. while (++p < pe) {
  1197. if (STRTOK_TABLE(p)) {
  1198. goto return_token;
  1199. }
  1200. }
  1201. if (p - BG(strtok_last)) {
  1202. return_token:
  1203. RETVAL_STRINGL(BG(strtok_last) + skipped, (p - BG(strtok_last)) - skipped);
  1204. BG(strtok_last) = p + 1;
  1205. } else {
  1206. return_false:
  1207. RETVAL_FALSE;
  1208. zend_string_release(BG(strtok_string));
  1209. BG(strtok_string) = NULL;
  1210. }
  1211. /* Restore table -- usually faster then memset'ing the table on every invocation */
  1212. token = ZSTR_VAL(tok);
  1213. while (token < token_end) {
  1214. STRTOK_TABLE(token++) = 0;
  1215. }
  1216. }
  1217. /* }}} */
  /* {{{ php_strtoupper
   * Uppercases the first `len` bytes of `s` in place using toupper() and
   * returns `s`. */
  PHPAPI char *php_strtoupper(char *s, size_t len)
  {
      unsigned char *bytes = (unsigned char *)s;
      size_t i;

      for (i = 0; i < len; i++) {
          bytes[i] = toupper(bytes[i]);
      }
      return s;
  }
  /* }}} */
  1232. /* {{{ php_string_toupper */
  1233. PHPAPI zend_string *php_string_toupper(zend_string *s)
  1234. {
  1235. unsigned char *c;
  1236. const unsigned char *e;
  1237. c = (unsigned char *)ZSTR_VAL(s);
  1238. e = c + ZSTR_LEN(s);
  1239. while (c < e) {
  1240. if (islower(*c)) {
  1241. unsigned char *r;
  1242. zend_string *res = zend_string_alloc(ZSTR_LEN(s), 0);
  1243. if (c != (unsigned char*)ZSTR_VAL(s)) {
  1244. memcpy(ZSTR_VAL(res), ZSTR_VAL(s), c - (unsigned char*)ZSTR_VAL(s));
  1245. }
  1246. r = c + (ZSTR_VAL(res) - ZSTR_VAL(s));
  1247. while (c < e) {
  1248. *r = toupper(*c);
  1249. r++;
  1250. c++;
  1251. }
  1252. *r = '\0';
  1253. return res;
  1254. }
  1255. c++;
  1256. }
  1257. return zend_string_copy(s);
  1258. }
  1259. /* }}} */
  1260. /* {{{ Makes a string uppercase */
  1261. PHP_FUNCTION(strtoupper)
  1262. {
  1263. zend_string *arg;
  1264. ZEND_PARSE_PARAMETERS_START(1, 1)
  1265. Z_PARAM_STR(arg)
  1266. ZEND_PARSE_PARAMETERS_END();
  1267. RETURN_STR(php_string_toupper(arg));
  1268. }
  1269. /* }}} */
  /* {{{ php_strtolower
   * Lowercases the first `len` bytes of `s` in place using tolower() and
   * returns `s`. */
  PHPAPI char *php_strtolower(char *s, size_t len)
  {
      unsigned char *bytes = (unsigned char *)s;
      size_t i;

      for (i = 0; i < len; i++) {
          bytes[i] = tolower(bytes[i]);
      }
      return s;
  }
  /* }}} */
  1284. /* {{{ php_string_tolower */
  1285. PHPAPI zend_string *php_string_tolower(zend_string *s)
  1286. {
  1287. unsigned char *c;
  1288. const unsigned char *e;
  1289. if (EXPECTED(!BG(ctype_string))) {
  1290. return zend_string_tolower(s);
  1291. } else {
  1292. c = (unsigned char *)ZSTR_VAL(s);
  1293. e = c + ZSTR_LEN(s);
  1294. while (c < e) {
  1295. if (isupper(*c)) {
  1296. unsigned char *r;
  1297. zend_string *res = zend_string_alloc(ZSTR_LEN(s), 0);
  1298. if (c != (unsigned char*)ZSTR_VAL(s)) {
  1299. memcpy(ZSTR_VAL(res), ZSTR_VAL(s), c - (unsigned char*)ZSTR_VAL(s));
  1300. }
  1301. r = c + (ZSTR_VAL(res) - ZSTR_VAL(s));
  1302. while (c < e) {
  1303. *r = tolower(*c);
  1304. r++;
  1305. c++;
  1306. }
  1307. *r = '\0';
  1308. return res;
  1309. }
  1310. c++;
  1311. }
  1312. return zend_string_copy(s);
  1313. }
  1314. }
  1315. /* }}} */
  1316. /* {{{ Makes a string lowercase */
  1317. PHP_FUNCTION(strtolower)
  1318. {
  1319. zend_string *str;
  1320. ZEND_PARSE_PARAMETERS_START(1, 1)
  1321. Z_PARAM_STR(str)
  1322. ZEND_PARSE_PARAMETERS_END();
  1323. RETURN_STR(php_string_tolower(str));
  1324. }
  1325. /* }}} */
  #if defined(PHP_WIN32)
  /* Decides whether the single character just before `pos` forms the first
   * character of a path component in the string beginning at `start`.
   * That is the case when this character is not a slash or backslash and is
   * either the first character of the string, directly preceded by a slash
   * or backslash, or directly preceded by a ':' for which the same test
   * (applied recursively two characters earlier) succeeds. */
  static bool _is_basename_start(const char *start, const char *pos)
  {
      /* There must be a character before pos, and it must not be a separator. */
      if (pos - start < 1) {
          return 0;
      }
      if (pos[-1] == '/' || pos[-1] == '\\') {
          return 0;
      }

      /* First character of the whole string. */
      if (pos - start == 1) {
          return 1;
      }

      /* Directly after a directory separator. */
      if (pos[-2] == '/' || pos[-2] == '\\') {
          return 1;
      }

      /* Directly after a ':' that itself follows a one-character component. */
      if (pos[-2] == ':' && _is_basename_start(start, pos - 2)) {
          return 1;
      }

      return 0;
  }
  #endif
  1344. /* {{{ php_basename */
  1345. PHPAPI zend_string *php_basename(const char *s, size_t len, const char *suffix, size_t suffix_len)
  1346. {
  1347. const char *basename_start;
  1348. const char *basename_end;
  1349. if (CG(ascii_compatible_locale)) {
  1350. basename_end = s + len - 1;
  1351. /* Strip trailing slashes */
  1352. while (basename_end >= s
  1353. #if defined(PHP_WIN32)
  1354. && (*basename_end == '/'
  1355. || *basename_end == '\\'
  1356. || (*basename_end == ':'
  1357. && _is_basename_start(s, basename_end)))) {
  1358. #else
  1359. && *basename_end == '/') {
  1360. #endif
  1361. basename_end--;
  1362. }
  1363. if (basename_end < s) {
  1364. return ZSTR_EMPTY_ALLOC();
  1365. }
  1366. /* Extract filename */
  1367. basename_start = basename_end;
  1368. basename_end++;
  1369. while (basename_start > s
  1370. #if defined(PHP_WIN32)
  1371. && *(basename_start-1) != '/'
  1372. && *(basename_start-1) != '\\') {
  1373. if (*(basename_start-1) == ':' &&
  1374. _is_basename_start(s, basename_start - 1)) {
  1375. break;
  1376. }
  1377. #else
  1378. && *(basename_start-1) != '/') {
  1379. #endif
  1380. basename_start--;
  1381. }
  1382. } else {
  1383. /* State 0 is directly after a directory separator (or at the start of the string).
  1384. * State 1 is everything else. */
  1385. int state = 0;
  1386. basename_start = s;
  1387. basename_end = s;
  1388. while (len > 0) {
  1389. int inc_len = (*s == '\0' ? 1 : php_mblen(s, len));
  1390. switch (inc_len) {
  1391. case 0:
  1392. goto quit_loop;
  1393. case 1:
  1394. #if defined(PHP_WIN32)
  1395. if (*s == '/' || *s == '\\') {
  1396. #else
  1397. if (*s == '/') {
  1398. #endif
  1399. if (state == 1) {
  1400. state = 0;
  1401. basename_end = s;
  1402. }
  1403. #if defined(PHP_WIN32)
  1404. /* Catch relative paths in c:file.txt style. They're not to confuse
  1405. with the NTFS streams. This part ensures also, that no drive
  1406. letter traversing happens. */
  1407. } else if ((*s == ':' && (s - basename_start == 1))) {
  1408. if (state == 0) {
  1409. basename_start = s;
  1410. state = 1;
  1411. } else {
  1412. basename_end = s;
  1413. state = 0;
  1414. }
  1415. #endif
  1416. } else {
  1417. if (state == 0) {
  1418. basename_start = s;
  1419. state = 1;
  1420. }
  1421. }
  1422. break;
  1423. default:
  1424. if (inc_len < 0) {
  1425. /* If character is invalid, treat it like other non-significant characters. */
  1426. inc_len = 1;
  1427. php_mb_reset();
  1428. }
  1429. if (state == 0) {
  1430. basename_start = s;
  1431. state = 1;
  1432. }
  1433. break;
  1434. }
  1435. s += inc_len;
  1436. len -= inc_len;
  1437. }
  1438. quit_loop:
  1439. if (state == 1) {
  1440. basename_end = s;
  1441. }
  1442. }
  1443. if (suffix != NULL && suffix_len < (size_t)(basename_end - basename_start) &&
  1444. memcmp(basename_end - suffix_len, suffix, suffix_len) == 0) {
  1445. basename_end -= suffix_len;
  1446. }
  1447. return zend_string_init(basename_start, basename_end - basename_start, 0);
  1448. }
  1449. /* }}} */
  1450. /* {{{ Returns the filename component of the path */
  1451. PHP_FUNCTION(basename)
  1452. {
  1453. char *string, *suffix = NULL;
  1454. size_t string_len, suffix_len = 0;
  1455. ZEND_PARSE_PARAMETERS_START(1, 2)
  1456. Z_PARAM_STRING(string, string_len)
  1457. Z_PARAM_OPTIONAL
  1458. Z_PARAM_STRING(suffix, suffix_len)
  1459. ZEND_PARSE_PARAMETERS_END();
  1460. RETURN_STR(php_basename(string, string_len, suffix, suffix_len));
  1461. }
  1462. /* }}} */
  1463. /* {{{ php_dirname
  1464. Returns directory name component of path */
  1465. PHPAPI size_t php_dirname(char *path, size_t len)
  1466. {
  1467. return zend_dirname(path, len);
  1468. }
  1469. /* }}} */
  1470. /* {{{ Returns the directory name component of the path */
  1471. PHP_FUNCTION(dirname)
  1472. {
  1473. char *str;
  1474. size_t str_len;
  1475. zend_string *ret;
  1476. zend_long levels = 1;
  1477. ZEND_PARSE_PARAMETERS_START(1, 2)
  1478. Z_PARAM_STRING(str, str_len)
  1479. Z_PARAM_OPTIONAL
  1480. Z_PARAM_LONG(levels)
  1481. ZEND_PARSE_PARAMETERS_END();
  1482. ret = zend_string_init(str, str_len, 0);
  1483. if (levels == 1) {
  1484. /* Default case */
  1485. #ifdef PHP_WIN32
  1486. ZSTR_LEN(ret) = php_win32_ioutil_dirname(ZSTR_VAL(ret), str_len);
  1487. #else
  1488. ZSTR_LEN(ret) = zend_dirname(ZSTR_VAL(ret), str_len);
  1489. #endif
  1490. } else if (levels < 1) {
  1491. zend_argument_value_error(2, "must be greater than or equal to 1");
  1492. zend_string_efree(ret);
  1493. RETURN_THROWS();
  1494. } else {
  1495. /* Some levels up */
  1496. do {
  1497. #ifdef PHP_WIN32
  1498. ZSTR_LEN(ret) = php_win32_ioutil_dirname(ZSTR_VAL(ret), str_len = ZSTR_LEN(ret));
  1499. #else
  1500. ZSTR_LEN(ret) = zend_dirname(ZSTR_VAL(ret), str_len = ZSTR_LEN(ret));
  1501. #endif
  1502. } while (ZSTR_LEN(ret) < str_len && --levels);
  1503. }
  1504. RETURN_NEW_STR(ret);
  1505. }
  1506. /* }}} */
  1507. /* {{{ Returns information about a certain string */
  1508. PHP_FUNCTION(pathinfo)
  1509. {
  1510. zval tmp;
  1511. char *path, *dirname;
  1512. size_t path_len;
  1513. int have_basename;
  1514. zend_long opt = PHP_PATHINFO_ALL;
  1515. zend_string *ret = NULL;
  1516. ZEND_PARSE_PARAMETERS_START(1, 2)
  1517. Z_PARAM_STRING(path, path_len)
  1518. Z_PARAM_OPTIONAL
  1519. Z_PARAM_LONG(opt)
  1520. ZEND_PARSE_PARAMETERS_END();
  1521. have_basename = ((opt & PHP_PATHINFO_BASENAME) == PHP_PATHINFO_BASENAME);
  1522. array_init(&tmp);
  1523. if ((opt & PHP_PATHINFO_DIRNAME) == PHP_PATHINFO_DIRNAME) {
  1524. dirname = estrndup(path, path_len);
  1525. php_dirname(dirname, path_len);
  1526. if (*dirname) {
  1527. add_assoc_string(&tmp, "dirname", dirname);
  1528. }
  1529. efree(dirname);
  1530. }
  1531. if (have_basename) {
  1532. ret = php_basename(path, path_len, NULL, 0);
  1533. add_assoc_str(&tmp, "basename", zend_string_copy(ret));
  1534. }
  1535. if ((opt & PHP_PATHINFO_EXTENSION) == PHP_PATHINFO_EXTENSION) {
  1536. const char *p;
  1537. ptrdiff_t idx;
  1538. if (!have_basename) {
  1539. ret = php_basename(path, path_len, NULL, 0);
  1540. }
  1541. p = zend_memrchr(ZSTR_VAL(ret), '.', ZSTR_LEN(ret));
  1542. if (p) {
  1543. idx = p - ZSTR_VAL(ret);
  1544. add_assoc_stringl(&tmp, "extension", ZSTR_VAL(ret) + idx + 1, ZSTR_LEN(ret) - idx - 1);
  1545. }
  1546. }
  1547. if ((opt & PHP_PATHINFO_FILENAME) == PHP_PATHINFO_FILENAME) {
  1548. const char *p;
  1549. ptrdiff_t idx;
  1550. /* Have we already looked up the basename? */
  1551. if (!have_basename && !ret) {
  1552. ret = php_basename(path, path_len, NULL, 0);
  1553. }
  1554. p = zend_memrchr(ZSTR_VAL(ret), '.', ZSTR_LEN(ret));
  1555. idx = p ? (p - ZSTR_VAL(ret)) : (ptrdiff_t)ZSTR_LEN(ret);
  1556. add_assoc_stringl(&tmp, "filename", ZSTR_VAL(ret), idx);
  1557. }
  1558. if (ret) {
  1559. zend_string_release_ex(ret, 0);
  1560. }
  1561. if (opt == PHP_PATHINFO_ALL) {
  1562. RETURN_COPY_VALUE(&tmp);
  1563. } else {
  1564. zval *element;
  1565. if ((element = zend_hash_get_current_data(Z_ARRVAL(tmp))) != NULL) {
  1566. RETVAL_COPY_DEREF(element);
  1567. } else {
  1568. RETVAL_EMPTY_STRING();
  1569. }
  1570. zval_ptr_dtor(&tmp);
  1571. }
  1572. }
  1573. /* }}} */
  1574. /* {{{ php_stristr
  1575. case insensitive strstr */
  1576. PHPAPI char *php_stristr(char *s, char *t, size_t s_len, size_t t_len)
  1577. {
  1578. php_strtolower(s, s_len);
  1579. php_strtolower(t, t_len);
  1580. return (char*)php_memnstr(s, t, t_len, s + s_len);
  1581. }
  1582. /* }}} */
  /* {{{ php_strspn
   * Returns the length of the leading run of bytes in [s1, s1_end) that all
   * occur in the set [s2, s2_end).
   *
   * Only bytes strictly inside the two ranges are read, so neither range
   * needs to be NUL-terminated. An empty subject or an empty set yields 0. */
  PHPAPI size_t php_strspn(const char *s1, const char *s2, const char *s1_end, const char *s2_end)
  {
      const char *p;

      for (p = s1; p != s1_end; p++) {
          const char *spanp;

          /* Look the current byte up in the set. */
          for (spanp = s2; spanp != s2_end; spanp++) {
              if (*spanp == *p) {
                  break;
              }
          }

          /* Not a member: the run ends right before this byte. */
          if (spanp == s2_end) {
              break;
          }
      }

      return (size_t)(p - s1);
  }
  /* }}} */
  /* {{{ php_strcspn
   * Returns the length of the leading run of bytes in [s1, s1_end) that do
   * NOT occur in the set [s2, s2_end).
   *
   * Only bytes strictly inside the two ranges are read. In particular an
   * empty set rejects nothing, so the full subject length is returned even
   * when the subject contains NUL bytes (the byte at s2_end is never
   * inspected). */
  PHPAPI size_t php_strcspn(const char *s1, const char *s2, const char *s1_end, const char *s2_end)
  {
      const char *p;

      for (p = s1; p != s1_end; p++) {
          const char *spanp;

          /* Stop at the first subject byte that is a member of the set. */
          for (spanp = s2; spanp != s2_end; spanp++) {
              if (*spanp == *p) {
                  return (size_t)(p - s1);
              }
          }
      }

      return (size_t)(p - s1);
  }
  /* }}} */
  1615. /* {{{ Finds first occurrence of a string within another, case insensitive */
  1616. PHP_FUNCTION(stristr)
  1617. {
  1618. zend_string *haystack, *needle;
  1619. const char *found = NULL;
  1620. size_t found_offset;
  1621. char *haystack_dup;
  1622. char *orig_needle;
  1623. bool part = 0;
  1624. ZEND_PARSE_PARAMETERS_START(2, 3)
  1625. Z_PARAM_STR(haystack)
  1626. Z_PARAM_STR(needle)
  1627. Z_PARAM_OPTIONAL
  1628. Z_PARAM_BOOL(part)
  1629. ZEND_PARSE_PARAMETERS_END();
  1630. haystack_dup = estrndup(ZSTR_VAL(haystack), ZSTR_LEN(haystack));
  1631. orig_needle = estrndup(ZSTR_VAL(needle), ZSTR_LEN(needle));
  1632. found = php_stristr(haystack_dup, orig_needle, ZSTR_LEN(haystack), ZSTR_LEN(needle));
  1633. efree(orig_needle);
  1634. if (found) {
  1635. found_offset = found - haystack_dup;
  1636. if (part) {
  1637. RETVAL_STRINGL(ZSTR_VAL(haystack), found_offset);
  1638. } else {
  1639. RETVAL_STRINGL(ZSTR_VAL(haystack) + found_offset, ZSTR_LEN(haystack) - found_offset);
  1640. }
  1641. } else {
  1642. RETVAL_FALSE;
  1643. }
  1644. efree(haystack_dup);
  1645. }
  1646. /* }}} */
  1647. /* {{{ Finds first occurrence of a string within another */
  1648. PHP_FUNCTION(strstr)
  1649. {
  1650. zend_string *haystack, *needle;
  1651. const char *found = NULL;
  1652. zend_long found_offset;
  1653. bool part = 0;
  1654. ZEND_PARSE_PARAMETERS_START(2, 3)
  1655. Z_PARAM_STR(haystack)
  1656. Z_PARAM_STR(needle)
  1657. Z_PARAM_OPTIONAL
  1658. Z_PARAM_BOOL(part)
  1659. ZEND_PARSE_PARAMETERS_END();
  1660. found = php_memnstr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
  1661. if (found) {
  1662. found_offset = found - ZSTR_VAL(haystack);
  1663. if (part) {
  1664. RETURN_STRINGL(ZSTR_VAL(haystack), found_offset);
  1665. } else {
  1666. RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
  1667. }
  1668. }
  1669. RETURN_FALSE;
  1670. }
  1671. /* }}} */
  1672. /* {{{ Checks if a string contains another */
  1673. PHP_FUNCTION(str_contains)
  1674. {
  1675. zend_string *haystack, *needle;
  1676. ZEND_PARSE_PARAMETERS_START(2, 2)
  1677. Z_PARAM_STR(haystack)
  1678. Z_PARAM_STR(needle)
  1679. ZEND_PARSE_PARAMETERS_END();
  1680. RETURN_BOOL(php_memnstr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack)));
  1681. }
  1682. /* }}} */
  1683. /* {{{ Checks if haystack starts with needle */
  1684. PHP_FUNCTION(str_starts_with)
  1685. {
  1686. zend_string *haystack, *needle;
  1687. ZEND_PARSE_PARAMETERS_START(2, 2)
  1688. Z_PARAM_STR(haystack)
  1689. Z_PARAM_STR(needle)
  1690. ZEND_PARSE_PARAMETERS_END();
  1691. if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
  1692. RETURN_FALSE;
  1693. }
  1694. RETURN_BOOL(memcmp(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle)) == 0);
  1695. }
  1696. /* }}} */
  1697. /* {{{ Checks if haystack ends with needle */
  1698. PHP_FUNCTION(str_ends_with)
  1699. {
  1700. zend_string *haystack, *needle;
  1701. ZEND_PARSE_PARAMETERS_START(2, 2)
  1702. Z_PARAM_STR(haystack)
  1703. Z_PARAM_STR(needle)
  1704. ZEND_PARSE_PARAMETERS_END();
  1705. if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
  1706. RETURN_FALSE;
  1707. }
  1708. RETURN_BOOL(memcmp(
  1709. ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - ZSTR_LEN(needle),
  1710. ZSTR_VAL(needle), ZSTR_LEN(needle)) == 0);
  1711. }
  1712. /* }}} */
  1713. /* {{{ An alias for strstr */
  1714. /* }}} */
  1715. /* {{{ Finds position of first occurrence of a string within another */
  1716. PHP_FUNCTION(strpos)
  1717. {
  1718. zend_string *haystack, *needle;
  1719. const char *found = NULL;
  1720. zend_long offset = 0;
  1721. ZEND_PARSE_PARAMETERS_START(2, 3)
  1722. Z_PARAM_STR(haystack)
  1723. Z_PARAM_STR(needle)
  1724. Z_PARAM_OPTIONAL
  1725. Z_PARAM_LONG(offset)
  1726. ZEND_PARSE_PARAMETERS_END();
  1727. if (offset < 0) {
  1728. offset += (zend_long)ZSTR_LEN(haystack);
  1729. }
  1730. if (offset < 0 || (size_t)offset > ZSTR_LEN(haystack)) {
  1731. zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
  1732. RETURN_THROWS();
  1733. }
  1734. found = (char*)php_memnstr(ZSTR_VAL(haystack) + offset,
  1735. ZSTR_VAL(needle), ZSTR_LEN(needle),
  1736. ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
  1737. if (found) {
  1738. RETURN_LONG(found - ZSTR_VAL(haystack));
  1739. } else {
  1740. RETURN_FALSE;
  1741. }
  1742. }
  1743. /* }}} */
  1744. /* {{{ Finds position of first occurrence of a string within another, case insensitive */
  1745. PHP_FUNCTION(stripos)
  1746. {
  1747. const char *found = NULL;
  1748. zend_string *haystack, *needle;
  1749. zend_long offset = 0;
  1750. zend_string *needle_dup = NULL, *haystack_dup;
  1751. ZEND_PARSE_PARAMETERS_START(2, 3)
  1752. Z_PARAM_STR(haystack)
  1753. Z_PARAM_STR(needle)
  1754. Z_PARAM_OPTIONAL
  1755. Z_PARAM_LONG(offset)
  1756. ZEND_PARSE_PARAMETERS_END();
  1757. if (offset < 0) {
  1758. offset += (zend_long)ZSTR_LEN(haystack);
  1759. }
  1760. if (offset < 0 || (size_t)offset > ZSTR_LEN(haystack)) {
  1761. zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
  1762. RETURN_THROWS();
  1763. }
  1764. if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
  1765. RETURN_FALSE;
  1766. }
  1767. haystack_dup = php_string_tolower(haystack);
  1768. needle_dup = php_string_tolower(needle);
  1769. found = (char*)php_memnstr(ZSTR_VAL(haystack_dup) + offset,
  1770. ZSTR_VAL(needle_dup), ZSTR_LEN(needle_dup), ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack));
  1771. if (found) {
  1772. RETVAL_LONG(found - ZSTR_VAL(haystack_dup));
  1773. } else {
  1774. RETVAL_FALSE;
  1775. }
  1776. zend_string_release_ex(haystack_dup, 0);
  1777. zend_string_release_ex(needle_dup, 0);
  1778. }
  1779. /* }}} */
  1780. /* {{{ Finds position of last occurrence of a string within another string */
  1781. PHP_FUNCTION(strrpos)
  1782. {
  1783. zend_string *needle;
  1784. zend_string *haystack;
  1785. zend_long offset = 0;
  1786. const char *p, *e, *found;
  1787. ZEND_PARSE_PARAMETERS_START(2, 3)
  1788. Z_PARAM_STR(haystack)
  1789. Z_PARAM_STR(needle)
  1790. Z_PARAM_OPTIONAL
  1791. Z_PARAM_LONG(offset)
  1792. ZEND_PARSE_PARAMETERS_END();
  1793. if (offset >= 0) {
  1794. if ((size_t)offset > ZSTR_LEN(haystack)) {
  1795. zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
  1796. RETURN_THROWS();
  1797. }
  1798. p = ZSTR_VAL(haystack) + (size_t)offset;
  1799. e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
  1800. } else {
  1801. if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
  1802. zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
  1803. RETURN_THROWS();
  1804. }
  1805. p = ZSTR_VAL(haystack);
  1806. if ((size_t)-offset < ZSTR_LEN(needle)) {
  1807. e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
  1808. } else {
  1809. e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack) + offset + ZSTR_LEN(needle);
  1810. }
  1811. }
  1812. if ((found = zend_memnrstr(p, ZSTR_VAL(needle), ZSTR_LEN(needle), e))) {
  1813. RETURN_LONG(found - ZSTR_VAL(haystack));
  1814. }
  1815. RETURN_FALSE;
  1816. }
  1817. /* }}} */
  1818. /* {{{ Finds position of last occurrence of a string within another string */
  1819. PHP_FUNCTION(strripos)
  1820. {
  1821. zend_string *needle;
  1822. zend_string *haystack;
  1823. zend_long offset = 0;
  1824. const char *p, *e, *found;
  1825. zend_string *needle_dup, *haystack_dup;
  1826. ZEND_PARSE_PARAMETERS_START(2, 3)
  1827. Z_PARAM_STR(haystack)
  1828. Z_PARAM_STR(needle)
  1829. Z_PARAM_OPTIONAL
  1830. Z_PARAM_LONG(offset)
  1831. ZEND_PARSE_PARAMETERS_END();
  1832. if (ZSTR_LEN(needle) == 1) {
  1833. /* Single character search can shortcut memcmps
  1834. Can also avoid tolower emallocs */
  1835. char lowered;
  1836. if (offset >= 0) {
  1837. if ((size_t)offset > ZSTR_LEN(haystack)) {
  1838. zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
  1839. RETURN_THROWS();
  1840. }
  1841. p = ZSTR_VAL(haystack) + (size_t)offset;
  1842. e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - 1;
  1843. } else {
  1844. p = ZSTR_VAL(haystack);
  1845. if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
  1846. zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
  1847. RETURN_THROWS();
  1848. }
  1849. e = ZSTR_VAL(haystack) + (ZSTR_LEN(haystack) + (size_t)offset);
  1850. }
  1851. /* Borrow that ord_needle buffer to avoid repeatedly tolower()ing needle */
  1852. lowered = tolower(*ZSTR_VAL(needle));
  1853. while (e >= p) {
  1854. if (tolower(*e) == lowered) {
  1855. RETURN_LONG(e - p + (offset > 0 ? offset : 0));
  1856. }
  1857. e--;
  1858. }
  1859. RETURN_FALSE;
  1860. }
  1861. haystack_dup = php_string_tolower(haystack);
  1862. if (offset >= 0) {
  1863. if ((size_t)offset > ZSTR_LEN(haystack)) {
  1864. zend_string_release_ex(haystack_dup, 0);
  1865. zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
  1866. RETURN_THROWS();
  1867. }
  1868. p = ZSTR_VAL(haystack_dup) + offset;
  1869. e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack);
  1870. } else {
  1871. if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
  1872. zend_string_release_ex(haystack_dup, 0);
  1873. zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
  1874. RETURN_THROWS();
  1875. }
  1876. p = ZSTR_VAL(haystack_dup);
  1877. if ((size_t)-offset < ZSTR_LEN(needle)) {
  1878. e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack);
  1879. } else {
  1880. e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack) + offset + ZSTR_LEN(needle);
  1881. }
  1882. }
  1883. needle_dup = php_string_tolower(needle);
  1884. if ((found = (char *)zend_memnrstr(p, ZSTR_VAL(needle_dup), ZSTR_LEN(needle_dup), e))) {
  1885. RETVAL_LONG(found - ZSTR_VAL(haystack_dup));
  1886. zend_string_release_ex(needle_dup, 0);
  1887. zend_string_release_ex(haystack_dup, 0);
  1888. } else {
  1889. zend_string_release_ex(needle_dup, 0);
  1890. zend_string_release_ex(haystack_dup, 0);
  1891. RETURN_FALSE;
  1892. }
  1893. }
  1894. /* }}} */
  1895. /* {{{ Finds the last occurrence of a character in a string within another */
  1896. PHP_FUNCTION(strrchr)
  1897. {
  1898. zend_string *haystack, *needle;
  1899. const char *found = NULL;
  1900. zend_long found_offset;
  1901. ZEND_PARSE_PARAMETERS_START(2, 2)
  1902. Z_PARAM_STR(haystack)
  1903. Z_PARAM_STR(needle)
  1904. ZEND_PARSE_PARAMETERS_END();
  1905. found = zend_memrchr(ZSTR_VAL(haystack), *ZSTR_VAL(needle), ZSTR_LEN(haystack));
  1906. if (found) {
  1907. found_offset = found - ZSTR_VAL(haystack);
  1908. RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
  1909. } else {
  1910. RETURN_FALSE;
  1911. }
  1912. }
  1913. /* }}} */
  1914. /* {{{ php_chunk_split */
  1915. static zend_string *php_chunk_split(const char *src, size_t srclen, const char *end, size_t endlen, size_t chunklen)
  1916. {
  1917. char *q;
  1918. const char *p;
  1919. size_t chunks;
  1920. size_t restlen;
  1921. zend_string *dest;
  1922. chunks = srclen / chunklen;
  1923. restlen = srclen - chunks * chunklen; /* srclen % chunklen */
  1924. if (restlen) {
  1925. /* We want chunks to be rounded up rather than rounded down.
  1926. * Increment can't overflow because chunks <= SIZE_MAX/2 at this point. */
  1927. chunks++;
  1928. }
  1929. dest = zend_string_safe_alloc(chunks, endlen, srclen, 0);
  1930. for (p = src, q = ZSTR_VAL(dest); p < (src + srclen - chunklen + 1); ) {
  1931. memcpy(q, p, chunklen);
  1932. q += chunklen;
  1933. memcpy(q, end, endlen);
  1934. q += endlen;
  1935. p += chunklen;
  1936. }
  1937. if (restlen) {
  1938. memcpy(q, p, restlen);
  1939. q += restlen;
  1940. memcpy(q, end, endlen);
  1941. q += endlen;
  1942. }
  1943. *q = '\0';
  1944. ZEND_ASSERT(q - ZSTR_VAL(dest) == ZSTR_LEN(dest));
  1945. return dest;
  1946. }
  1947. /* }}} */
  1948. /* {{{ Returns split line */
  1949. PHP_FUNCTION(chunk_split)
  1950. {
  1951. zend_string *str;
  1952. char *end = "\r\n";
  1953. size_t endlen = 2;
  1954. zend_long chunklen = 76;
  1955. zend_string *result;
  1956. ZEND_PARSE_PARAMETERS_START(1, 3)
  1957. Z_PARAM_STR(str)
  1958. Z_PARAM_OPTIONAL
  1959. Z_PARAM_LONG(chunklen)
  1960. Z_PARAM_STRING(end, endlen)
  1961. ZEND_PARSE_PARAMETERS_END();
  1962. if (chunklen <= 0) {
  1963. zend_argument_value_error(2, "must be greater than 0");
  1964. RETURN_THROWS();
  1965. }
  1966. if ((size_t)chunklen > ZSTR_LEN(str)) {
  1967. /* to maintain BC, we must return original string + ending */
  1968. result = zend_string_safe_alloc(ZSTR_LEN(str), 1, endlen, 0);
  1969. memcpy(ZSTR_VAL(result), ZSTR_VAL(str), ZSTR_LEN(str));
  1970. memcpy(ZSTR_VAL(result) + ZSTR_LEN(str), end, endlen);
  1971. ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
  1972. RETURN_NEW_STR(result);
  1973. }
  1974. if (!ZSTR_LEN(str)) {
  1975. RETURN_EMPTY_STRING();
  1976. }
  1977. result = php_chunk_split(ZSTR_VAL(str), ZSTR_LEN(str), end, endlen, (size_t)chunklen);
  1978. RETURN_STR(result);
  1979. }
  1980. /* }}} */
  1981. /* {{{ Returns part of a string */
  1982. PHP_FUNCTION(substr)
  1983. {
  1984. zend_string *str;
  1985. zend_long l = 0, f;
  1986. bool len_is_null = 1;
  1987. ZEND_PARSE_PARAMETERS_START(2, 3)
  1988. Z_PARAM_STR(str)
  1989. Z_PARAM_LONG(f)
  1990. Z_PARAM_OPTIONAL
  1991. Z_PARAM_LONG_OR_NULL(l, len_is_null)
  1992. ZEND_PARSE_PARAMETERS_END();
  1993. if (f < 0) {
  1994. /* if "from" position is negative, count start position from the end
  1995. * of the string
  1996. */
  1997. if (-(size_t)f > ZSTR_LEN(str)) {
  1998. f = 0;
  1999. } else {
  2000. f = (zend_long)ZSTR_LEN(str) + f;
  2001. }
  2002. } else if ((size_t)f > ZSTR_LEN(str)) {
  2003. RETURN_EMPTY_STRING();
  2004. }
  2005. if (!len_is_null) {
  2006. if (l < 0) {
  2007. /* if "length" position is negative, set it to the length
  2008. * needed to stop that many chars from the end of the string
  2009. */
  2010. if (-(size_t)l > ZSTR_LEN(str) - (size_t)f) {
  2011. l = 0;
  2012. } else {
  2013. l = (zend_long)ZSTR_LEN(str) - f + l;
  2014. }
  2015. } else if ((size_t)l > ZSTR_LEN(str) - (size_t)f) {
  2016. l = (zend_long)ZSTR_LEN(str) - f;
  2017. }
  2018. } else {
  2019. l = (zend_long)ZSTR_LEN(str) - f;
  2020. }
  2021. if (l == ZSTR_LEN(str)) {
  2022. RETURN_STR_COPY(str);
  2023. } else {
  2024. RETURN_STRINGL_FAST(ZSTR_VAL(str) + f, l);
  2025. }
  2026. }
  2027. /* }}} */
  2028. /* {{{ Replaces part of a string with another string */
  2029. PHP_FUNCTION(substr_replace)
  2030. {
  2031. zend_string *str, *repl_str;
  2032. HashTable *str_ht, *repl_ht;
  2033. HashTable *from_ht;
  2034. zend_long from_long;
  2035. HashTable *len_ht = NULL;
  2036. zend_long len_long;
  2037. bool len_is_null = 1;
  2038. zend_long l = 0;
  2039. zend_long f;
  2040. zend_string *result;
  2041. HashPosition from_idx, repl_idx, len_idx;
  2042. zval *tmp_str = NULL, *tmp_repl, *tmp_from = NULL, *tmp_len= NULL;
  2043. ZEND_PARSE_PARAMETERS_START(3, 4)
  2044. Z_PARAM_ARRAY_HT_OR_STR(str_ht, str)
  2045. Z_PARAM_ARRAY_HT_OR_STR(repl_ht, repl_str)
  2046. Z_PARAM_ARRAY_HT_OR_LONG(from_ht, from_long)
  2047. Z_PARAM_OPTIONAL
  2048. Z_PARAM_ARRAY_HT_OR_LONG_OR_NULL(len_ht, len_long, len_is_null)
  2049. ZEND_PARSE_PARAMETERS_END();
  2050. if (len_is_null) {
  2051. if (str) {
  2052. l = ZSTR_LEN(str);
  2053. }
  2054. } else if (!len_ht) {
  2055. l = len_long;
  2056. }
  2057. if (str) {
  2058. if (from_ht) {
  2059. zend_argument_type_error(3, "cannot be an array when working on a single string");
  2060. RETURN_THROWS();
  2061. }
  2062. if (len_ht) {
  2063. zend_argument_type_error(4, "cannot be an array when working on a single string");
  2064. RETURN_THROWS();
  2065. }
  2066. f = from_long;
  2067. /* if "from" position is negative, count start position from the end
  2068. * of the string
  2069. */
  2070. if (f < 0) {
  2071. f = (zend_long)ZSTR_LEN(str) + f;
  2072. if (f < 0) {
  2073. f = 0;
  2074. }
  2075. } else if ((size_t)f > ZSTR_LEN(str)) {
  2076. f = ZSTR_LEN(str);
  2077. }
  2078. /* if "length" position is negative, set it to the length
  2079. * needed to stop that many chars from the end of the string
  2080. */
  2081. if (l < 0) {
  2082. l = ((zend_long)ZSTR_LEN(str) - f) + l;
  2083. if (l < 0) {
  2084. l = 0;
  2085. }
  2086. }
  2087. if ((size_t)l > ZSTR_LEN(str) || (l < 0 && (size_t)(-l) > ZSTR_LEN(str))) {
  2088. l = ZSTR_LEN(str);
  2089. }
  2090. if ((f + l) > (zend_long)ZSTR_LEN(str)) {
  2091. l = ZSTR_LEN(str) - f;
  2092. }
  2093. zend_string *tmp_repl_str = NULL;
  2094. if (repl_ht) {
  2095. repl_idx = 0;
  2096. while (repl_idx < repl_ht->nNumUsed) {
  2097. tmp_repl = &repl_ht->arData[repl_idx].val;
  2098. if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
  2099. break;
  2100. }
  2101. repl_idx++;
  2102. }
  2103. if (repl_idx < repl_ht->nNumUsed) {
  2104. repl_str = zval_get_tmp_string(tmp_repl, &tmp_repl_str);
  2105. } else {
  2106. repl_str = STR_EMPTY_ALLOC();
  2107. }
  2108. }
  2109. result = zend_string_safe_alloc(1, ZSTR_LEN(str) - l + ZSTR_LEN(repl_str), 0, 0);
  2110. memcpy(ZSTR_VAL(result), ZSTR_VAL(str), f);
  2111. if (ZSTR_LEN(repl_str)) {
  2112. memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
  2113. }
  2114. memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(str) + f + l, ZSTR_LEN(str) - f - l);
  2115. ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
  2116. zend_tmp_string_release(tmp_repl_str);
  2117. RETURN_NEW_STR(result);
  2118. } else { /* str is array of strings */
  2119. zend_string *str_index = NULL;
  2120. size_t result_len;
  2121. zend_ulong num_index;
  2122. /* TODO
  2123. if (!len_is_null && from_ht) {
  2124. if (zend_hash_num_elements(from_ht) != zend_hash_num_elements(len_ht)) {
  2125. php_error_docref(NULL, E_WARNING, "'start' and 'length' should have the same number of elements");
  2126. RETURN_STR_COPY(str);
  2127. }
  2128. }
  2129. */
  2130. array_init(return_value);
  2131. from_idx = len_idx = repl_idx = 0;
  2132. ZEND_HASH_FOREACH_KEY_VAL(str_ht, num_index, str_index, tmp_str) {
  2133. zend_string *tmp_orig_str;
  2134. zend_string *orig_str = zval_get_tmp_string(tmp_str, &tmp_orig_str);
  2135. if (from_ht) {
  2136. while (from_idx < from_ht->nNumUsed) {
  2137. tmp_from = &from_ht->arData[from_idx].val;
  2138. if (Z_TYPE_P(tmp_from) != IS_UNDEF) {
  2139. break;
  2140. }
  2141. from_idx++;
  2142. }
  2143. if (from_idx < from_ht->nNumUsed) {
  2144. f = zval_get_long(tmp_from);
  2145. if (f < 0) {
  2146. f = (zend_long)ZSTR_LEN(orig_str) + f;
  2147. if (f < 0) {
  2148. f = 0;
  2149. }
  2150. } else if (f > (zend_long)ZSTR_LEN(orig_str)) {
  2151. f = ZSTR_LEN(orig_str);
  2152. }
  2153. from_idx++;
  2154. } else {
  2155. f = 0;
  2156. }
  2157. } else {
  2158. f = from_long;
  2159. if (f < 0) {
  2160. f = (zend_long)ZSTR_LEN(orig_str) + f;
  2161. if (f < 0) {
  2162. f = 0;
  2163. }
  2164. } else if (f > (zend_long)ZSTR_LEN(orig_str)) {
  2165. f = ZSTR_LEN(orig_str);
  2166. }
  2167. }
  2168. if (len_ht) {
  2169. while (len_idx < len_ht->nNumUsed) {
  2170. tmp_len = &len_ht->arData[len_idx].val;
  2171. if (Z_TYPE_P(tmp_len) != IS_UNDEF) {
  2172. break;
  2173. }
  2174. len_idx++;
  2175. }
  2176. if (len_idx < len_ht->nNumUsed) {
  2177. l = zval_get_long(tmp_len);
  2178. len_idx++;
  2179. } else {
  2180. l = ZSTR_LEN(orig_str);
  2181. }
  2182. } else if (!len_is_null) {
  2183. l = len_long;
  2184. } else {
  2185. l = ZSTR_LEN(orig_str);
  2186. }
  2187. if (l < 0) {
  2188. l = (ZSTR_LEN(orig_str) - f) + l;
  2189. if (l < 0) {
  2190. l = 0;
  2191. }
  2192. }
  2193. ZEND_ASSERT(0 <= f && f <= ZEND_LONG_MAX);
  2194. ZEND_ASSERT(0 <= l && l <= ZEND_LONG_MAX);
  2195. if (((size_t) f + l) > ZSTR_LEN(orig_str)) {
  2196. l = ZSTR_LEN(orig_str) - f;
  2197. }
  2198. result_len = ZSTR_LEN(orig_str) - l;
  2199. if (repl_ht) {
  2200. while (repl_idx < repl_ht->nNumUsed) {
  2201. tmp_repl = &repl_ht->arData[repl_idx].val;
  2202. if (repl_ht != IS_UNDEF) {
  2203. break;
  2204. }
  2205. repl_idx++;
  2206. }
  2207. if (repl_idx < repl_ht->nNumUsed) {
  2208. zend_string *tmp_repl_str;
  2209. zend_string *repl_str = zval_get_tmp_string(tmp_repl, &tmp_repl_str);
  2210. result_len += ZSTR_LEN(repl_str);
  2211. repl_idx++;
  2212. result = zend_string_safe_alloc(1, result_len, 0, 0);
  2213. memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
  2214. memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
  2215. memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
  2216. zend_tmp_string_release(tmp_repl_str);
  2217. } else {
  2218. result = zend_string_safe_alloc(1, result_len, 0, 0);
  2219. memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
  2220. memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
  2221. }
  2222. } else {
  2223. result_len += ZSTR_LEN(repl_str);
  2224. result = zend_string_safe_alloc(1, result_len, 0, 0);
  2225. memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
  2226. memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
  2227. memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
  2228. }
  2229. ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
  2230. if (str_index) {
  2231. zval tmp;
  2232. ZVAL_NEW_STR(&tmp, result);
  2233. zend_symtable_update(Z_ARRVAL_P(return_value), str_index, &tmp);
  2234. } else {
  2235. add_index_str(return_value, num_index, result);
  2236. }
  2237. zend_tmp_string_release(tmp_orig_str);
  2238. } ZEND_HASH_FOREACH_END();
  2239. } /* if */
  2240. }
  2241. /* }}} */
  2242. /* {{{ Quotes meta characters */
  2243. PHP_FUNCTION(quotemeta)
  2244. {
  2245. zend_string *old;
  2246. const char *old_end, *p;
  2247. char *q;
  2248. char c;
  2249. zend_string *str;
  2250. ZEND_PARSE_PARAMETERS_START(1, 1)
  2251. Z_PARAM_STR(old)
  2252. ZEND_PARSE_PARAMETERS_END();
  2253. old_end = ZSTR_VAL(old) + ZSTR_LEN(old);
  2254. if (ZSTR_LEN(old) == 0) {
  2255. RETURN_EMPTY_STRING();
  2256. }
  2257. str = zend_string_safe_alloc(2, ZSTR_LEN(old), 0, 0);
  2258. for (p = ZSTR_VAL(old), q = ZSTR_VAL(str); p != old_end; p++) {
  2259. c = *p;
  2260. switch (c) {
  2261. case '.':
  2262. case '\\':
  2263. case '+':
  2264. case '*':
  2265. case '?':
  2266. case '[':
  2267. case '^':
  2268. case ']':
  2269. case '$':
  2270. case '(':
  2271. case ')':
  2272. *q++ = '\\';
  2273. ZEND_FALLTHROUGH;
  2274. default:
  2275. *q++ = c;
  2276. }
  2277. }
  2278. *q = '\0';
  2279. RETURN_NEW_STR(zend_string_truncate(str, q - ZSTR_VAL(str), 0));
  2280. }
  2281. /* }}} */
  2282. /* {{{ Returns ASCII value of character
  2283. Warning: This function is special-cased by zend_compile.c and so is bypassed for constant string argument */
  2284. PHP_FUNCTION(ord)
  2285. {
  2286. zend_string *str;
  2287. ZEND_PARSE_PARAMETERS_START(1, 1)
  2288. Z_PARAM_STR(str)
  2289. ZEND_PARSE_PARAMETERS_END();
  2290. RETURN_LONG((unsigned char) ZSTR_VAL(str)[0]);
  2291. }
  2292. /* }}} */
  2293. /* {{{ Converts ASCII code to a character
  2294. Warning: This function is special-cased by zend_compile.c and so is bypassed for constant integer argument */
  2295. PHP_FUNCTION(chr)
  2296. {
  2297. zend_long c;
  2298. ZEND_PARSE_PARAMETERS_START(1, 1)
  2299. Z_PARAM_LONG(c)
  2300. ZEND_PARSE_PARAMETERS_END();
  2301. c &= 0xff;
  2302. RETURN_CHAR(c);
  2303. }
  2304. /* }}} */
  2305. /* {{{ php_ucfirst
  2306. Uppercase the first character of the word in a native string */
  2307. static zend_string* php_ucfirst(zend_string *str)
  2308. {
  2309. const unsigned char ch = ZSTR_VAL(str)[0];
  2310. unsigned char r = toupper(ch);
  2311. if (r == ch) {
  2312. return zend_string_copy(str);
  2313. } else {
  2314. zend_string *s = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
  2315. ZSTR_VAL(s)[0] = r;
  2316. return s;
  2317. }
  2318. }
  2319. /* }}} */
  2320. /* {{{ Makes a string's first character uppercase */
  2321. PHP_FUNCTION(ucfirst)
  2322. {
  2323. zend_string *str;
  2324. ZEND_PARSE_PARAMETERS_START(1, 1)
  2325. Z_PARAM_STR(str)
  2326. ZEND_PARSE_PARAMETERS_END();
  2327. if (!ZSTR_LEN(str)) {
  2328. RETURN_EMPTY_STRING();
  2329. }
  2330. RETURN_STR(php_ucfirst(str));
  2331. }
  2332. /* }}} */
  2333. /* {{{
  2334. Lowercase the first character of the word in a native string */
  2335. static zend_string* php_lcfirst(zend_string *str)
  2336. {
  2337. unsigned char r = tolower(ZSTR_VAL(str)[0]);
  2338. if (r == ZSTR_VAL(str)[0]) {
  2339. return zend_string_copy(str);
  2340. } else {
  2341. zend_string *s = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
  2342. ZSTR_VAL(s)[0] = r;
  2343. return s;
  2344. }
  2345. }
  2346. /* }}} */
  2347. /* {{{ Make a string's first character lowercase */
  2348. PHP_FUNCTION(lcfirst)
  2349. {
  2350. zend_string *str;
  2351. ZEND_PARSE_PARAMETERS_START(1, 1)
  2352. Z_PARAM_STR(str)
  2353. ZEND_PARSE_PARAMETERS_END();
  2354. if (!ZSTR_LEN(str)) {
  2355. RETURN_EMPTY_STRING();
  2356. }
  2357. RETURN_STR(php_lcfirst(str));
  2358. }
  2359. /* }}} */
  2360. /* {{{ Uppercase the first character of every word in a string */
  2361. PHP_FUNCTION(ucwords)
  2362. {
  2363. zend_string *str;
  2364. char *delims = " \t\r\n\f\v";
  2365. char *r;
  2366. const char *r_end;
  2367. size_t delims_len = 6;
  2368. char mask[256];
  2369. ZEND_PARSE_PARAMETERS_START(1, 2)
  2370. Z_PARAM_STR(str)
  2371. Z_PARAM_OPTIONAL
  2372. Z_PARAM_STRING(delims, delims_len)
  2373. ZEND_PARSE_PARAMETERS_END();
  2374. if (!ZSTR_LEN(str)) {
  2375. RETURN_EMPTY_STRING();
  2376. }
  2377. php_charmask((const unsigned char *) delims, delims_len, mask);
  2378. ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
  2379. r = Z_STRVAL_P(return_value);
  2380. *r = toupper((unsigned char) *r);
  2381. for (r_end = r + Z_STRLEN_P(return_value) - 1; r < r_end; ) {
  2382. if (mask[(unsigned char)*r++]) {
  2383. *r = toupper((unsigned char) *r);
  2384. }
  2385. }
  2386. }
  2387. /* }}} */
  /* {{{ php_strtr
 * Translates the first `len` bytes of `str` in place: every byte equal to
 * str_from[k] becomes str_to[k], for k < trlen. When a byte appears more
 * than once in str_from, its last entry wins. Returns `str`. */
PHPAPI char *php_strtr(char *str, size_t len, const char *str_from, const char *str_to, size_t trlen)
{
	size_t k;

	if (UNEXPECTED(trlen < 1)) {
		return str;
	}

	if (trlen == 1) {
		/* One pair only: a plain compare-and-store loop is enough. */
		const char from = str_from[0];
		const char to = str_to[0];

		for (k = 0; k < len; k++) {
			if (str[k] == from) {
				str[k] = to;
			}
		}
		return str;
	}

	/* Build a byte-to-byte map that starts out as the identity, then
	 * overlay the requested pairs on top of it. */
	unsigned char map[256];

	for (k = 0; k < sizeof(map); k++) {
		map[k] = (unsigned char) k;
	}
	for (k = 0; k < trlen; k++) {
		map[(unsigned char) str_from[k]] = (unsigned char) str_to[k];
	}
	for (k = 0; k < len; k++) {
		str[k] = (char) map[(unsigned char) str[k]];
	}

	return str;
}
/* }}} */
  2415. /* {{{ php_strtr_ex */
  2416. static zend_string *php_strtr_ex(zend_string *str, const char *str_from, const char *str_to, size_t trlen)
  2417. {
  2418. zend_string *new_str = NULL;
  2419. size_t i;
  2420. if (UNEXPECTED(trlen < 1)) {
  2421. return zend_string_copy(str);
  2422. } else if (trlen == 1) {
  2423. char ch_from = *str_from;
  2424. char ch_to = *str_to;
  2425. for (i = 0; i < ZSTR_LEN(str); i++) {
  2426. if (ZSTR_VAL(str)[i] == ch_from) {
  2427. new_str = zend_string_alloc(ZSTR_LEN(str), 0);
  2428. memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), i);
  2429. ZSTR_VAL(new_str)[i] = ch_to;
  2430. i++;
  2431. for (; i < ZSTR_LEN(str); i++) {
  2432. ZSTR_VAL(new_str)[i] = (ZSTR_VAL(str)[i] != ch_from) ? ZSTR_VAL(str)[i] : ch_to;
  2433. }
  2434. ZSTR_VAL(new_str)[i] = 0;
  2435. return new_str;
  2436. }
  2437. }
  2438. } else {
  2439. unsigned char xlat[256];
  2440. memset(xlat, 0, sizeof(xlat));;
  2441. for (i = 0; i < trlen; i++) {
  2442. xlat[(size_t)(unsigned char) str_from[i]] = str_to[i] - str_from[i];
  2443. }
  2444. for (i = 0; i < ZSTR_LEN(str); i++) {
  2445. if (xlat[(size_t)(unsigned char) ZSTR_VAL(str)[i]]) {
  2446. new_str = zend_string_alloc(ZSTR_LEN(str), 0);
  2447. memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), i);
  2448. do {
  2449. ZSTR_VAL(new_str)[i] = ZSTR_VAL(str)[i] + xlat[(size_t)(unsigned char) ZSTR_VAL(str)[i]];
  2450. i++;
  2451. } while (i < ZSTR_LEN(str));
  2452. ZSTR_VAL(new_str)[i] = 0;
  2453. return new_str;
  2454. }
  2455. }
  2456. }
  2457. return zend_string_copy(str);
  2458. }
  2459. /* }}} */
  2460. /* {{{ php_strtr_array */
  2461. static void php_strtr_array(zval *return_value, zend_string *input, HashTable *pats)
  2462. {
  2463. const char *str = ZSTR_VAL(input);
  2464. size_t slen = ZSTR_LEN(input);
  2465. zend_ulong num_key;
  2466. zend_string *str_key;
  2467. size_t len, pos, old_pos;
  2468. int num_keys = 0;
  2469. size_t minlen = 128*1024;
  2470. size_t maxlen = 0;
  2471. HashTable str_hash;
  2472. zval *entry;
  2473. const char *key;
  2474. smart_str result = {0};
  2475. zend_ulong bitset[256/sizeof(zend_ulong)];
  2476. zend_ulong *num_bitset;
  2477. /* we will collect all possible key lengths */
  2478. num_bitset = ecalloc((slen + sizeof(zend_ulong)) / sizeof(zend_ulong), sizeof(zend_ulong));
  2479. memset(bitset, 0, sizeof(bitset));
  2480. /* check if original array has numeric keys */
  2481. ZEND_HASH_FOREACH_STR_KEY(pats, str_key) {
  2482. if (UNEXPECTED(!str_key)) {
  2483. num_keys = 1;
  2484. } else {
  2485. len = ZSTR_LEN(str_key);
  2486. if (UNEXPECTED(len < 1)) {
  2487. php_error_docref(NULL, E_WARNING, "Ignoring replacement of empty string");
  2488. continue;
  2489. } else if (UNEXPECTED(len > slen)) {
  2490. /* skip long patterns */
  2491. continue;
  2492. }
  2493. if (len > maxlen) {
  2494. maxlen = len;
  2495. }
  2496. if (len < minlen) {
  2497. minlen = len;
  2498. }
  2499. /* remember possible key length */
  2500. num_bitset[len / sizeof(zend_ulong)] |= Z_UL(1) << (len % sizeof(zend_ulong));
  2501. bitset[((unsigned char)ZSTR_VAL(str_key)[0]) / sizeof(zend_ulong)] |= Z_UL(1) << (((unsigned char)ZSTR_VAL(str_key)[0]) % sizeof(zend_ulong));
  2502. }
  2503. } ZEND_HASH_FOREACH_END();
  2504. if (UNEXPECTED(num_keys)) {
  2505. zend_string *key_used;
  2506. /* we have to rebuild HashTable with numeric keys */
  2507. zend_hash_init(&str_hash, zend_hash_num_elements(pats), NULL, NULL, 0);
  2508. ZEND_HASH_FOREACH_KEY_VAL(pats, num_key, str_key, entry) {
  2509. if (UNEXPECTED(!str_key)) {
  2510. key_used = zend_long_to_str(num_key);
  2511. len = ZSTR_LEN(key_used);
  2512. if (UNEXPECTED(len > slen)) {
  2513. /* skip long patterns */
  2514. zend_string_release(key_used);
  2515. continue;
  2516. }
  2517. if (len > maxlen) {
  2518. maxlen = len;
  2519. }
  2520. if (len < minlen) {
  2521. minlen = len;
  2522. }
  2523. /* remember possible key length */
  2524. num_bitset[len / sizeof(zend_ulong)] |= Z_UL(1) << (len % sizeof(zend_ulong));
  2525. bitset[((unsigned char)ZSTR_VAL(key_used)[0]) / sizeof(zend_ulong)] |= Z_UL(1) << (((unsigned char)ZSTR_VAL(key_used)[0]) % sizeof(zend_ulong));
  2526. } else {
  2527. key_used = str_key;
  2528. len = ZSTR_LEN(key_used);
  2529. if (UNEXPECTED(len > slen)) {
  2530. /* skip long patterns */
  2531. continue;
  2532. }
  2533. }
  2534. zend_hash_add(&str_hash, key_used, entry);
  2535. if (UNEXPECTED(!str_key)) {
  2536. zend_string_release_ex(key_used, 0);
  2537. }
  2538. } ZEND_HASH_FOREACH_END();
  2539. pats = &str_hash;
  2540. }
  2541. if (UNEXPECTED(minlen > maxlen)) {
  2542. /* return the original string */
  2543. if (pats == &str_hash) {
  2544. zend_hash_destroy(&str_hash);
  2545. }
  2546. efree(num_bitset);
  2547. RETURN_STR_COPY(input);
  2548. }
  2549. old_pos = pos = 0;
  2550. while (pos <= slen - minlen) {
  2551. key = str + pos;
  2552. if (bitset[((unsigned char)key[0]) / sizeof(zend_ulong)] & (Z_UL(1) << (((unsigned char)key[0]) % sizeof(zend_ulong)))) {
  2553. len = maxlen;
  2554. if (len > slen - pos) {
  2555. len = slen - pos;
  2556. }
  2557. while (len >= minlen) {
  2558. if ((num_bitset[len / sizeof(zend_ulong)] & (Z_UL(1) << (len % sizeof(zend_ulong))))) {
  2559. entry = zend_hash_str_find(pats, key, len);
  2560. if (entry != NULL) {
  2561. zend_string *tmp;
  2562. zend_string *s = zval_get_tmp_string(entry, &tmp);
  2563. smart_str_appendl(&result, str + old_pos, pos - old_pos);
  2564. smart_str_append(&result, s);
  2565. old_pos = pos + len;
  2566. pos = old_pos - 1;
  2567. zend_tmp_string_release(tmp);
  2568. break;
  2569. }
  2570. }
  2571. len--;
  2572. }
  2573. }
  2574. pos++;
  2575. }
  2576. if (result.s) {
  2577. smart_str_appendl(&result, str + old_pos, slen - old_pos);
  2578. smart_str_0(&result);
  2579. RETVAL_NEW_STR(result.s);
  2580. } else {
  2581. smart_str_free(&result);
  2582. RETVAL_STR_COPY(input);
  2583. }
  2584. if (pats == &str_hash) {
  2585. zend_hash_destroy(&str_hash);
  2586. }
  2587. efree(num_bitset);
  2588. }
  2589. /* }}} */
  2590. /* {{{ php_char_to_str_ex */
  2591. static zend_string* php_char_to_str_ex(zend_string *str, char from, char *to, size_t to_len, int case_sensitivity, zend_long *replace_count)
  2592. {
  2593. zend_string *result;
  2594. size_t char_count = 0;
  2595. int lc_from = 0;
  2596. const char *source, *source_end= ZSTR_VAL(str) + ZSTR_LEN(str);
  2597. char *target;
  2598. if (case_sensitivity) {
  2599. char *p = ZSTR_VAL(str), *e = p + ZSTR_LEN(str);
  2600. while ((p = memchr(p, from, (e - p)))) {
  2601. char_count++;
  2602. p++;
  2603. }
  2604. } else {
  2605. lc_from = tolower(from);
  2606. for (source = ZSTR_VAL(str); source < source_end; source++) {
  2607. if (tolower(*source) == lc_from) {
  2608. char_count++;
  2609. }
  2610. }
  2611. }
  2612. if (char_count == 0) {
  2613. return zend_string_copy(str);
  2614. }
  2615. if (to_len > 0) {
  2616. result = zend_string_safe_alloc(char_count, to_len - 1, ZSTR_LEN(str), 0);
  2617. } else {
  2618. result = zend_string_alloc(ZSTR_LEN(str) - char_count, 0);
  2619. }
  2620. target = ZSTR_VAL(result);
  2621. if (case_sensitivity) {
  2622. char *p = ZSTR_VAL(str), *e = p + ZSTR_LEN(str), *s = ZSTR_VAL(str);
  2623. while ((p = memchr(p, from, (e - p)))) {
  2624. memcpy(target, s, (p - s));
  2625. target += p - s;
  2626. memcpy(target, to, to_len);
  2627. target += to_len;
  2628. p++;
  2629. s = p;
  2630. if (replace_count) {
  2631. *replace_count += 1;
  2632. }
  2633. }
  2634. if (s < e) {
  2635. memcpy(target, s, (e - s));
  2636. target += e - s;
  2637. }
  2638. } else {
  2639. for (source = ZSTR_VAL(str); source < source_end; source++) {
  2640. if (tolower(*source) == lc_from) {
  2641. if (replace_count) {
  2642. *replace_count += 1;
  2643. }
  2644. memcpy(target, to, to_len);
  2645. target += to_len;
  2646. } else {
  2647. *target = *source;
  2648. target++;
  2649. }
  2650. }
  2651. }
  2652. *target = 0;
  2653. return result;
  2654. }
  2655. /* }}} */
  2656. /* {{{ php_str_to_str_ex */
  2657. static zend_string *php_str_to_str_ex(zend_string *haystack,
  2658. const char *needle, size_t needle_len, const char *str, size_t str_len, zend_long *replace_count)
  2659. {
  2660. if (needle_len < ZSTR_LEN(haystack)) {
  2661. zend_string *new_str;
  2662. const char *end;
  2663. const char *p, *r;
  2664. char *e;
  2665. if (needle_len == str_len) {
  2666. new_str = NULL;
  2667. end = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
  2668. for (p = ZSTR_VAL(haystack); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
  2669. if (!new_str) {
  2670. new_str = zend_string_init(ZSTR_VAL(haystack), ZSTR_LEN(haystack), 0);
  2671. }
  2672. memcpy(ZSTR_VAL(new_str) + (r - ZSTR_VAL(haystack)), str, str_len);
  2673. (*replace_count)++;
  2674. }
  2675. if (!new_str) {
  2676. goto nothing_todo;
  2677. }
  2678. return new_str;
  2679. } else {
  2680. size_t count = 0;
  2681. const char *o = ZSTR_VAL(haystack);
  2682. const char *n = needle;
  2683. const char *endp = o + ZSTR_LEN(haystack);
  2684. while ((o = (char*)php_memnstr(o, n, needle_len, endp))) {
  2685. o += needle_len;
  2686. count++;
  2687. }
  2688. if (count == 0) {
  2689. /* Needle doesn't occur, shortcircuit the actual replacement. */
  2690. goto nothing_todo;
  2691. }
  2692. if (str_len > needle_len) {
  2693. new_str = zend_string_safe_alloc(count, str_len - needle_len, ZSTR_LEN(haystack), 0);
  2694. } else {
  2695. new_str = zend_string_alloc(count * (str_len - needle_len) + ZSTR_LEN(haystack), 0);
  2696. }
  2697. e = ZSTR_VAL(new_str);
  2698. end = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
  2699. for (p = ZSTR_VAL(haystack); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
  2700. memcpy(e, p, r - p);
  2701. e += r - p;
  2702. memcpy(e, str, str_len);
  2703. e += str_len;
  2704. (*replace_count)++;
  2705. }
  2706. if (p < end) {
  2707. memcpy(e, p, end - p);
  2708. e += end - p;
  2709. }
  2710. *e = '\0';
  2711. return new_str;
  2712. }
  2713. } else if (needle_len > ZSTR_LEN(haystack) || memcmp(ZSTR_VAL(haystack), needle, ZSTR_LEN(haystack))) {
  2714. nothing_todo:
  2715. return zend_string_copy(haystack);
  2716. } else {
  2717. (*replace_count)++;
  2718. return zend_string_init_fast(str, str_len);
  2719. }
  2720. }
  2721. /* }}} */
  2722. /* {{{ php_str_to_str_i_ex */
  2723. static zend_string *php_str_to_str_i_ex(zend_string *haystack, const char *lc_haystack,
  2724. zend_string *needle, const char *str, size_t str_len, zend_long *replace_count)
  2725. {
  2726. zend_string *new_str = NULL;
  2727. zend_string *lc_needle;
  2728. if (ZSTR_LEN(needle) < ZSTR_LEN(haystack)) {
  2729. const char *end;
  2730. const char *p, *r;
  2731. char *e;
  2732. if (ZSTR_LEN(needle) == str_len) {
  2733. lc_needle = php_string_tolower(needle);
  2734. end = lc_haystack + ZSTR_LEN(haystack);
  2735. for (p = lc_haystack; (r = (char*)php_memnstr(p, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle), end)); p = r + ZSTR_LEN(lc_needle)) {
  2736. if (!new_str) {
  2737. new_str = zend_string_init(ZSTR_VAL(haystack), ZSTR_LEN(haystack), 0);
  2738. }
  2739. memcpy(ZSTR_VAL(new_str) + (r - lc_haystack), str, str_len);
  2740. (*replace_count)++;
  2741. }
  2742. zend_string_release_ex(lc_needle, 0);
  2743. if (!new_str) {
  2744. goto nothing_todo;
  2745. }
  2746. return new_str;
  2747. } else {
  2748. size_t count = 0;
  2749. const char *o = lc_haystack;
  2750. const char *n;
  2751. const char *endp = o + ZSTR_LEN(haystack);
  2752. lc_needle = php_string_tolower(needle);
  2753. n = ZSTR_VAL(lc_needle);
  2754. while ((o = (char*)php_memnstr(o, n, ZSTR_LEN(lc_needle), endp))) {
  2755. o += ZSTR_LEN(lc_needle);
  2756. count++;
  2757. }
  2758. if (count == 0) {
  2759. /* Needle doesn't occur, shortcircuit the actual replacement. */
  2760. zend_string_release_ex(lc_needle, 0);
  2761. goto nothing_todo;
  2762. }
  2763. if (str_len > ZSTR_LEN(lc_needle)) {
  2764. new_str = zend_string_safe_alloc(count, str_len - ZSTR_LEN(lc_needle), ZSTR_LEN(haystack), 0);
  2765. } else {
  2766. new_str = zend_string_alloc(count * (str_len - ZSTR_LEN(lc_needle)) + ZSTR_LEN(haystack), 0);
  2767. }
  2768. e = ZSTR_VAL(new_str);
  2769. end = lc_haystack + ZSTR_LEN(haystack);
  2770. for (p = lc_haystack; (r = (char*)php_memnstr(p, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle), end)); p = r + ZSTR_LEN(lc_needle)) {
  2771. memcpy(e, ZSTR_VAL(haystack) + (p - lc_haystack), r - p);
  2772. e += r - p;
  2773. memcpy(e, str, str_len);
  2774. e += str_len;
  2775. (*replace_count)++;
  2776. }
  2777. if (p < end) {
  2778. memcpy(e, ZSTR_VAL(haystack) + (p - lc_haystack), end - p);
  2779. e += end - p;
  2780. }
  2781. *e = '\0';
  2782. zend_string_release_ex(lc_needle, 0);
  2783. return new_str;
  2784. }
  2785. } else if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
  2786. nothing_todo:
  2787. return zend_string_copy(haystack);
  2788. } else {
  2789. lc_needle = php_string_tolower(needle);
  2790. if (memcmp(lc_haystack, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle))) {
  2791. zend_string_release_ex(lc_needle, 0);
  2792. goto nothing_todo;
  2793. }
  2794. zend_string_release_ex(lc_needle, 0);
  2795. new_str = zend_string_init(str, str_len, 0);
  2796. (*replace_count)++;
  2797. return new_str;
  2798. }
  2799. }
  2800. /* }}} */
  2801. /* {{{ php_str_to_str */
  2802. PHPAPI zend_string *php_str_to_str(const char *haystack, size_t length, const char *needle, size_t needle_len, const char *str, size_t str_len)
  2803. {
  2804. zend_string *new_str;
  2805. if (needle_len < length) {
  2806. const char *end;
  2807. const char *s, *p;
  2808. char *e, *r;
  2809. if (needle_len == str_len) {
  2810. new_str = zend_string_init(haystack, length, 0);
  2811. end = ZSTR_VAL(new_str) + length;
  2812. for (p = ZSTR_VAL(new_str); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
  2813. memcpy(r, str, str_len);
  2814. }
  2815. return new_str;
  2816. } else {
  2817. if (str_len < needle_len) {
  2818. new_str = zend_string_alloc(length, 0);
  2819. } else {
  2820. size_t count = 0;
  2821. const char *o = haystack;
  2822. const char *n = needle;
  2823. const char *endp = o + length;
  2824. while ((o = (char*)php_memnstr(o, n, needle_len, endp))) {
  2825. o += needle_len;
  2826. count++;
  2827. }
  2828. if (count == 0) {
  2829. /* Needle doesn't occur, shortcircuit the actual replacement. */
  2830. new_str = zend_string_init(haystack, length, 0);
  2831. return new_str;
  2832. } else {
  2833. if (str_len > needle_len) {
  2834. new_str = zend_string_safe_alloc(count, str_len - needle_len, length, 0);
  2835. } else {
  2836. new_str = zend_string_alloc(count * (str_len - needle_len) + length, 0);
  2837. }
  2838. }
  2839. }
  2840. s = e = ZSTR_VAL(new_str);
  2841. end = haystack + length;
  2842. for (p = haystack; (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
  2843. memcpy(e, p, r - p);
  2844. e += r - p;
  2845. memcpy(e, str, str_len);
  2846. e += str_len;
  2847. }
  2848. if (p < end) {
  2849. memcpy(e, p, end - p);
  2850. e += end - p;
  2851. }
  2852. *e = '\0';
  2853. new_str = zend_string_truncate(new_str, e - s, 0);
  2854. return new_str;
  2855. }
  2856. } else if (needle_len > length || memcmp(haystack, needle, length)) {
  2857. new_str = zend_string_init(haystack, length, 0);
  2858. return new_str;
  2859. } else {
  2860. new_str = zend_string_init(str, str_len, 0);
  2861. return new_str;
  2862. }
  2863. }
  2864. /* }}} */
  2865. /* {{{ Translates characters in str using given translation tables */
  2866. PHP_FUNCTION(strtr)
  2867. {
  2868. zend_string *str, *from_str = NULL;
  2869. HashTable *from_ht = NULL;
  2870. char *to = NULL;
  2871. size_t to_len = 0;
  2872. ZEND_PARSE_PARAMETERS_START(2, 3)
  2873. Z_PARAM_STR(str)
  2874. Z_PARAM_ARRAY_HT_OR_STR(from_ht, from_str)
  2875. Z_PARAM_OPTIONAL
  2876. Z_PARAM_STRING_OR_NULL(to, to_len)
  2877. ZEND_PARSE_PARAMETERS_END();
  2878. if (!to && from_ht == NULL) {
  2879. zend_argument_type_error(2, "must be of type array, string given");
  2880. RETURN_THROWS();
  2881. } else if (to && from_str == NULL) {
  2882. zend_argument_type_error(2, "must be of type string, array given");
  2883. RETURN_THROWS();
  2884. }
  2885. /* shortcut for empty string */
  2886. if (ZSTR_LEN(str) == 0) {
  2887. RETURN_EMPTY_STRING();
  2888. }
  2889. if (!to) {
  2890. if (zend_hash_num_elements(from_ht) < 1) {
  2891. RETURN_STR_COPY(str);
  2892. } else if (zend_hash_num_elements(from_ht) == 1) {
  2893. zend_long num_key;
  2894. zend_string *str_key, *tmp_str, *replace, *tmp_replace;
  2895. zval *entry;
  2896. ZEND_HASH_FOREACH_KEY_VAL(from_ht, num_key, str_key, entry) {
  2897. tmp_str = NULL;
  2898. if (UNEXPECTED(!str_key)) {
  2899. str_key = tmp_str = zend_long_to_str(num_key);
  2900. }
  2901. replace = zval_get_tmp_string(entry, &tmp_replace);
  2902. if (ZSTR_LEN(str_key) < 1) {
  2903. php_error_docref(NULL, E_WARNING, "Ignoring replacement of empty string");
  2904. RETVAL_STR_COPY(str);
  2905. } else if (ZSTR_LEN(str_key) == 1) {
  2906. RETVAL_STR(php_char_to_str_ex(str,
  2907. ZSTR_VAL(str_key)[0],
  2908. ZSTR_VAL(replace),
  2909. ZSTR_LEN(replace),
  2910. 1,
  2911. NULL));
  2912. } else {
  2913. zend_long dummy;
  2914. RETVAL_STR(php_str_to_str_ex(str,
  2915. ZSTR_VAL(str_key), ZSTR_LEN(str_key),
  2916. ZSTR_VAL(replace), ZSTR_LEN(replace), &dummy));
  2917. }
  2918. zend_tmp_string_release(tmp_str);
  2919. zend_tmp_string_release(tmp_replace);
  2920. return;
  2921. } ZEND_HASH_FOREACH_END();
  2922. } else {
  2923. php_strtr_array(return_value, str, from_ht);
  2924. }
  2925. } else {
  2926. RETURN_STR(php_strtr_ex(str,
  2927. ZSTR_VAL(from_str),
  2928. to,
  2929. MIN(ZSTR_LEN(from_str), to_len)));
  2930. }
  2931. }
  2932. /* }}} */
  2933. /* {{{ Reverse a string */
  2934. #if ZEND_INTRIN_SSSE3_NATIVE
  2935. #include <tmmintrin.h>
  2936. #elif defined(__aarch64__)
  2937. #include <arm_neon.h>
  2938. #endif
  2939. PHP_FUNCTION(strrev)
  2940. {
  2941. zend_string *str;
  2942. const char *s, *e;
  2943. char *p;
  2944. zend_string *n;
  2945. ZEND_PARSE_PARAMETERS_START(1, 1)
  2946. Z_PARAM_STR(str)
  2947. ZEND_PARSE_PARAMETERS_END();
  2948. n = zend_string_alloc(ZSTR_LEN(str), 0);
  2949. p = ZSTR_VAL(n);
  2950. s = ZSTR_VAL(str);
  2951. e = s + ZSTR_LEN(str);
  2952. --e;
  2953. #if ZEND_INTRIN_SSSE3_NATIVE
  2954. if (e - s > 15) {
  2955. const __m128i map = _mm_set_epi8(
  2956. 0, 1, 2, 3,
  2957. 4, 5, 6, 7,
  2958. 8, 9, 10, 11,
  2959. 12, 13, 14, 15);
  2960. do {
  2961. const __m128i str = _mm_loadu_si128((__m128i *)(e - 15));
  2962. _mm_storeu_si128((__m128i *)p, _mm_shuffle_epi8(str, map));
  2963. p += 16;
  2964. e -= 16;
  2965. } while (e - s > 15);
  2966. }
  2967. #elif defined(__aarch64__)
  2968. if (e - s > 15) {
  2969. do {
  2970. const uint8x16_t str = vld1q_u8((uint8_t *)(e - 15));
  2971. /* Synthesize rev128 with a rev64 + ext. */
  2972. const uint8x16_t rev = vrev64q_u8(str);
  2973. const uint8x16_t ext = (uint8x16_t)
  2974. vextq_u64((uint64x2_t)rev, (uint64x2_t)rev, 1);
  2975. vst1q_u8((uint8_t *)p, ext);
  2976. p += 16;
  2977. e -= 16;
  2978. } while (e - s > 15);
  2979. }
  2980. #endif
  2981. while (e >= s) {
  2982. *p++ = *e--;
  2983. }
  2984. *p = '\0';
  2985. RETVAL_NEW_STR(n);
  2986. }
  2987. /* }}} */
  2988. /* {{{ php_similar_str */
  /*
   * Finds the first longest common substring of txt1[0..len1) and
   * txt2[0..len2).
   *
   * On return *max holds its length and *count the number of times the running
   * maximum was raised during the scan. *pos1 / *pos2 receive the offsets of
   * the match in txt1 / txt2; they are left untouched when nothing matches.
   */
  static void php_similar_str(const char *txt1, size_t len1, const char *txt2, size_t len2, size_t *pos1, size_t *pos2, size_t *max, size_t *count)
  {
      size_t i, j;

      *max = 0;
      *count = 0;

      for (i = 0; i < len1; i++) {
          for (j = 0; j < len2; j++) {
              size_t run = 0;

              /* Length of the common run starting at txt1[i] / txt2[j]. */
              while (i + run < len1 && j + run < len2 && txt1[i + run] == txt2[j + run]) {
                  run++;
              }

              if (run > *max) {
                  *max = run;
                  *count += 1;
                  *pos1 = i;
                  *pos2 = j;
              }
          }
      }
  }
  3009. /* }}} */
  3010. /* {{{ php_similar_char */
  /*
   * Returns the number of matching characters between the two buffers: the
   * length of the longest common substring plus, recursively, the result for
   * the parts to its left and to its right.
   */
  static size_t php_similar_char(const char *txt1, size_t len1, const char *txt2, size_t len2)
  {
      size_t pos1 = 0, pos2 = 0, max, count;
      size_t total;

      php_similar_str(txt1, len1, txt2, len2, &pos1, &pos2, &max, &count);
      if (max == 0) {
          return 0;
      }
      total = max;

      /* Left of the match; only when both sides have a prefix and the maximum
       * was raised more than once. */
      if (pos1 && pos2 && count > 1) {
          total += php_similar_char(txt1, pos1, txt2, pos2);
      }

      /* Right of the match, when both sides have a remainder. */
      if (pos1 + max < len1 && pos2 + max < len2) {
          const size_t skip1 = pos1 + max;
          const size_t skip2 = pos2 + max;

          total += php_similar_char(txt1 + skip1, len1 - skip1,
                                    txt2 + skip2, len2 - skip2);
      }

      return total;
  }
  3028. /* }}} */
  3029. /* {{{ Calculates the similarity between two strings */
  3030. PHP_FUNCTION(similar_text)
  3031. {
  3032. zend_string *t1, *t2;
  3033. zval *percent = NULL;
  3034. int ac = ZEND_NUM_ARGS();
  3035. size_t sim;
  3036. ZEND_PARSE_PARAMETERS_START(2, 3)
  3037. Z_PARAM_STR(t1)
  3038. Z_PARAM_STR(t2)
  3039. Z_PARAM_OPTIONAL
  3040. Z_PARAM_ZVAL(percent)
  3041. ZEND_PARSE_PARAMETERS_END();
  3042. if (ZSTR_LEN(t1) + ZSTR_LEN(t2) == 0) {
  3043. if (ac > 2) {
  3044. ZEND_TRY_ASSIGN_REF_DOUBLE(percent, 0);
  3045. }
  3046. RETURN_LONG(0);
  3047. }
  3048. sim = php_similar_char(ZSTR_VAL(t1), ZSTR_LEN(t1), ZSTR_VAL(t2), ZSTR_LEN(t2));
  3049. if (ac > 2) {
  3050. ZEND_TRY_ASSIGN_REF_DOUBLE(percent, sim * 200.0 / (ZSTR_LEN(t1) + ZSTR_LEN(t2)));
  3051. }
  3052. RETURN_LONG(sim);
  3053. }
  3054. /* }}} */
  3055. /* {{{ Escapes all chars mentioned in charlist with backslash. It creates octal representations if asked to backslash characters with 8th bit set or with ASCII<32 (except '\n', '\r', '\t' etc...) */
  3056. PHP_FUNCTION(addcslashes)
  3057. {
  3058. zend_string *str, *what;
  3059. ZEND_PARSE_PARAMETERS_START(2, 2)
  3060. Z_PARAM_STR(str)
  3061. Z_PARAM_STR(what)
  3062. ZEND_PARSE_PARAMETERS_END();
  3063. if (ZSTR_LEN(str) == 0) {
  3064. RETURN_EMPTY_STRING();
  3065. }
  3066. if (ZSTR_LEN(what) == 0) {
  3067. RETURN_STR_COPY(str);
  3068. }
  3069. RETURN_STR(php_addcslashes_str(ZSTR_VAL(str), ZSTR_LEN(str), ZSTR_VAL(what), ZSTR_LEN(what)));
  3070. }
  3071. /* }}} */
  3072. /* {{{ Escapes single quote, double quotes and backslash characters in a string with backslashes */
  3073. PHP_FUNCTION(addslashes)
  3074. {
  3075. zend_string *str;
  3076. ZEND_PARSE_PARAMETERS_START(1, 1)
  3077. Z_PARAM_STR(str)
  3078. ZEND_PARSE_PARAMETERS_END();
  3079. if (ZSTR_LEN(str) == 0) {
  3080. RETURN_EMPTY_STRING();
  3081. }
  3082. RETURN_STR(php_addslashes(str));
  3083. }
  3084. /* }}} */
  3085. /* {{{ Strips backslashes from a string. Uses C-style conventions */
  3086. PHP_FUNCTION(stripcslashes)
  3087. {
  3088. zend_string *str;
  3089. ZEND_PARSE_PARAMETERS_START(1, 1)
  3090. Z_PARAM_STR(str)
  3091. ZEND_PARSE_PARAMETERS_END();
  3092. ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
  3093. php_stripcslashes(Z_STR_P(return_value));
  3094. }
  3095. /* }}} */
  3096. /* {{{ Strips backslashes from a string */
  3097. PHP_FUNCTION(stripslashes)
  3098. {
  3099. zend_string *str;
  3100. ZEND_PARSE_PARAMETERS_START(1, 1)
  3101. Z_PARAM_STR(str)
  3102. ZEND_PARSE_PARAMETERS_END();
  3103. ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
  3104. php_stripslashes(Z_STR_P(return_value));
  3105. }
  3106. /* }}} */
  3107. /* {{{ php_stripcslashes */
  3108. PHPAPI void php_stripcslashes(zend_string *str)
  3109. {
  3110. const char *source, *end;
  3111. char *target;
  3112. size_t nlen = ZSTR_LEN(str), i;
  3113. char numtmp[4];
  3114. for (source = (char*)ZSTR_VAL(str), end = source + ZSTR_LEN(str), target = ZSTR_VAL(str); source < end; source++) {
  3115. if (*source == '\\' && source + 1 < end) {
  3116. source++;
  3117. switch (*source) {
  3118. case 'n': *target++='\n'; nlen--; break;
  3119. case 'r': *target++='\r'; nlen--; break;
  3120. case 'a': *target++='\a'; nlen--; break;
  3121. case 't': *target++='\t'; nlen--; break;
  3122. case 'v': *target++='\v'; nlen--; break;
  3123. case 'b': *target++='\b'; nlen--; break;
  3124. case 'f': *target++='\f'; nlen--; break;
  3125. case '\\': *target++='\\'; nlen--; break;
  3126. case 'x':
  3127. if (source+1 < end && isxdigit((int)(*(source+1)))) {
  3128. numtmp[0] = *++source;
  3129. if (source+1 < end && isxdigit((int)(*(source+1)))) {
  3130. numtmp[1] = *++source;
  3131. numtmp[2] = '\0';
  3132. nlen-=3;
  3133. } else {
  3134. numtmp[1] = '\0';
  3135. nlen-=2;
  3136. }
  3137. *target++=(char)strtol(numtmp, NULL, 16);
  3138. break;
  3139. }
  3140. ZEND_FALLTHROUGH;
  3141. default:
  3142. i=0;
  3143. while (source < end && *source >= '0' && *source <= '7' && i<3) {
  3144. numtmp[i++] = *source++;
  3145. }
  3146. if (i) {
  3147. numtmp[i]='\0';
  3148. *target++=(char)strtol(numtmp, NULL, 8);
  3149. nlen-=i;
  3150. source--;
  3151. } else {
  3152. *target++=*source;
  3153. nlen--;
  3154. }
  3155. }
  3156. } else {
  3157. *target++=*source;
  3158. }
  3159. }
  3160. if (nlen != 0) {
  3161. *target='\0';
  3162. }
  3163. ZSTR_LEN(str) = nlen;
  3164. }
  3165. /* }}} */
  3166. /* {{{ php_addcslashes_str */
  3167. PHPAPI zend_string *php_addcslashes_str(const char *str, size_t len, const char *what, size_t wlength)
  3168. {
  3169. char flags[256];
  3170. char *target;
  3171. const char *source, *end;
  3172. char c;
  3173. size_t newlen;
  3174. zend_string *new_str = zend_string_safe_alloc(4, len, 0, 0);
  3175. php_charmask((const unsigned char *) what, wlength, flags);
  3176. for (source = str, end = source + len, target = ZSTR_VAL(new_str); source < end; source++) {
  3177. c = *source;
  3178. if (flags[(unsigned char)c]) {
  3179. if ((unsigned char) c < 32 || (unsigned char) c > 126) {
  3180. *target++ = '\\';
  3181. switch (c) {
  3182. case '\n': *target++ = 'n'; break;
  3183. case '\t': *target++ = 't'; break;
  3184. case '\r': *target++ = 'r'; break;
  3185. case '\a': *target++ = 'a'; break;
  3186. case '\v': *target++ = 'v'; break;
  3187. case '\b': *target++ = 'b'; break;
  3188. case '\f': *target++ = 'f'; break;
  3189. default: target += sprintf(target, "%03o", (unsigned char) c);
  3190. }
  3191. continue;
  3192. }
  3193. *target++ = '\\';
  3194. }
  3195. *target++ = c;
  3196. }
  3197. *target = 0;
  3198. newlen = target - ZSTR_VAL(new_str);
  3199. if (newlen < len * 4) {
  3200. new_str = zend_string_truncate(new_str, newlen, 0);
  3201. }
  3202. return new_str;
  3203. }
  3204. /* }}} */
  3205. /* {{{ php_addcslashes */
  3206. PHPAPI zend_string *php_addcslashes(zend_string *str, const char *what, size_t wlength)
  3207. {
  3208. return php_addcslashes_str(ZSTR_VAL(str), ZSTR_LEN(str), what, wlength);
  3209. }
  3210. /* }}} */
  3211. /* {{{ php_addslashes */
  3212. #if ZEND_INTRIN_SSE4_2_NATIVE
  3213. # include <nmmintrin.h>
  3214. # include "Zend/zend_bitset.h"
  3215. #elif ZEND_INTRIN_SSE4_2_RESOLVER
  3216. # include <nmmintrin.h>
  3217. # include "Zend/zend_bitset.h"
  3218. # include "Zend/zend_cpuinfo.h"
  3219. ZEND_INTRIN_SSE4_2_FUNC_DECL(zend_string *php_addslashes_sse42(zend_string *str));
  3220. zend_string *php_addslashes_default(zend_string *str);
  3221. ZEND_INTRIN_SSE4_2_FUNC_DECL(void php_stripslashes_sse42(zend_string *str));
  3222. void php_stripslashes_default(zend_string *str);
  3223. # if ZEND_INTRIN_SSE4_2_FUNC_PROTO
  3224. PHPAPI zend_string *php_addslashes(zend_string *str) __attribute__((ifunc("resolve_addslashes")));
  3225. PHPAPI void php_stripslashes(zend_string *str) __attribute__((ifunc("resolve_stripslashes")));
  3226. typedef zend_string *(*php_addslashes_func_t)(zend_string *);
  3227. typedef void (*php_stripslashes_func_t)(zend_string *);
  3228. ZEND_NO_SANITIZE_ADDRESS
  3229. ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
  3230. static php_addslashes_func_t resolve_addslashes(void) {
  3231. if (zend_cpu_supports_sse42()) {
  3232. return php_addslashes_sse42;
  3233. }
  3234. return php_addslashes_default;
  3235. }
  3236. ZEND_NO_SANITIZE_ADDRESS
  3237. ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
  3238. static php_stripslashes_func_t resolve_stripslashes(void) {
  3239. if (zend_cpu_supports_sse42()) {
  3240. return php_stripslashes_sse42;
  3241. }
  3242. return php_stripslashes_default;
  3243. }
  3244. # else /* ZEND_INTRIN_SSE4_2_FUNC_PTR */
  3245. static zend_string *(*php_addslashes_ptr)(zend_string *str) = NULL;
  3246. static void (*php_stripslashes_ptr)(zend_string *str) = NULL;
  3247. PHPAPI zend_string *php_addslashes(zend_string *str) {
  3248. return php_addslashes_ptr(str);
  3249. }
  3250. PHPAPI void php_stripslashes(zend_string *str) {
  3251. php_stripslashes_ptr(str);
  3252. }
  3253. /* {{{ PHP_MINIT_FUNCTION */
  3254. PHP_MINIT_FUNCTION(string_intrin)
  3255. {
  3256. if (zend_cpu_supports_sse42()) {
  3257. php_addslashes_ptr = php_addslashes_sse42;
  3258. php_stripslashes_ptr = php_stripslashes_sse42;
  3259. } else {
  3260. php_addslashes_ptr = php_addslashes_default;
  3261. php_stripslashes_ptr = php_stripslashes_default;
  3262. }
  3263. return SUCCESS;
  3264. }
  3265. /* }}} */
  3266. # endif
  3267. #endif
  #if ZEND_INTRIN_SSE4_2_NATIVE || ZEND_INTRIN_SSE4_2_RESOLVER
  /*
   * Copies the 16 bytes at `source` to `target`, inserting a backslash in front
   * of every byte whose bit is set in `res` (bit i <=> source[i]); a NUL byte is
   * written as "\0" (backslash, digit zero). `res` must be non-zero.
   * Returns the advanced output pointer.
   */
  static zend_always_inline char *php_addslashes_escape_chunk(char *target, const char *source, uint32_t res)
  {
      int pos = 0;

      do {
          /* Copy the run of plain bytes in front of the next flagged one. */
          int i, n = zend_ulong_ntz(res);
          for (i = 0; i < n; i++) {
              *target++ = source[pos + i];
          }
          pos += n;
          *target++ = '\\';
          if (source[pos] == '\0') {
              *target++ = '0';
          } else {
              *target++ = source[pos];
          }
          pos++;
          res = res >> (n + 1);
      } while (res);

      /* Plain bytes after the last flagged one. */
      for (; pos < 16; pos++) {
          *target++ = source[pos];
      }

      return target;
  }

  /*
   * SSE4.2 implementation of addslashes: escapes ', ", \ and NUL.
   *
   * Returns an empty string for a NULL argument, a new reference to `str`
   * itself when nothing needs escaping, and a newly allocated string otherwise.
   */
  # if ZEND_INTRIN_SSE4_2_NATIVE
  PHPAPI zend_string *php_addslashes(zend_string *str) /* {{{ */
  # elif ZEND_INTRIN_SSE4_2_RESOLVER
  zend_string *php_addslashes_sse42(zend_string *str)
  # endif
  {
      ZEND_SET_ALIGNED(16, static const char slashchars[16]) = "\'\"\\\0";
      __m128i w128, s128;
      uint32_t res = 0;
      char *target;
      const char *source, *end;
      size_t offset;
      zend_string *new_str;

      if (!str) {
          return ZSTR_EMPTY_ALLOC();
      }

      source = ZSTR_VAL(str);
      end = source + ZSTR_LEN(str);

      /* Phase 1: look for the first byte that needs escaping, 16 at a time. */
      if (ZSTR_LEN(str) > 15) {
          w128 = _mm_load_si128((__m128i *)slashchars);
          do {
              s128 = _mm_loadu_si128((__m128i *)source);
              res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
              if (res) {
                  goto do_escape;
              }
              source += 16;
          } while ((end - source) > 15);
      }

      while (source < end) {
          switch (*source) {
              case '\0':
              case '\'':
              case '\"':
              case '\\':
                  goto do_escape;
              default:
                  source++;
                  break;
          }
      }

      /* Nothing to escape. */
      return zend_string_copy(str);

  do_escape:
      /* Phase 2: the clean prefix is copied as is; the rest can at most double. */
      offset = source - (char *)ZSTR_VAL(str);
      new_str = zend_string_safe_alloc(2, ZSTR_LEN(str) - offset, offset, 0);
      memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
      target = ZSTR_VAL(new_str) + offset;

      if (res) {
          /* Arrived from the vector scan: `res` describes the chunk at `source`. */
          target = php_addslashes_escape_chunk(target, source, res);
          source += 16;
      } else if (end - source > 15) {
          w128 = _mm_load_si128((__m128i *)slashchars);
      }

      for (; end - source > 15; source += 16) {
          s128 = _mm_loadu_si128((__m128i *)source);
          res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
          if (res) {
              target = php_addslashes_escape_chunk(target, source, res);
          } else {
              _mm_storeu_si128((__m128i*)target, s128);
              target += 16;
          }
      }

      /* Scalar tail: fewer than 16 bytes left. */
      while (source < end) {
          switch (*source) {
              case '\0':
                  *target++ = '\\';
                  *target++ = '0';
                  break;
              case '\'':
              case '\"':
              case '\\':
                  *target++ = '\\';
                  ZEND_FALLTHROUGH;
              default:
                  *target++ = *source;
                  break;
          }
          source++;
      }

      *target = '\0';

      /* Reallocate only when more than 16 bytes of the buffer are unused. */
      if (ZSTR_LEN(new_str) - (target - ZSTR_VAL(new_str)) > 16) {
          new_str = zend_string_truncate(new_str, target - ZSTR_VAL(new_str), 0);
      } else {
          ZSTR_LEN(new_str) = target - ZSTR_VAL(new_str);
      }

      return new_str;
  }
  /* }}} */
  #endif
  #ifdef __aarch64__
  /* One 128-bit NEON register stored to memory, viewable as 16 bytes or as two
   * 64-bit words (handy for a quick "is any byte set" test). */
  typedef union {
      uint8_t mem[16];
      uint64_t dw[2];
  } quad_word;

  /* Returns a per-byte mask for `x`: mem[i] is non-zero exactly when byte i is
   * one of NUL, single quote, double quote or backslash. */
  static zend_always_inline quad_word aarch64_contains_slash_chars(uint8x16_t x) {
      const uint8x16_t is_nul = vceqq_u8(x, vdupq_n_u8('\0'));
      const uint8x16_t is_squote = vceqq_u8(x, vdupq_n_u8('\''));
      const uint8x16_t is_dquote = vceqq_u8(x, vdupq_n_u8('\"'));
      const uint8x16_t is_bslash = vceqq_u8(x, vdupq_n_u8('\\'));
      const uint8x16_t hits = vorrq_u8(vorrq_u8(is_nul, is_squote),
                                       vorrq_u8(is_dquote, is_bslash));
      quad_word mask;

      vst1q_u8(mask.mem, hits);
      return mask;
  }

  /* Copies the 16 bytes at `source` to `target`, prefixing each byte flagged in
   * `res` with a backslash (NUL is written as backslash + '0'). Returns the
   * advanced output pointer. */
  static zend_always_inline char *aarch64_add_slashes(quad_word res, const char *source, char *target)
  {
      int i;

      for (i = 0; i < 16; i++) {
          const char ch = source[i];

          if (res.mem[i] != 0) {
              *target++ = '\\';
              *target++ = (ch == '\0') ? '0' : ch;
          } else {
              *target++ = ch;
          }
      }

      return target;
  }
  #endif /* __aarch64__ */
  3431. #if !ZEND_INTRIN_SSE4_2_NATIVE
  3432. # if ZEND_INTRIN_SSE4_2_RESOLVER
  3433. zend_string *php_addslashes_default(zend_string *str) /* {{{ */
  3434. # else
  3435. PHPAPI zend_string *php_addslashes(zend_string *str)
  3436. # endif
  3437. {
  3438. /* maximum string length, worst case situation */
  3439. char *target;
  3440. const char *source, *end;
  3441. size_t offset;
  3442. zend_string *new_str;
  3443. if (!str) {
  3444. return ZSTR_EMPTY_ALLOC();
  3445. }
  3446. source = ZSTR_VAL(str);
  3447. end = source + ZSTR_LEN(str);
  3448. # ifdef __aarch64__
  3449. quad_word res = {0};
  3450. if (ZSTR_LEN(str) > 15) {
  3451. do {
  3452. res = aarch64_contains_slash_chars(vld1q_u8((uint8_t *)source));
  3453. if (res.dw[0] | res.dw[1])
  3454. goto do_escape;
  3455. source += 16;
  3456. } while ((end - source) > 15);
  3457. }
  3458. /* Finish the last 15 bytes or less with the scalar loop. */
  3459. # endif /* __aarch64__ */
  3460. while (source < end) {
  3461. switch (*source) {
  3462. case '\0':
  3463. case '\'':
  3464. case '\"':
  3465. case '\\':
  3466. goto do_escape;
  3467. default:
  3468. source++;
  3469. break;
  3470. }
  3471. }
  3472. return zend_string_copy(str);
  3473. do_escape:
  3474. offset = source - (char *)ZSTR_VAL(str);
  3475. new_str = zend_string_safe_alloc(2, ZSTR_LEN(str) - offset, offset, 0);
  3476. memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
  3477. target = ZSTR_VAL(new_str) + offset;
  3478. # ifdef __aarch64__
  3479. if (res.dw[0] | res.dw[1]) {
  3480. target = aarch64_add_slashes(res, source, target);
  3481. source += 16;
  3482. }
  3483. for (; end - source > 15; source += 16) {
  3484. uint8x16_t x = vld1q_u8((uint8_t *)source);
  3485. res = aarch64_contains_slash_chars(x);
  3486. if (res.dw[0] | res.dw[1]) {
  3487. target = aarch64_add_slashes(res, source, target);
  3488. } else {
  3489. vst1q_u8((uint8_t*)target, x);
  3490. target += 16;
  3491. }
  3492. }
  3493. /* Finish the last 15 bytes or less with the scalar loop. */
  3494. # endif /* __aarch64__ */
  3495. while (source < end) {
  3496. switch (*source) {
  3497. case '\0':
  3498. *target++ = '\\';
  3499. *target++ = '0';
  3500. break;
  3501. case '\'':
  3502. case '\"':
  3503. case '\\':
  3504. *target++ = '\\';
  3505. ZEND_FALLTHROUGH;
  3506. default:
  3507. *target++ = *source;
  3508. break;
  3509. }
  3510. source++;
  3511. }
  3512. *target = '\0';
  3513. if (ZSTR_LEN(new_str) - (target - ZSTR_VAL(new_str)) > 16) {
  3514. new_str = zend_string_truncate(new_str, target - ZSTR_VAL(new_str), 0);
  3515. } else {
  3516. ZSTR_LEN(new_str) = target - ZSTR_VAL(new_str);
  3517. }
  3518. return new_str;
  3519. }
  3520. #endif
  3521. /* }}} */
  3522. /* }}} */
  3523. /* {{{ php_stripslashes
  3524. *
  3525. * be careful, this edits the string in-place */
  3526. static zend_always_inline char *php_stripslashes_impl(const char *str, char *out, size_t len)
  3527. {
  3528. #ifdef __aarch64__
  3529. while (len > 15) {
  3530. uint8x16_t x = vld1q_u8((uint8_t *)str);
  3531. quad_word q;
  3532. vst1q_u8(q.mem, vceqq_u8(x, vdupq_n_u8('\\')));
  3533. if (q.dw[0] | q.dw[1]) {
  3534. int i = 0;
  3535. for (; i < 16; i++) {
  3536. if (q.mem[i] == 0) {
  3537. *out++ = str[i];
  3538. continue;
  3539. }
  3540. i++; /* skip the slash */
  3541. char s = str[i];
  3542. if (s == '0')
  3543. *out++ = '\0';
  3544. else
  3545. *out++ = s; /* preserve the next character */
  3546. }
  3547. str += i;
  3548. len -= i;
  3549. } else {
  3550. vst1q_u8((uint8_t*)out, x);
  3551. out += 16;
  3552. str += 16;
  3553. len -= 16;
  3554. }
  3555. }
  3556. /* Finish the last 15 bytes or less with the scalar loop. */
  3557. #endif /* __aarch64__ */
  3558. while (len > 0) {
  3559. if (*str == '\\') {
  3560. str++; /* skip the slash */
  3561. len--;
  3562. if (len > 0) {
  3563. if (*str == '0') {
  3564. *out++='\0';
  3565. str++;
  3566. } else {
  3567. *out++ = *str++; /* preserve the next character */
  3568. }
  3569. len--;
  3570. }
  3571. } else {
  3572. *out++ = *str++;
  3573. len--;
  3574. }
  3575. }
  3576. return out;
  3577. }
  #if ZEND_INTRIN_SSE4_2_NATIVE || ZEND_INTRIN_SSE4_2_RESOLVER
  /*
   * SSE implementation of stripslashes; edits `str` in place.
   *
   * Chunks of 16 bytes without a backslash are copied with one vector store.
   * A chunk containing a backslash is unescaped byte by byte up to (not
   * including) its last byte. Whatever is left once fewer than 16 bytes remain
   * is handed to php_stripslashes_impl().
   */
  # if ZEND_INTRIN_SSE4_2_NATIVE
  PHPAPI void php_stripslashes(zend_string *str)
  # elif ZEND_INTRIN_SSE4_2_RESOLVER
  void php_stripslashes_sse42(zend_string *str)
  # endif
  {
      const char *src = ZSTR_VAL(str);
      char *dst = ZSTR_VAL(str);
      size_t remaining = ZSTR_LEN(str);

      if (remaining > 15) {
          const __m128i slash = _mm_set1_epi8('\\');

          do {
              __m128i chunk = _mm_loadu_si128((__m128i *)src);
              uint32_t mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, slash));

              if (!mask) {
                  /* No backslash in this chunk: move it as a whole. */
                  _mm_storeu_si128((__m128i *)dst, chunk);
                  src += 16;
                  dst += 16;
                  remaining -= 16;
              } else {
                  /* Stop one byte short of the chunk end, so an escaped byte
                   * is always read from within these 16 bytes. */
                  const char *stop = src + 15;
                  int k, lead = zend_ulong_ntz(mask);

                  /* Plain bytes in front of the first backslash. */
                  remaining -= lead;
                  for (k = 0; k < lead; k++) {
                      *dst++ = *src++;
                  }

                  while (src < stop) {
                      if (*src == '\\') {
                          src++;
                          remaining--;
                          *dst = (*src == '0') ? '\0' : *src;
                      } else {
                          *dst = *src;
                      }
                      dst++;
                      remaining--;
                      src++;
                  }
              }
          } while (remaining > 15);
      }

      dst = php_stripslashes_impl(src, dst, remaining);

      /* Update length and terminator only if something was removed. */
      if (dst != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
          ZSTR_LEN(str) = dst - ZSTR_VAL(str);
          ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
      }
  }
  #endif
  3631. #if !ZEND_INTRIN_SSE4_2_NATIVE
  3632. # if ZEND_INTRIN_SSE4_2_RESOLVER
  3633. void php_stripslashes_default(zend_string *str) /* {{{ */
  3634. # else
  3635. PHPAPI void php_stripslashes(zend_string *str)
  3636. # endif
  3637. {
  3638. const char *t = php_stripslashes_impl(ZSTR_VAL(str), ZSTR_VAL(str), ZSTR_LEN(str));
  3639. if (t != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
  3640. ZSTR_LEN(str) = t - ZSTR_VAL(str);
  3641. ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
  3642. }
  3643. }
  3644. /* }}} */
  3645. #endif
  3646. /* }}} */
  /* Block kinds: a run of non-Hebrew bytes vs. a run of Hebrew bytes. */
  #define _HEB_BLOCK_TYPE_ENG 1
  #define _HEB_BLOCK_TYPE_HEB 2

  /*
   * Byte classification helpers. Each yields 1 or 0. The argument is fully
   * parenthesized before the cast, so an expression such as `a - b` is
   * converted as a whole rather than only its first operand.
   * Note: isheb() evaluates its argument twice, as do the others.
   */
  /* 1 if the byte is in the range 224..250. */
  #define isheb(c) (((((unsigned char) (c)) >= 224) && (((unsigned char) (c)) <= 250)) ? 1 : 0)
  /* 1 for space or horizontal tab. */
  #define _isblank(c) (((((unsigned char) (c)) == ' ' || ((unsigned char) (c)) == '\t')) ? 1 : 0)
  /* 1 for line feed or carriage return. */
  #define _isnewline(c) (((((unsigned char) (c)) == '\n' || ((unsigned char) (c)) == '\r')) ? 1 : 0)
  3652. /* {{{ php_str_replace_in_subject */
  3653. static zend_long php_str_replace_in_subject(
  3654. zend_string *search_str, HashTable *search_ht, zend_string *replace_str, HashTable *replace_ht,
  3655. zend_string *subject_str, zval *result, int case_sensitivity
  3656. ) {
  3657. zval *search_entry;
  3658. zend_string *tmp_result;
  3659. char *replace_value = NULL;
  3660. size_t replace_len = 0;
  3661. zend_long replace_count = 0;
  3662. zend_string *lc_subject_str = NULL;
  3663. uint32_t replace_idx;
  3664. if (ZSTR_LEN(subject_str) == 0) {
  3665. ZVAL_EMPTY_STRING(result);
  3666. return 0;
  3667. }
  3668. /* If search is an array */
  3669. if (search_ht) {
  3670. /* Duplicate subject string for repeated replacement */
  3671. zend_string_addref(subject_str);
  3672. if (replace_ht) {
  3673. replace_idx = 0;
  3674. } else {
  3675. /* Set replacement value to the passed one */
  3676. replace_value = ZSTR_VAL(replace_str);
  3677. replace_len = ZSTR_LEN(replace_str);
  3678. }
  3679. /* For each entry in the search array, get the entry */
  3680. ZEND_HASH_FOREACH_VAL(search_ht, search_entry) {
  3681. /* Make sure we're dealing with strings. */
  3682. zend_string *tmp_search_str;
  3683. zend_string *search_str = zval_get_tmp_string(search_entry, &tmp_search_str);
  3684. zend_string *replace_entry_str, *tmp_replace_entry_str = NULL;
  3685. /* If replace is an array. */
  3686. if (replace_ht) {
  3687. /* Get current entry */
  3688. zval *replace_entry = NULL;
  3689. while (replace_idx < replace_ht->nNumUsed) {
  3690. replace_entry = &replace_ht->arData[replace_idx].val;
  3691. if (Z_TYPE_P(replace_entry) != IS_UNDEF) {
  3692. break;
  3693. }
  3694. replace_idx++;
  3695. }
  3696. if (replace_idx < replace_ht->nNumUsed) {
  3697. /* Make sure we're dealing with strings. */
  3698. replace_entry_str = zval_get_tmp_string(replace_entry, &tmp_replace_entry_str);
  3699. /* Set replacement value to the one we got from array */
  3700. replace_value = ZSTR_VAL(replace_entry_str);
  3701. replace_len = ZSTR_LEN(replace_entry_str);
  3702. replace_idx++;
  3703. } else {
  3704. /* We've run out of replacement strings, so use an empty one. */
  3705. replace_value = "";
  3706. replace_len = 0;
  3707. }
  3708. }
  3709. if (ZSTR_LEN(search_str) == 1) {
  3710. zend_long old_replace_count = replace_count;
  3711. tmp_result = php_char_to_str_ex(subject_str,
  3712. ZSTR_VAL(search_str)[0],
  3713. replace_value,
  3714. replace_len,
  3715. case_sensitivity,
  3716. &replace_count);
  3717. if (lc_subject_str && replace_count != old_replace_count) {
  3718. zend_string_release_ex(lc_subject_str, 0);
  3719. lc_subject_str = NULL;
  3720. }
  3721. } else if (ZSTR_LEN(search_str) > 1) {
  3722. if (case_sensitivity) {
  3723. tmp_result = php_str_to_str_ex(subject_str,
  3724. ZSTR_VAL(search_str), ZSTR_LEN(search_str),
  3725. replace_value, replace_len, &replace_count);
  3726. } else {
  3727. zend_long old_replace_count = replace_count;
  3728. if (!lc_subject_str) {
  3729. lc_subject_str = php_string_tolower(subject_str);
  3730. }
  3731. tmp_result = php_str_to_str_i_ex(subject_str, ZSTR_VAL(lc_subject_str),
  3732. search_str, replace_value, replace_len, &replace_count);
  3733. if (replace_count != old_replace_count) {
  3734. zend_string_release_ex(lc_subject_str, 0);
  3735. lc_subject_str = NULL;
  3736. }
  3737. }
  3738. } else {
  3739. zend_tmp_string_release(tmp_search_str);
  3740. zend_tmp_string_release(tmp_replace_entry_str);
  3741. continue;
  3742. }
  3743. zend_tmp_string_release(tmp_search_str);
  3744. zend_tmp_string_release(tmp_replace_entry_str);
  3745. if (subject_str == tmp_result) {
  3746. zend_string_delref(subject_str);
  3747. } else {
  3748. zend_string_release_ex(subject_str, 0);
  3749. subject_str = tmp_result;
  3750. if (ZSTR_LEN(subject_str) == 0) {
  3751. zend_string_release_ex(subject_str, 0);
  3752. ZVAL_EMPTY_STRING(result);
  3753. if (lc_subject_str) {
  3754. zend_string_release_ex(lc_subject_str, 0);
  3755. }
  3756. return replace_count;
  3757. }
  3758. }
  3759. } ZEND_HASH_FOREACH_END();
  3760. ZVAL_STR(result, subject_str);
  3761. if (lc_subject_str) {
  3762. zend_string_release_ex(lc_subject_str, 0);
  3763. }
  3764. } else {
  3765. ZEND_ASSERT(search_str);
  3766. if (ZSTR_LEN(search_str) == 1) {
  3767. ZVAL_STR(result,
  3768. php_char_to_str_ex(subject_str,
  3769. ZSTR_VAL(search_str)[0],
  3770. ZSTR_VAL(replace_str),
  3771. ZSTR_LEN(replace_str),
  3772. case_sensitivity,
  3773. &replace_count));
  3774. } else if (ZSTR_LEN(search_str) > 1) {
  3775. if (case_sensitivity) {
  3776. ZVAL_STR(result, php_str_to_str_ex(subject_str,
  3777. ZSTR_VAL(search_str), ZSTR_LEN(search_str),
  3778. ZSTR_VAL(replace_str), ZSTR_LEN(replace_str), &replace_count));
  3779. } else {
  3780. lc_subject_str = php_string_tolower(subject_str);
  3781. ZVAL_STR(result, php_str_to_str_i_ex(subject_str, ZSTR_VAL(lc_subject_str),
  3782. search_str, ZSTR_VAL(replace_str), ZSTR_LEN(replace_str), &replace_count));
  3783. zend_string_release_ex(lc_subject_str, 0);
  3784. }
  3785. } else {
  3786. ZVAL_STR_COPY(result, subject_str);
  3787. }
  3788. }
  3789. return replace_count;
  3790. }
  3791. /* }}} */
  3792. /* {{{ php_str_replace_common */
  3793. static void php_str_replace_common(INTERNAL_FUNCTION_PARAMETERS, int case_sensitivity)
  3794. {
  3795. zend_string *search_str;
  3796. HashTable *search_ht;
  3797. zend_string *replace_str;
  3798. HashTable *replace_ht;
  3799. zend_string *subject_str;
  3800. HashTable *subject_ht;
  3801. zval *subject_entry, *zcount = NULL;
  3802. zval result;
  3803. zend_string *string_key;
  3804. zend_ulong num_key;
  3805. zend_long count = 0;
  3806. ZEND_PARSE_PARAMETERS_START(3, 4)
  3807. Z_PARAM_ARRAY_HT_OR_STR(search_ht, search_str)
  3808. Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
  3809. Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
  3810. Z_PARAM_OPTIONAL
  3811. Z_PARAM_ZVAL(zcount)
  3812. ZEND_PARSE_PARAMETERS_END();
  3813. /* Make sure we're dealing with strings and do the replacement. */
  3814. if (search_str && replace_ht) {
  3815. zend_argument_type_error(2, "must be of type %s when argument #1 ($search) is %s",
  3816. search_str ? "string" : "array", search_str ? "a string" : "an array"
  3817. );
  3818. RETURN_THROWS();
  3819. }
  3820. /* if subject is an array */
  3821. if (subject_ht) {
  3822. array_init(return_value);
  3823. /* For each subject entry, convert it to string, then perform replacement
  3824. and add the result to the return_value array. */
  3825. ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
  3826. zend_string *tmp_subject_str;
  3827. ZVAL_DEREF(subject_entry);
  3828. subject_str = zval_get_tmp_string(subject_entry, &tmp_subject_str);
  3829. count += php_str_replace_in_subject(search_str, search_ht, replace_str, replace_ht, subject_str, &result, case_sensitivity);
  3830. zend_tmp_string_release(tmp_subject_str);
  3831. /* Add to return array */
  3832. if (string_key) {
  3833. zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &result);
  3834. } else {
  3835. zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &result);
  3836. }
  3837. } ZEND_HASH_FOREACH_END();
  3838. } else { /* if subject is not an array */
  3839. count = php_str_replace_in_subject(search_str, search_ht, replace_str, replace_ht, subject_str, return_value, case_sensitivity);
  3840. }
  3841. if (zcount) {
  3842. ZEND_TRY_ASSIGN_REF_LONG(zcount, count);
  3843. }
  3844. }
  3845. /* }}} */
  3846. /* {{{ Replaces all occurrences of search in haystack with replace */
  3847. PHP_FUNCTION(str_replace)
  3848. {
  3849. php_str_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  3850. }
  3851. /* }}} */
  3852. /* {{{ Replaces all occurrences of search in haystack with replace / case-insensitive */
  3853. PHP_FUNCTION(str_ireplace)
  3854. {
  3855. php_str_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  3856. }
  3857. /* }}} */
  3858. /* {{{ Converts logical Hebrew text to visual text */
  3859. PHP_FUNCTION(hebrev)
  3860. {
  3861. char *str, *heb_str, *target;
  3862. const char *tmp;
  3863. size_t block_start, block_end, block_type, block_length, i;
  3864. zend_long max_chars=0, char_count;
  3865. size_t begin, end, orig_begin;
  3866. size_t str_len;
  3867. zend_string *broken_str;
  3868. ZEND_PARSE_PARAMETERS_START(1, 2)
  3869. Z_PARAM_STRING(str, str_len)
  3870. Z_PARAM_OPTIONAL
  3871. Z_PARAM_LONG(max_chars)
  3872. ZEND_PARSE_PARAMETERS_END();
  3873. if (str_len == 0) {
  3874. RETURN_EMPTY_STRING();
  3875. }
  3876. tmp = str;
  3877. block_start=block_end=0;
  3878. heb_str = (char *) emalloc(str_len+1);
  3879. target = heb_str+str_len;
  3880. *target = 0;
  3881. target--;
  3882. block_length=0;
  3883. if (isheb(*tmp)) {
  3884. block_type = _HEB_BLOCK_TYPE_HEB;
  3885. } else {
  3886. block_type = _HEB_BLOCK_TYPE_ENG;
  3887. }
  3888. do {
  3889. if (block_type == _HEB_BLOCK_TYPE_HEB) {
  3890. while ((isheb((int)*(tmp+1)) || _isblank((int)*(tmp+1)) || ispunct((int)*(tmp+1)) || (int)*(tmp+1)=='\n' ) && block_end<str_len-1) {
  3891. tmp++;
  3892. block_end++;
  3893. block_length++;
  3894. }
  3895. for (i = block_start+1; i<= block_end+1; i++) {
  3896. *target = str[i-1];
  3897. switch (*target) {
  3898. case '(':
  3899. *target = ')';
  3900. break;
  3901. case ')':
  3902. *target = '(';
  3903. break;
  3904. case '[':
  3905. *target = ']';
  3906. break;
  3907. case ']':
  3908. *target = '[';
  3909. break;
  3910. case '{':
  3911. *target = '}';
  3912. break;
  3913. case '}':
  3914. *target = '{';
  3915. break;
  3916. case '<':
  3917. *target = '>';
  3918. break;
  3919. case '>':
  3920. *target = '<';
  3921. break;
  3922. case '\\':
  3923. *target = '/';
  3924. break;
  3925. case '/':
  3926. *target = '\\';
  3927. break;
  3928. default:
  3929. break;
  3930. }
  3931. target--;
  3932. }
  3933. block_type = _HEB_BLOCK_TYPE_ENG;
  3934. } else {
  3935. while (!isheb(*(tmp+1)) && (int)*(tmp+1)!='\n' && block_end < str_len-1) {
  3936. tmp++;
  3937. block_end++;
  3938. block_length++;
  3939. }
  3940. while ((_isblank((int)*tmp) || ispunct((int)*tmp)) && *tmp!='/' && *tmp!='-' && block_end > block_start) {
  3941. tmp--;
  3942. block_end--;
  3943. }
  3944. for (i = block_end+1; i >= block_start+1; i--) {
  3945. *target = str[i-1];
  3946. target--;
  3947. }
  3948. block_type = _HEB_BLOCK_TYPE_HEB;
  3949. }
  3950. block_start=block_end+1;
  3951. } while (block_end < str_len-1);
  3952. broken_str = zend_string_alloc(str_len, 0);
  3953. begin = end = str_len-1;
  3954. target = ZSTR_VAL(broken_str);
  3955. while (1) {
  3956. char_count=0;
  3957. while ((!max_chars || (max_chars > 0 && char_count < max_chars)) && begin > 0) {
  3958. char_count++;
  3959. begin--;
  3960. if (_isnewline(heb_str[begin])) {
  3961. while (begin > 0 && _isnewline(heb_str[begin-1])) {
  3962. begin--;
  3963. char_count++;
  3964. }
  3965. break;
  3966. }
  3967. }
  3968. if (max_chars >= 0 && char_count == max_chars) { /* try to avoid breaking words */
  3969. size_t new_char_count=char_count, new_begin=begin;
  3970. while (new_char_count > 0) {
  3971. if (_isblank(heb_str[new_begin]) || _isnewline(heb_str[new_begin])) {
  3972. break;
  3973. }
  3974. new_begin++;
  3975. new_char_count--;
  3976. }
  3977. if (new_char_count > 0) {
  3978. begin=new_begin;
  3979. }
  3980. }
  3981. orig_begin=begin;
  3982. if (_isblank(heb_str[begin])) {
  3983. heb_str[begin]='\n';
  3984. }
  3985. while (begin <= end && _isnewline(heb_str[begin])) { /* skip leading newlines */
  3986. begin++;
  3987. }
  3988. for (i = begin; i <= end; i++) { /* copy content */
  3989. *target = heb_str[i];
  3990. target++;
  3991. }
  3992. for (i = orig_begin; i <= end && _isnewline(heb_str[i]); i++) {
  3993. *target = heb_str[i];
  3994. target++;
  3995. }
  3996. begin=orig_begin;
  3997. if (begin == 0) {
  3998. *target = 0;
  3999. break;
  4000. }
  4001. begin--;
  4002. end=begin;
  4003. }
  4004. efree(heb_str);
  4005. RETURN_NEW_STR(broken_str);
  4006. }
  4007. /* }}} */
  4008. /* {{{ Converts newlines to HTML line breaks */
  4009. PHP_FUNCTION(nl2br)
  4010. {
  4011. /* in brief this inserts <br /> or <br> before matched regexp \n\r?|\r\n? */
  4012. const char *tmp, *end;
  4013. zend_string *str;
  4014. char *target;
  4015. size_t repl_cnt = 0;
  4016. bool is_xhtml = 1;
  4017. zend_string *result;
  4018. ZEND_PARSE_PARAMETERS_START(1, 2)
  4019. Z_PARAM_STR(str)
  4020. Z_PARAM_OPTIONAL
  4021. Z_PARAM_BOOL(is_xhtml)
  4022. ZEND_PARSE_PARAMETERS_END();
  4023. tmp = ZSTR_VAL(str);
  4024. end = ZSTR_VAL(str) + ZSTR_LEN(str);
  4025. /* it is really faster to scan twice and allocate mem once instead of scanning once
  4026. and constantly reallocing */
  4027. while (tmp < end) {
  4028. if (*tmp == '\r') {
  4029. if (*(tmp+1) == '\n') {
  4030. tmp++;
  4031. }
  4032. repl_cnt++;
  4033. } else if (*tmp == '\n') {
  4034. if (*(tmp+1) == '\r') {
  4035. tmp++;
  4036. }
  4037. repl_cnt++;
  4038. }
  4039. tmp++;
  4040. }
  4041. if (repl_cnt == 0) {
  4042. RETURN_STR_COPY(str);
  4043. }
  4044. {
  4045. size_t repl_len = is_xhtml ? (sizeof("<br />") - 1) : (sizeof("<br>") - 1);
  4046. result = zend_string_safe_alloc(repl_cnt, repl_len, ZSTR_LEN(str), 0);
  4047. target = ZSTR_VAL(result);
  4048. }
  4049. tmp = ZSTR_VAL(str);
  4050. while (tmp < end) {
  4051. switch (*tmp) {
  4052. case '\r':
  4053. case '\n':
  4054. *target++ = '<';
  4055. *target++ = 'b';
  4056. *target++ = 'r';
  4057. if (is_xhtml) {
  4058. *target++ = ' ';
  4059. *target++ = '/';
  4060. }
  4061. *target++ = '>';
  4062. if ((*tmp == '\r' && *(tmp+1) == '\n') || (*tmp == '\n' && *(tmp+1) == '\r')) {
  4063. *target++ = *tmp++;
  4064. }
  4065. ZEND_FALLTHROUGH;
  4066. default:
  4067. *target++ = *tmp;
  4068. }
  4069. tmp++;
  4070. }
  4071. *target = '\0';
  4072. RETURN_NEW_STR(result);
  4073. }
  4074. /* }}} */
  4075. /* {{{ Strips HTML and PHP tags from a string */
  4076. PHP_FUNCTION(strip_tags)
  4077. {
  4078. zend_string *buf;
  4079. zend_string *str;
  4080. zend_string *allow_str = NULL;
  4081. HashTable *allow_ht = NULL;
  4082. const char *allowed_tags=NULL;
  4083. size_t allowed_tags_len=0;
  4084. smart_str tags_ss = {0};
  4085. ZEND_PARSE_PARAMETERS_START(1, 2)
  4086. Z_PARAM_STR(str)
  4087. Z_PARAM_OPTIONAL
  4088. Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(allow_ht, allow_str)
  4089. ZEND_PARSE_PARAMETERS_END();
  4090. if (allow_ht) {
  4091. zval *tmp;
  4092. zend_string *tag;
  4093. ZEND_HASH_FOREACH_VAL(allow_ht, tmp) {
  4094. tag = zval_get_string(tmp);
  4095. smart_str_appendc(&tags_ss, '<');
  4096. smart_str_append(&tags_ss, tag);
  4097. smart_str_appendc(&tags_ss, '>');
  4098. zend_string_release(tag);
  4099. } ZEND_HASH_FOREACH_END();
  4100. if (tags_ss.s) {
  4101. smart_str_0(&tags_ss);
  4102. allowed_tags = ZSTR_VAL(tags_ss.s);
  4103. allowed_tags_len = ZSTR_LEN(tags_ss.s);
  4104. }
  4105. } else if (allow_str) {
  4106. allowed_tags = ZSTR_VAL(allow_str);
  4107. allowed_tags_len = ZSTR_LEN(allow_str);
  4108. }
  4109. buf = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
  4110. ZSTR_LEN(buf) = php_strip_tags_ex(ZSTR_VAL(buf), ZSTR_LEN(str), allowed_tags, allowed_tags_len, 0);
  4111. smart_str_free(&tags_ss);
  4112. RETURN_NEW_STR(buf);
  4113. }
  4114. /* }}} */
  4115. static zend_string *try_setlocale_str(zend_long cat, zend_string *loc) {
  4116. const char *retval;
  4117. if (zend_string_equals_literal(loc, "0")) {
  4118. loc = NULL;
  4119. } else {
  4120. if (ZSTR_LEN(loc) >= 255) {
  4121. php_error_docref(NULL, E_WARNING, "Specified locale name is too long");
  4122. return NULL;
  4123. }
  4124. }
  4125. # ifndef PHP_WIN32
  4126. retval = setlocale(cat, loc ? ZSTR_VAL(loc) : NULL);
  4127. # else
  4128. if (loc) {
  4129. /* BC: don't try /^[a-z]{2}_[A-Z]{2}($|\..*)/ except for /^u[ks]_U[KS]$/ */
  4130. char *locp = ZSTR_VAL(loc);
  4131. if (ZSTR_LEN(loc) >= 5 && locp[2] == '_'
  4132. && locp[0] >= 'a' && locp[0] <= 'z' && locp[1] >= 'a' && locp[1] <= 'z'
  4133. && locp[3] >= 'A' && locp[3] <= 'Z' && locp[4] >= 'A' && locp[4] <= 'Z'
  4134. && (locp[5] == '\0' || locp[5] == '.')
  4135. && !(locp[0] == 'u' && (locp[1] == 'k' || locp[1] == 's')
  4136. && locp[3] == 'U' && (locp[4] == 'K' || locp[4] == 'S')
  4137. && locp[5] == '\0')
  4138. ) {
  4139. retval = NULL;
  4140. } else {
  4141. retval = setlocale(cat, ZSTR_VAL(loc));
  4142. }
  4143. } else {
  4144. retval = setlocale(cat, NULL);
  4145. }
  4146. # endif
  4147. if (!retval) {
  4148. return NULL;
  4149. }
  4150. if (loc) {
  4151. /* Remember if locale was changed */
  4152. size_t len = strlen(retval);
  4153. BG(locale_changed) = 1;
  4154. if (cat == LC_CTYPE || cat == LC_ALL) {
  4155. zend_update_current_locale();
  4156. if (BG(ctype_string)) {
  4157. zend_string_release_ex(BG(ctype_string), 0);
  4158. }
  4159. if (len == 1 && *retval == 'C') {
  4160. /* C locale is represented as NULL. */
  4161. BG(ctype_string) = NULL;
  4162. return ZSTR_CHAR('C');
  4163. } else if (len == ZSTR_LEN(loc) && !memcmp(ZSTR_VAL(loc), retval, len)) {
  4164. BG(ctype_string) = zend_string_copy(loc);
  4165. return zend_string_copy(BG(ctype_string));
  4166. } else {
  4167. BG(ctype_string) = zend_string_init(retval, len, 0);
  4168. return zend_string_copy(BG(ctype_string));
  4169. }
  4170. } else if (len == ZSTR_LEN(loc) && !memcmp(ZSTR_VAL(loc), retval, len)) {
  4171. return zend_string_copy(loc);
  4172. }
  4173. }
  4174. return zend_string_init(retval, strlen(retval), 0);
  4175. }
  4176. static zend_string *try_setlocale_zval(zend_long cat, zval *loc_zv) {
  4177. zend_string *tmp_loc_str;
  4178. zend_string *loc_str = zval_try_get_tmp_string(loc_zv, &tmp_loc_str);
  4179. if (UNEXPECTED(loc_str == NULL)) {
  4180. return NULL;
  4181. }
  4182. zend_string *result = try_setlocale_str(cat, loc_str);
  4183. zend_tmp_string_release(tmp_loc_str);
  4184. return result;
  4185. }
  4186. /* {{{ Set locale information */
  4187. PHP_FUNCTION(setlocale)
  4188. {
  4189. zend_long cat;
  4190. zval *args = NULL;
  4191. int num_args;
  4192. ZEND_PARSE_PARAMETERS_START(2, -1)
  4193. Z_PARAM_LONG(cat)
  4194. Z_PARAM_VARIADIC('+', args, num_args)
  4195. ZEND_PARSE_PARAMETERS_END();
  4196. for (uint32_t i = 0; i < num_args; i++) {
  4197. if (Z_TYPE(args[i]) == IS_ARRAY) {
  4198. zval *elem;
  4199. ZEND_HASH_FOREACH_VAL(Z_ARRVAL(args[i]), elem) {
  4200. zend_string *result = try_setlocale_zval(cat, elem);
  4201. if (EG(exception)) {
  4202. RETURN_THROWS();
  4203. }
  4204. if (result) {
  4205. RETURN_STR(result);
  4206. }
  4207. } ZEND_HASH_FOREACH_END();
  4208. } else {
  4209. zend_string *result = try_setlocale_zval(cat, &args[i]);
  4210. if (EG(exception)) {
  4211. RETURN_THROWS();
  4212. }
  4213. if (result) {
  4214. RETURN_STR(result);
  4215. }
  4216. }
  4217. }
  4218. RETURN_FALSE;
  4219. }
  4220. /* }}} */
  4221. /* {{{ Parses GET/POST/COOKIE data and sets global variables */
  4222. PHP_FUNCTION(parse_str)
  4223. {
  4224. char *arg;
  4225. zval *arrayArg = NULL;
  4226. char *res = NULL;
  4227. size_t arglen;
  4228. ZEND_PARSE_PARAMETERS_START(2, 2)
  4229. Z_PARAM_STRING(arg, arglen)
  4230. Z_PARAM_ZVAL(arrayArg)
  4231. ZEND_PARSE_PARAMETERS_END();
  4232. arrayArg = zend_try_array_init(arrayArg);
  4233. if (!arrayArg) {
  4234. RETURN_THROWS();
  4235. }
  4236. res = estrndup(arg, arglen);
  4237. sapi_module.treat_data(PARSE_STRING, res, arrayArg);
  4238. }
  4239. /* }}} */
  4240. #define PHP_TAG_BUF_SIZE 1023
  4241. /* {{{ php_tag_find
  4242. *
  4243. * Check if tag is in a set of tags
  4244. *
  4245. * states:
  4246. *
  4247. * 0 start tag
  4248. * 1 first non-whitespace char seen
  4249. */
  4250. int php_tag_find(char *tag, size_t len, const char *set) {
  4251. char c, *n;
  4252. const char *t;
  4253. int state=0, done=0;
  4254. char *norm;
  4255. if (len == 0) {
  4256. return 0;
  4257. }
  4258. norm = emalloc(len+1);
  4259. n = norm;
  4260. t = tag;
  4261. c = tolower(*t);
  4262. /*
  4263. normalize the tag removing leading and trailing whitespace
  4264. and turn any <a whatever...> into just <a> and any </tag>
  4265. into <tag>
  4266. */
  4267. while (!done) {
  4268. switch (c) {
  4269. case '<':
  4270. *(n++) = c;
  4271. break;
  4272. case '>':
  4273. done =1;
  4274. break;
  4275. default:
  4276. if (!isspace((int)c)) {
  4277. if (state == 0) {
  4278. state=1;
  4279. }
  4280. if (c != '/' || (*(t-1) != '<' && *(t+1) != '>')) {
  4281. *(n++) = c;
  4282. }
  4283. } else {
  4284. if (state == 1)
  4285. done=1;
  4286. }
  4287. break;
  4288. }
  4289. c = tolower(*(++t));
  4290. }
  4291. *(n++) = '>';
  4292. *n = '\0';
  4293. if (strstr(set, norm)) {
  4294. done=1;
  4295. } else {
  4296. done=0;
  4297. }
  4298. efree(norm);
  4299. return done;
  4300. }
  4301. /* }}} */
  4302. PHPAPI size_t php_strip_tags(char *rbuf, size_t len, const char *allow, size_t allow_len) /* {{{ */
  4303. {
  4304. return php_strip_tags_ex(rbuf, len, allow, allow_len, 0);
  4305. }
  4306. /* }}} */
  4307. /* {{{ php_strip_tags
  4308. A simple little state-machine to strip out html and php tags
  4309. State 0 is the output state, State 1 means we are inside a
  4310. normal html tag and state 2 means we are inside a php tag.
  4311. The state variable is passed in to allow a function like fgetss
  4312. to maintain state across calls to the function.
  4313. lc holds the last significant character read and br is a bracket
  4314. counter.
  4315. When an allow string is passed in we keep track of the string
  4316. in state 1 and when the tag is closed check it against the
  4317. allow string to see if we should allow it.
  4318. swm: Added ability to strip <?xml tags without assuming it PHP
  4319. code.
  4320. */
  4321. PHPAPI size_t php_strip_tags_ex(char *rbuf, size_t len, const char *allow, size_t allow_len, bool allow_tag_spaces)
  4322. {
  4323. char *tbuf, *tp, *rp, c, lc;
  4324. const char *buf, *p, *end;
  4325. int br, depth=0, in_q = 0;
  4326. uint8_t state = 0;
  4327. size_t pos;
  4328. char *allow_free = NULL;
  4329. char is_xml = 0;
  4330. buf = estrndup(rbuf, len);
  4331. end = buf + len;
  4332. lc = '\0';
  4333. p = buf;
  4334. rp = rbuf;
  4335. br = 0;
  4336. if (allow) {
  4337. allow_free = zend_str_tolower_dup_ex(allow, allow_len);
  4338. allow = allow_free ? allow_free : allow;
  4339. tbuf = emalloc(PHP_TAG_BUF_SIZE + 1);
  4340. tp = tbuf;
  4341. } else {
  4342. tbuf = tp = NULL;
  4343. }
  4344. state_0:
  4345. if (p >= end) {
  4346. goto finish;
  4347. }
  4348. c = *p;
  4349. switch (c) {
  4350. case '\0':
  4351. break;
  4352. case '<':
  4353. if (in_q) {
  4354. break;
  4355. }
  4356. if (isspace(*(p + 1)) && !allow_tag_spaces) {
  4357. *(rp++) = c;
  4358. break;
  4359. }
  4360. lc = '<';
  4361. state = 1;
  4362. if (allow) {
  4363. if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
  4364. pos = tp - tbuf;
  4365. tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
  4366. tp = tbuf + pos;
  4367. }
  4368. *(tp++) = '<';
  4369. }
  4370. p++;
  4371. goto state_1;
  4372. case '>':
  4373. if (depth) {
  4374. depth--;
  4375. break;
  4376. }
  4377. if (in_q) {
  4378. break;
  4379. }
  4380. *(rp++) = c;
  4381. break;
  4382. default:
  4383. *(rp++) = c;
  4384. break;
  4385. }
  4386. p++;
  4387. goto state_0;
  4388. state_1:
  4389. if (p >= end) {
  4390. goto finish;
  4391. }
  4392. c = *p;
  4393. switch (c) {
  4394. case '\0':
  4395. break;
  4396. case '<':
  4397. if (in_q) {
  4398. break;
  4399. }
  4400. if (isspace(*(p + 1)) && !allow_tag_spaces) {
  4401. goto reg_char_1;
  4402. }
  4403. depth++;
  4404. break;
  4405. case '>':
  4406. if (depth) {
  4407. depth--;
  4408. break;
  4409. }
  4410. if (in_q) {
  4411. break;
  4412. }
  4413. lc = '>';
  4414. if (is_xml && p >= buf + 1 && *(p -1) == '-') {
  4415. break;
  4416. }
  4417. in_q = state = is_xml = 0;
  4418. if (allow) {
  4419. if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
  4420. pos = tp - tbuf;
  4421. tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
  4422. tp = tbuf + pos;
  4423. }
  4424. *(tp++) = '>';
  4425. *tp='\0';
  4426. if (php_tag_find(tbuf, tp-tbuf, allow)) {
  4427. memcpy(rp, tbuf, tp-tbuf);
  4428. rp += tp-tbuf;
  4429. }
  4430. tp = tbuf;
  4431. }
  4432. p++;
  4433. goto state_0;
  4434. case '"':
  4435. case '\'':
  4436. if (p != buf && (!in_q || *p == in_q)) {
  4437. if (in_q) {
  4438. in_q = 0;
  4439. } else {
  4440. in_q = *p;
  4441. }
  4442. }
  4443. goto reg_char_1;
  4444. case '!':
  4445. /* JavaScript & Other HTML scripting languages */
  4446. if (p >= buf + 1 && *(p-1) == '<') {
  4447. state = 3;
  4448. lc = c;
  4449. p++;
  4450. goto state_3;
  4451. } else {
  4452. goto reg_char_1;
  4453. }
  4454. break;
  4455. case '?':
  4456. if (p >= buf + 1 && *(p-1) == '<') {
  4457. br=0;
  4458. state = 2;
  4459. p++;
  4460. goto state_2;
  4461. } else {
  4462. goto reg_char_1;
  4463. }
  4464. break;
  4465. default:
  4466. reg_char_1:
  4467. if (allow) {
  4468. if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
  4469. pos = tp - tbuf;
  4470. tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
  4471. tp = tbuf + pos;
  4472. }
  4473. *(tp++) = c;
  4474. }
  4475. break;
  4476. }
  4477. p++;
  4478. goto state_1;
  4479. state_2:
  4480. if (p >= end) {
  4481. goto finish;
  4482. }
  4483. c = *p;
  4484. switch (c) {
  4485. case '(':
  4486. if (lc != '"' && lc != '\'') {
  4487. lc = '(';
  4488. br++;
  4489. }
  4490. break;
  4491. case ')':
  4492. if (lc != '"' && lc != '\'') {
  4493. lc = ')';
  4494. br--;
  4495. }
  4496. break;
  4497. case '>':
  4498. if (depth) {
  4499. depth--;
  4500. break;
  4501. }
  4502. if (in_q) {
  4503. break;
  4504. }
  4505. if (!br && p >= buf + 1 && lc != '\"' && *(p-1) == '?') {
  4506. in_q = state = 0;
  4507. tp = tbuf;
  4508. p++;
  4509. goto state_0;
  4510. }
  4511. break;
  4512. case '"':
  4513. case '\'':
  4514. if (p >= buf + 1 && *(p-1) != '\\') {
  4515. if (lc == c) {
  4516. lc = '\0';
  4517. } else if (lc != '\\') {
  4518. lc = c;
  4519. }
  4520. if (p != buf && (!in_q || *p == in_q)) {
  4521. if (in_q) {
  4522. in_q = 0;
  4523. } else {
  4524. in_q = *p;
  4525. }
  4526. }
  4527. }
  4528. break;
  4529. case 'l':
  4530. case 'L':
  4531. /* swm: If we encounter '<?xml' then we shouldn't be in
  4532. * state == 2 (PHP). Switch back to HTML.
  4533. */
  4534. if (state == 2 && p > buf+4
  4535. && (*(p-1) == 'm' || *(p-1) == 'M')
  4536. && (*(p-2) == 'x' || *(p-2) == 'X')
  4537. && *(p-3) == '?'
  4538. && *(p-4) == '<') {
  4539. state = 1; is_xml=1;
  4540. p++;
  4541. goto state_1;
  4542. }
  4543. break;
  4544. default:
  4545. break;
  4546. }
  4547. p++;
  4548. goto state_2;
  4549. state_3:
  4550. if (p >= end) {
  4551. goto finish;
  4552. }
  4553. c = *p;
  4554. switch (c) {
  4555. case '>':
  4556. if (depth) {
  4557. depth--;
  4558. break;
  4559. }
  4560. if (in_q) {
  4561. break;
  4562. }
  4563. in_q = state = 0;
  4564. tp = tbuf;
  4565. p++;
  4566. goto state_0;
  4567. case '"':
  4568. case '\'':
  4569. if (p != buf && *(p-1) != '\\' && (!in_q || *p == in_q)) {
  4570. if (in_q) {
  4571. in_q = 0;
  4572. } else {
  4573. in_q = *p;
  4574. }
  4575. }
  4576. break;
  4577. case '-':
  4578. if (p >= buf + 2 && *(p-1) == '-' && *(p-2) == '!') {
  4579. state = 4;
  4580. p++;
  4581. goto state_4;
  4582. }
  4583. break;
  4584. case 'E':
  4585. case 'e':
  4586. /* !DOCTYPE exception */
  4587. if (p > buf+6
  4588. && (*(p-1) == 'p' || *(p-1) == 'P')
  4589. && (*(p-2) == 'y' || *(p-2) == 'Y')
  4590. && (*(p-3) == 't' || *(p-3) == 'T')
  4591. && (*(p-4) == 'c' || *(p-4) == 'C')
  4592. && (*(p-5) == 'o' || *(p-5) == 'O')
  4593. && (*(p-6) == 'd' || *(p-6) == 'D')) {
  4594. state = 1;
  4595. p++;
  4596. goto state_1;
  4597. }
  4598. break;
  4599. default:
  4600. break;
  4601. }
  4602. p++;
  4603. goto state_3;
  4604. state_4:
  4605. while (p < end) {
  4606. c = *p;
  4607. if (c == '>' && !in_q) {
  4608. if (p >= buf + 2 && *(p-1) == '-' && *(p-2) == '-') {
  4609. in_q = state = 0;
  4610. tp = tbuf;
  4611. p++;
  4612. goto state_0;
  4613. }
  4614. }
  4615. p++;
  4616. }
  4617. finish:
  4618. if (rp < rbuf + len) {
  4619. *rp = '\0';
  4620. }
  4621. efree((void *)buf);
  4622. if (tbuf) {
  4623. efree(tbuf);
  4624. }
  4625. if (allow_free) {
  4626. efree(allow_free);
  4627. }
  4628. return (size_t)(rp - rbuf);
  4629. }
  4630. /* }}} */
  4631. /* {{{ Parse a CSV string into an array */
  4632. PHP_FUNCTION(str_getcsv)
  4633. {
  4634. zend_string *str;
  4635. char delim = ',', enc = '"';
  4636. int esc = (unsigned char) '\\';
  4637. char *delim_str = NULL, *enc_str = NULL, *esc_str = NULL;
  4638. size_t delim_len = 0, enc_len = 0, esc_len = 0;
  4639. ZEND_PARSE_PARAMETERS_START(1, 4)
  4640. Z_PARAM_STR(str)
  4641. Z_PARAM_OPTIONAL
  4642. Z_PARAM_STRING(delim_str, delim_len)
  4643. Z_PARAM_STRING(enc_str, enc_len)
  4644. Z_PARAM_STRING(esc_str, esc_len)
  4645. ZEND_PARSE_PARAMETERS_END();
  4646. delim = delim_len ? delim_str[0] : delim;
  4647. enc = enc_len ? enc_str[0] : enc;
  4648. if (esc_str != NULL) {
  4649. esc = esc_len ? (unsigned char) esc_str[0] : PHP_CSV_NO_ESCAPE;
  4650. }
  4651. php_fgetcsv(NULL, delim, enc, esc, ZSTR_LEN(str), ZSTR_VAL(str), return_value);
  4652. }
  4653. /* }}} */
  4654. /* {{{ Returns the input string repeat mult times */
  4655. PHP_FUNCTION(str_repeat)
  4656. {
  4657. zend_string *input_str; /* Input string */
  4658. zend_long mult; /* Multiplier */
  4659. zend_string *result; /* Resulting string */
  4660. size_t result_len; /* Length of the resulting string */
  4661. ZEND_PARSE_PARAMETERS_START(2, 2)
  4662. Z_PARAM_STR(input_str)
  4663. Z_PARAM_LONG(mult)
  4664. ZEND_PARSE_PARAMETERS_END();
  4665. if (mult < 0) {
  4666. zend_argument_value_error(2, "must be greater than or equal to 0");
  4667. RETURN_THROWS();
  4668. }
  4669. /* Don't waste our time if it's empty */
  4670. /* ... or if the multiplier is zero */
  4671. if (ZSTR_LEN(input_str) == 0 || mult == 0)
  4672. RETURN_EMPTY_STRING();
  4673. /* Initialize the result string */
  4674. result = zend_string_safe_alloc(ZSTR_LEN(input_str), mult, 0, 0);
  4675. result_len = ZSTR_LEN(input_str) * mult;
  4676. /* Heavy optimization for situations where input string is 1 byte long */
  4677. if (ZSTR_LEN(input_str) == 1) {
  4678. memset(ZSTR_VAL(result), *ZSTR_VAL(input_str), mult);
  4679. } else {
  4680. const char *s, *ee;
  4681. char *e;
  4682. ptrdiff_t l=0;
  4683. memcpy(ZSTR_VAL(result), ZSTR_VAL(input_str), ZSTR_LEN(input_str));
  4684. s = ZSTR_VAL(result);
  4685. e = ZSTR_VAL(result) + ZSTR_LEN(input_str);
  4686. ee = ZSTR_VAL(result) + result_len;
  4687. while (e<ee) {
  4688. l = (e-s) < (ee-e) ? (e-s) : (ee-e);
  4689. memmove(e, s, l);
  4690. e += l;
  4691. }
  4692. }
  4693. ZSTR_VAL(result)[result_len] = '\0';
  4694. RETURN_NEW_STR(result);
  4695. }
  4696. /* }}} */
  4697. /* {{{ Returns info about what characters are used in input */
  4698. PHP_FUNCTION(count_chars)
  4699. {
  4700. zend_string *input;
  4701. int chars[256];
  4702. zend_long mymode=0;
  4703. const unsigned char *buf;
  4704. int inx;
  4705. char retstr[256];
  4706. size_t retlen=0;
  4707. size_t tmp = 0;
  4708. ZEND_PARSE_PARAMETERS_START(1, 2)
  4709. Z_PARAM_STR(input)
  4710. Z_PARAM_OPTIONAL
  4711. Z_PARAM_LONG(mymode)
  4712. ZEND_PARSE_PARAMETERS_END();
  4713. if (mymode < 0 || mymode > 4) {
  4714. zend_argument_value_error(2, "must be between 1 and 4 (inclusive)");
  4715. RETURN_THROWS();
  4716. }
  4717. buf = (const unsigned char *) ZSTR_VAL(input);
  4718. memset((void*) chars, 0, sizeof(chars));
  4719. while (tmp < ZSTR_LEN(input)) {
  4720. chars[*buf]++;
  4721. buf++;
  4722. tmp++;
  4723. }
  4724. if (mymode < 3) {
  4725. array_init(return_value);
  4726. }
  4727. for (inx = 0; inx < 256; inx++) {
  4728. switch (mymode) {
  4729. case 0:
  4730. add_index_long(return_value, inx, chars[inx]);
  4731. break;
  4732. case 1:
  4733. if (chars[inx] != 0) {
  4734. add_index_long(return_value, inx, chars[inx]);
  4735. }
  4736. break;
  4737. case 2:
  4738. if (chars[inx] == 0) {
  4739. add_index_long(return_value, inx, chars[inx]);
  4740. }
  4741. break;
  4742. case 3:
  4743. if (chars[inx] != 0) {
  4744. retstr[retlen++] = inx;
  4745. }
  4746. break;
  4747. case 4:
  4748. if (chars[inx] == 0) {
  4749. retstr[retlen++] = inx;
  4750. }
  4751. break;
  4752. }
  4753. }
  4754. if (mymode == 3 || mymode == 4) {
  4755. RETURN_STRINGL(retstr, retlen);
  4756. }
  4757. }
  4758. /* }}} */
  4759. /* {{{ php_strnatcmp */
  4760. static void php_strnatcmp(INTERNAL_FUNCTION_PARAMETERS, int fold_case)
  4761. {
  4762. zend_string *s1, *s2;
  4763. ZEND_PARSE_PARAMETERS_START(2, 2)
  4764. Z_PARAM_STR(s1)
  4765. Z_PARAM_STR(s2)
  4766. ZEND_PARSE_PARAMETERS_END();
  4767. RETURN_LONG(strnatcmp_ex(ZSTR_VAL(s1), ZSTR_LEN(s1),
  4768. ZSTR_VAL(s2), ZSTR_LEN(s2),
  4769. fold_case));
  4770. }
  4771. /* }}} */
  4772. PHPAPI int string_natural_compare_function_ex(zval *result, zval *op1, zval *op2, bool case_insensitive) /* {{{ */
  4773. {
  4774. zend_string *tmp_str1, *tmp_str2;
  4775. zend_string *str1 = zval_get_tmp_string(op1, &tmp_str1);
  4776. zend_string *str2 = zval_get_tmp_string(op2, &tmp_str2);
  4777. ZVAL_LONG(result, strnatcmp_ex(ZSTR_VAL(str1), ZSTR_LEN(str1), ZSTR_VAL(str2), ZSTR_LEN(str2), case_insensitive));
  4778. zend_tmp_string_release(tmp_str1);
  4779. zend_tmp_string_release(tmp_str2);
  4780. return SUCCESS;
  4781. }
  4782. /* }}} */
  4783. PHPAPI int string_natural_case_compare_function(zval *result, zval *op1, zval *op2) /* {{{ */
  4784. {
  4785. return string_natural_compare_function_ex(result, op1, op2, 1);
  4786. }
  4787. /* }}} */
  4788. PHPAPI int string_natural_compare_function(zval *result, zval *op1, zval *op2) /* {{{ */
  4789. {
  4790. return string_natural_compare_function_ex(result, op1, op2, 0);
  4791. }
  4792. /* }}} */
  4793. /* {{{ Returns the result of string comparison using 'natural' algorithm */
  4794. PHP_FUNCTION(strnatcmp)
  4795. {
  4796. php_strnatcmp(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  4797. }
  4798. /* }}} */
  4799. /* {{{ Returns numeric formatting information based on the current locale */
  4800. PHP_FUNCTION(localeconv)
  4801. {
  4802. zval grouping, mon_grouping;
  4803. int len, i;
  4804. ZEND_PARSE_PARAMETERS_NONE();
  4805. array_init(return_value);
  4806. array_init(&grouping);
  4807. array_init(&mon_grouping);
  4808. {
  4809. struct lconv currlocdata;
  4810. localeconv_r( &currlocdata );
  4811. /* Grab the grouping data out of the array */
  4812. len = (int)strlen(currlocdata.grouping);
  4813. for (i = 0; i < len; i++) {
  4814. add_index_long(&grouping, i, currlocdata.grouping[i]);
  4815. }
  4816. /* Grab the monetary grouping data out of the array */
  4817. len = (int)strlen(currlocdata.mon_grouping);
  4818. for (i = 0; i < len; i++) {
  4819. add_index_long(&mon_grouping, i, currlocdata.mon_grouping[i]);
  4820. }
  4821. add_assoc_string(return_value, "decimal_point", currlocdata.decimal_point);
  4822. add_assoc_string(return_value, "thousands_sep", currlocdata.thousands_sep);
  4823. add_assoc_string(return_value, "int_curr_symbol", currlocdata.int_curr_symbol);
  4824. add_assoc_string(return_value, "currency_symbol", currlocdata.currency_symbol);
  4825. add_assoc_string(return_value, "mon_decimal_point", currlocdata.mon_decimal_point);
  4826. add_assoc_string(return_value, "mon_thousands_sep", currlocdata.mon_thousands_sep);
  4827. add_assoc_string(return_value, "positive_sign", currlocdata.positive_sign);
  4828. add_assoc_string(return_value, "negative_sign", currlocdata.negative_sign);
  4829. add_assoc_long( return_value, "int_frac_digits", currlocdata.int_frac_digits);
  4830. add_assoc_long( return_value, "frac_digits", currlocdata.frac_digits);
  4831. add_assoc_long( return_value, "p_cs_precedes", currlocdata.p_cs_precedes);
  4832. add_assoc_long( return_value, "p_sep_by_space", currlocdata.p_sep_by_space);
  4833. add_assoc_long( return_value, "n_cs_precedes", currlocdata.n_cs_precedes);
  4834. add_assoc_long( return_value, "n_sep_by_space", currlocdata.n_sep_by_space);
  4835. add_assoc_long( return_value, "p_sign_posn", currlocdata.p_sign_posn);
  4836. add_assoc_long( return_value, "n_sign_posn", currlocdata.n_sign_posn);
  4837. }
  4838. zend_hash_str_update(Z_ARRVAL_P(return_value), "grouping", sizeof("grouping")-1, &grouping);
  4839. zend_hash_str_update(Z_ARRVAL_P(return_value), "mon_grouping", sizeof("mon_grouping")-1, &mon_grouping);
  4840. }
  4841. /* }}} */
  4842. /* {{{ Returns the result of case-insensitive string comparison using 'natural' algorithm */
  4843. PHP_FUNCTION(strnatcasecmp)
  4844. {
  4845. php_strnatcmp(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  4846. }
  4847. /* }}} */
  4848. /* {{{ Returns the number of times a substring occurs in the string */
  4849. PHP_FUNCTION(substr_count)
  4850. {
  4851. char *haystack, *needle;
  4852. zend_long offset = 0, length = 0;
  4853. bool length_is_null = 1;
  4854. zend_long count = 0;
  4855. size_t haystack_len, needle_len;
  4856. const char *p, *endp;
  4857. char cmp;
  4858. ZEND_PARSE_PARAMETERS_START(2, 4)
  4859. Z_PARAM_STRING(haystack, haystack_len)
  4860. Z_PARAM_STRING(needle, needle_len)
  4861. Z_PARAM_OPTIONAL
  4862. Z_PARAM_LONG(offset)
  4863. Z_PARAM_LONG_OR_NULL(length, length_is_null)
  4864. ZEND_PARSE_PARAMETERS_END();
  4865. if (needle_len == 0) {
  4866. zend_argument_value_error(2, "cannot be empty");
  4867. RETURN_THROWS();
  4868. }
  4869. p = haystack;
  4870. endp = p + haystack_len;
  4871. if (offset < 0) {
  4872. offset += (zend_long)haystack_len;
  4873. }
  4874. if ((offset < 0) || ((size_t)offset > haystack_len)) {
  4875. zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
  4876. RETURN_THROWS();
  4877. }
  4878. p += offset;
  4879. if (!length_is_null) {
  4880. if (length < 0) {
  4881. length += (haystack_len - offset);
  4882. }
  4883. if (length < 0 || ((size_t)length > (haystack_len - offset))) {
  4884. zend_argument_value_error(4, "must be contained in argument #1 ($haystack)");
  4885. RETURN_THROWS();
  4886. }
  4887. endp = p + length;
  4888. }
  4889. if (needle_len == 1) {
  4890. cmp = needle[0];
  4891. while ((p = memchr(p, cmp, endp - p))) {
  4892. count++;
  4893. p++;
  4894. }
  4895. } else {
  4896. while ((p = (char*)php_memnstr(p, needle, needle_len, endp))) {
  4897. p += needle_len;
  4898. count++;
  4899. }
  4900. }
  4901. RETURN_LONG(count);
  4902. }
  4903. /* }}} */
  4904. /* {{{ Returns input string padded on the left or right to specified length with pad_string */
  4905. PHP_FUNCTION(str_pad)
  4906. {
  4907. /* Input arguments */
  4908. zend_string *input; /* Input string */
  4909. zend_long pad_length; /* Length to pad to */
  4910. /* Helper variables */
  4911. size_t num_pad_chars; /* Number of padding characters (total - input size) */
  4912. char *pad_str = " "; /* Pointer to padding string */
  4913. size_t pad_str_len = 1;
  4914. zend_long pad_type_val = STR_PAD_RIGHT; /* The padding type value */
  4915. size_t i, left_pad=0, right_pad=0;
  4916. zend_string *result = NULL; /* Resulting string */
  4917. ZEND_PARSE_PARAMETERS_START(2, 4)
  4918. Z_PARAM_STR(input)
  4919. Z_PARAM_LONG(pad_length)
  4920. Z_PARAM_OPTIONAL
  4921. Z_PARAM_STRING(pad_str, pad_str_len)
  4922. Z_PARAM_LONG(pad_type_val)
  4923. ZEND_PARSE_PARAMETERS_END();
  4924. /* If resulting string turns out to be shorter than input string,
  4925. we simply copy the input and return. */
  4926. if (pad_length < 0 || (size_t)pad_length <= ZSTR_LEN(input)) {
  4927. RETURN_STR_COPY(input);
  4928. }
  4929. if (pad_str_len == 0) {
  4930. zend_argument_value_error(3, "must be a non-empty string");
  4931. RETURN_THROWS();
  4932. }
  4933. if (pad_type_val < STR_PAD_LEFT || pad_type_val > STR_PAD_BOTH) {
  4934. zend_argument_value_error(4, "must be STR_PAD_LEFT, STR_PAD_RIGHT, or STR_PAD_BOTH");
  4935. RETURN_THROWS();
  4936. }
  4937. num_pad_chars = pad_length - ZSTR_LEN(input);
  4938. result = zend_string_safe_alloc(1, ZSTR_LEN(input), num_pad_chars, 0);
  4939. ZSTR_LEN(result) = 0;
  4940. /* We need to figure out the left/right padding lengths. */
  4941. switch (pad_type_val) {
  4942. case STR_PAD_RIGHT:
  4943. left_pad = 0;
  4944. right_pad = num_pad_chars;
  4945. break;
  4946. case STR_PAD_LEFT:
  4947. left_pad = num_pad_chars;
  4948. right_pad = 0;
  4949. break;
  4950. case STR_PAD_BOTH:
  4951. left_pad = num_pad_chars / 2;
  4952. right_pad = num_pad_chars - left_pad;
  4953. break;
  4954. }
  4955. /* First we pad on the left. */
  4956. for (i = 0; i < left_pad; i++)
  4957. ZSTR_VAL(result)[ZSTR_LEN(result)++] = pad_str[i % pad_str_len];
  4958. /* Then we copy the input string. */
  4959. memcpy(ZSTR_VAL(result) + ZSTR_LEN(result), ZSTR_VAL(input), ZSTR_LEN(input));
  4960. ZSTR_LEN(result) += ZSTR_LEN(input);
  4961. /* Finally, we pad on the right. */
  4962. for (i = 0; i < right_pad; i++)
  4963. ZSTR_VAL(result)[ZSTR_LEN(result)++] = pad_str[i % pad_str_len];
  4964. ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
  4965. RETURN_NEW_STR(result);
  4966. }
  4967. /* }}} */
  4968. /* {{{ Implements an ANSI C compatible sscanf */
  4969. PHP_FUNCTION(sscanf)
  4970. {
  4971. zval *args = NULL;
  4972. char *str, *format;
  4973. size_t str_len, format_len;
  4974. int result, num_args = 0;
  4975. ZEND_PARSE_PARAMETERS_START(2, -1)
  4976. Z_PARAM_STRING(str, str_len)
  4977. Z_PARAM_STRING(format, format_len)
  4978. Z_PARAM_VARIADIC('*', args, num_args)
  4979. ZEND_PARSE_PARAMETERS_END();
  4980. result = php_sscanf_internal(str, format, num_args, args, 0, return_value);
  4981. if (SCAN_ERROR_WRONG_PARAM_COUNT == result) {
  4982. WRONG_PARAM_COUNT;
  4983. }
  4984. }
  4985. /* }}} */
  4986. /* static zend_string *php_str_rot13(zend_string *str) {{{ */
  4987. #ifdef __SSE2__
  4988. #include <emmintrin.h>
  4989. #endif
  4990. static zend_string *php_str_rot13(zend_string *str)
  4991. {
  4992. zend_string *ret;
  4993. const char *p, *e;
  4994. char *target;
  4995. if (UNEXPECTED(ZSTR_LEN(str) == 0)) {
  4996. return ZSTR_EMPTY_ALLOC();
  4997. }
  4998. ret = zend_string_alloc(ZSTR_LEN(str), 0);
  4999. p = ZSTR_VAL(str);
  5000. e = p + ZSTR_LEN(str);
  5001. target = ZSTR_VAL(ret);
  5002. #ifdef __SSE2__
  5003. if (e - p > 15) {
  5004. const __m128i a_minus_1 = _mm_set1_epi8('a' - 1);
  5005. const __m128i m_plus_1 = _mm_set1_epi8('m' + 1);
  5006. const __m128i n_minus_1 = _mm_set1_epi8('n' - 1);
  5007. const __m128i z_plus_1 = _mm_set1_epi8('z' + 1);
  5008. const __m128i A_minus_1 = _mm_set1_epi8('A' - 1);
  5009. const __m128i M_plus_1 = _mm_set1_epi8('M' + 1);
  5010. const __m128i N_minus_1 = _mm_set1_epi8('N' - 1);
  5011. const __m128i Z_plus_1 = _mm_set1_epi8('Z' + 1);
  5012. const __m128i add = _mm_set1_epi8(13);
  5013. const __m128i sub = _mm_set1_epi8(-13);
  5014. do {
  5015. __m128i in, gt, lt, cmp, delta;
  5016. delta = _mm_setzero_si128();
  5017. in = _mm_loadu_si128((__m128i *)p);
  5018. gt = _mm_cmpgt_epi8(in, a_minus_1);
  5019. lt = _mm_cmplt_epi8(in, m_plus_1);
  5020. cmp = _mm_and_si128(lt, gt);
  5021. if (_mm_movemask_epi8(cmp)) {
  5022. cmp = _mm_and_si128(cmp, add);
  5023. delta = _mm_or_si128(delta, cmp);
  5024. }
  5025. gt = _mm_cmpgt_epi8(in, n_minus_1);
  5026. lt = _mm_cmplt_epi8(in, z_plus_1);
  5027. cmp = _mm_and_si128(lt, gt);
  5028. if (_mm_movemask_epi8(cmp)) {
  5029. cmp = _mm_and_si128(cmp, sub);
  5030. delta = _mm_or_si128(delta, cmp);
  5031. }
  5032. gt = _mm_cmpgt_epi8(in, A_minus_1);
  5033. lt = _mm_cmplt_epi8(in, M_plus_1);
  5034. cmp = _mm_and_si128(lt, gt);
  5035. if (_mm_movemask_epi8(cmp)) {
  5036. cmp = _mm_and_si128(cmp, add);
  5037. delta = _mm_or_si128(delta, cmp);
  5038. }
  5039. gt = _mm_cmpgt_epi8(in, N_minus_1);
  5040. lt = _mm_cmplt_epi8(in, Z_plus_1);
  5041. cmp = _mm_and_si128(lt, gt);
  5042. if (_mm_movemask_epi8(cmp)) {
  5043. cmp = _mm_and_si128(cmp, sub);
  5044. delta = _mm_or_si128(delta, cmp);
  5045. }
  5046. in = _mm_add_epi8(in, delta);
  5047. _mm_storeu_si128((__m128i *)target, in);
  5048. p += 16;
  5049. target += 16;
  5050. } while (e - p > 15);
  5051. }
  5052. #endif
  5053. while (p < e) {
  5054. if (*p >= 'a' && *p <= 'z') {
  5055. *target++ = 'a' + (((*p++ - 'a') + 13) % 26);
  5056. } else if (*p >= 'A' && *p <= 'Z') {
  5057. *target++ = 'A' + (((*p++ - 'A') + 13) % 26);
  5058. } else {
  5059. *target++ = *p++;
  5060. }
  5061. }
  5062. *target = '\0';
  5063. return ret;
  5064. }
  5065. /* }}} */
  5066. /* {{{ Perform the rot13 transform on a string */
  5067. PHP_FUNCTION(str_rot13)
  5068. {
  5069. zend_string *arg;
  5070. ZEND_PARSE_PARAMETERS_START(1, 1)
  5071. Z_PARAM_STR(arg)
  5072. ZEND_PARSE_PARAMETERS_END();
  5073. RETURN_STR(php_str_rot13(arg));
  5074. }
  5075. /* }}} */
  5076. static void php_string_shuffle(char *str, zend_long len) /* {{{ */
  5077. {
  5078. zend_long n_elems, rnd_idx, n_left;
  5079. char temp;
  5080. /* The implementation is stolen from array_data_shuffle */
  5081. /* Thus the characteristics of the randomization are the same */
  5082. n_elems = len;
  5083. if (n_elems <= 1) {
  5084. return;
  5085. }
  5086. n_left = n_elems;
  5087. while (--n_left) {
  5088. rnd_idx = php_mt_rand_range(0, n_left);
  5089. if (rnd_idx != n_left) {
  5090. temp = str[n_left];
  5091. str[n_left] = str[rnd_idx];
  5092. str[rnd_idx] = temp;
  5093. }
  5094. }
  5095. }
  5096. /* }}} */
  5097. /* {{{ Shuffles string. One permutation of all possible is created */
  5098. PHP_FUNCTION(str_shuffle)
  5099. {
  5100. zend_string *arg;
  5101. ZEND_PARSE_PARAMETERS_START(1, 1)
  5102. Z_PARAM_STR(arg)
  5103. ZEND_PARSE_PARAMETERS_END();
  5104. RETVAL_STRINGL(ZSTR_VAL(arg), ZSTR_LEN(arg));
  5105. if (Z_STRLEN_P(return_value) > 1) {
  5106. php_string_shuffle(Z_STRVAL_P(return_value), (zend_long) Z_STRLEN_P(return_value));
  5107. }
  5108. }
  5109. /* }}} */
  5110. /* {{{ Counts the number of words inside a string. If format of 1 is specified,
  5111. then the function will return an array containing all the words
  5112. found inside the string. If format of 2 is specified, then the function
  5113. will return an associated array where the position of the word is the key
  5114. and the word itself is the value.
  5115. For the purpose of this function, 'word' is defined as a locale dependent
  5116. string containing alphabetic characters, which also may contain, but not start
  5117. with "'" and "-" characters.
  5118. */
  5119. PHP_FUNCTION(str_word_count)
  5120. {
  5121. zend_string *str;
  5122. char *char_list = NULL, ch[256];
  5123. const char *p, *e, *s;
  5124. size_t char_list_len = 0, word_count = 0;
  5125. zend_long type = 0;
  5126. ZEND_PARSE_PARAMETERS_START(1, 3)
  5127. Z_PARAM_STR(str)
  5128. Z_PARAM_OPTIONAL
  5129. Z_PARAM_LONG(type)
  5130. Z_PARAM_STRING_OR_NULL(char_list, char_list_len)
  5131. ZEND_PARSE_PARAMETERS_END();
  5132. switch(type) {
  5133. case 1:
  5134. case 2:
  5135. array_init(return_value);
  5136. if (!ZSTR_LEN(str)) {
  5137. return;
  5138. }
  5139. break;
  5140. case 0:
  5141. if (!ZSTR_LEN(str)) {
  5142. RETURN_LONG(0);
  5143. }
  5144. /* nothing to be done */
  5145. break;
  5146. default:
  5147. zend_argument_value_error(2, "must be a valid format value");
  5148. RETURN_THROWS();
  5149. }
  5150. if (char_list) {
  5151. php_charmask((const unsigned char *) char_list, char_list_len, ch);
  5152. }
  5153. p = ZSTR_VAL(str);
  5154. e = ZSTR_VAL(str) + ZSTR_LEN(str);
  5155. /* first character cannot be ' or -, unless explicitly allowed by the user */
  5156. if ((*p == '\'' && (!char_list || !ch['\''])) || (*p == '-' && (!char_list || !ch['-']))) {
  5157. p++;
  5158. }
  5159. /* last character cannot be -, unless explicitly allowed by the user */
  5160. if (*(e - 1) == '-' && (!char_list || !ch['-'])) {
  5161. e--;
  5162. }
  5163. while (p < e) {
  5164. s = p;
  5165. while (p < e && (isalpha((unsigned char)*p) || (char_list && ch[(unsigned char)*p]) || *p == '\'' || *p == '-')) {
  5166. p++;
  5167. }
  5168. if (p > s) {
  5169. switch (type)
  5170. {
  5171. case 1:
  5172. add_next_index_stringl(return_value, s, p - s);
  5173. break;
  5174. case 2:
  5175. add_index_stringl(return_value, (s - ZSTR_VAL(str)), s, p - s);
  5176. break;
  5177. default:
  5178. word_count++;
  5179. break;
  5180. }
  5181. }
  5182. p++;
  5183. }
  5184. if (!type) {
  5185. RETURN_LONG(word_count);
  5186. }
  5187. }
  5188. /* }}} */
  5189. /* {{{ Convert a string to an array. If split_length is specified, break the string down into chunks each split_length characters long. */
  5190. PHP_FUNCTION(str_split)
  5191. {
  5192. zend_string *str;
  5193. zend_long split_length = 1;
  5194. const char *p;
  5195. size_t n_reg_segments;
  5196. ZEND_PARSE_PARAMETERS_START(1, 2)
  5197. Z_PARAM_STR(str)
  5198. Z_PARAM_OPTIONAL
  5199. Z_PARAM_LONG(split_length)
  5200. ZEND_PARSE_PARAMETERS_END();
  5201. if (split_length <= 0) {
  5202. zend_argument_value_error(2, "must be greater than 0");
  5203. RETURN_THROWS();
  5204. }
  5205. if (0 == ZSTR_LEN(str) || (size_t)split_length >= ZSTR_LEN(str)) {
  5206. array_init_size(return_value, 1);
  5207. add_next_index_stringl(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
  5208. return;
  5209. }
  5210. array_init_size(return_value, (uint32_t)(((ZSTR_LEN(str) - 1) / split_length) + 1));
  5211. n_reg_segments = ZSTR_LEN(str) / split_length;
  5212. p = ZSTR_VAL(str);
  5213. while (n_reg_segments-- > 0) {
  5214. add_next_index_stringl(return_value, p, split_length);
  5215. p += split_length;
  5216. }
  5217. if (p != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
  5218. add_next_index_stringl(return_value, p, (ZSTR_VAL(str) + ZSTR_LEN(str) - p));
  5219. }
  5220. }
  5221. /* }}} */
  5222. /* {{{ Search a string for any of a set of characters */
  5223. PHP_FUNCTION(strpbrk)
  5224. {
  5225. zend_string *haystack, *char_list;
  5226. const char *haystack_ptr, *cl_ptr;
  5227. ZEND_PARSE_PARAMETERS_START(2, 2)
  5228. Z_PARAM_STR(haystack)
  5229. Z_PARAM_STR(char_list)
  5230. ZEND_PARSE_PARAMETERS_END();
  5231. if (!ZSTR_LEN(char_list)) {
  5232. zend_argument_value_error(2, "must be a non-empty string");
  5233. RETURN_THROWS();
  5234. }
  5235. for (haystack_ptr = ZSTR_VAL(haystack); haystack_ptr < (ZSTR_VAL(haystack) + ZSTR_LEN(haystack)); ++haystack_ptr) {
  5236. for (cl_ptr = ZSTR_VAL(char_list); cl_ptr < (ZSTR_VAL(char_list) + ZSTR_LEN(char_list)); ++cl_ptr) {
  5237. if (*cl_ptr == *haystack_ptr) {
  5238. RETURN_STRINGL(haystack_ptr, (ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - haystack_ptr));
  5239. }
  5240. }
  5241. }
  5242. RETURN_FALSE;
  5243. }
  5244. /* }}} */
  5245. /* {{{ Binary safe optionally case insensitive comparison of 2 strings from an offset, up to length characters */
  5246. PHP_FUNCTION(substr_compare)
  5247. {
  5248. zend_string *s1, *s2;
  5249. zend_long offset, len=0;
  5250. bool len_is_default=1;
  5251. bool cs=0;
  5252. size_t cmp_len;
  5253. ZEND_PARSE_PARAMETERS_START(3, 5)
  5254. Z_PARAM_STR(s1)
  5255. Z_PARAM_STR(s2)
  5256. Z_PARAM_LONG(offset)
  5257. Z_PARAM_OPTIONAL
  5258. Z_PARAM_LONG_OR_NULL(len, len_is_default)
  5259. Z_PARAM_BOOL(cs)
  5260. ZEND_PARSE_PARAMETERS_END();
  5261. if (!len_is_default && len <= 0) {
  5262. if (len == 0) {
  5263. RETURN_LONG(0L);
  5264. } else {
  5265. zend_argument_value_error(4, "must be greater than or equal to 0");
  5266. RETURN_THROWS();
  5267. }
  5268. }
  5269. if (offset < 0) {
  5270. offset = ZSTR_LEN(s1) + offset;
  5271. offset = (offset < 0) ? 0 : offset;
  5272. }
  5273. if ((size_t)offset > ZSTR_LEN(s1)) {
  5274. zend_argument_value_error(3, "must be contained in argument #1 ($main_str)");
  5275. RETURN_THROWS();
  5276. }
  5277. cmp_len = len ? (size_t)len : MAX(ZSTR_LEN(s2), (ZSTR_LEN(s1) - offset));
  5278. if (!cs) {
  5279. RETURN_LONG(zend_binary_strncmp(ZSTR_VAL(s1) + offset, (ZSTR_LEN(s1) - offset), ZSTR_VAL(s2), ZSTR_LEN(s2), cmp_len));
  5280. } else {
  5281. RETURN_LONG(zend_binary_strncasecmp_l(ZSTR_VAL(s1) + offset, (ZSTR_LEN(s1) - offset), ZSTR_VAL(s2), ZSTR_LEN(s2), cmp_len));
  5282. }
  5283. }
  5284. /* }}} */
  5285. /* {{{ */
  5286. static zend_string *php_utf8_encode(const char *s, size_t len)
  5287. {
  5288. size_t pos = len;
  5289. zend_string *str;
  5290. unsigned char c;
  5291. str = zend_string_safe_alloc(len, 2, 0, 0);
  5292. ZSTR_LEN(str) = 0;
  5293. while (pos > 0) {
  5294. /* The lower 256 codepoints of Unicode are identical to Latin-1,
  5295. * so we don't need to do any mapping here. */
  5296. c = (unsigned char)(*s);
  5297. if (c < 0x80) {
  5298. ZSTR_VAL(str)[ZSTR_LEN(str)++] = (char) c;
  5299. /* We only account for the single-byte and two-byte cases because
  5300. * we're only dealing with the first 256 Unicode codepoints. */
  5301. } else {
  5302. ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0xc0 | (c >> 6));
  5303. ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0x80 | (c & 0x3f));
  5304. }
  5305. pos--;
  5306. s++;
  5307. }
  5308. ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
  5309. str = zend_string_truncate(str, ZSTR_LEN(str), 0);
  5310. return str;
  5311. }
  5312. /* }}} */
  5313. /* {{{ */
  5314. static zend_string *php_utf8_decode(const char *s, size_t len)
  5315. {
  5316. size_t pos = 0;
  5317. unsigned int c;
  5318. zend_string *str;
  5319. str = zend_string_alloc(len, 0);
  5320. ZSTR_LEN(str) = 0;
  5321. while (pos < len) {
  5322. int status = FAILURE;
  5323. c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status);
  5324. /* The lower 256 codepoints of Unicode are identical to Latin-1,
  5325. * so we don't need to do any mapping here beyond replacing non-Latin-1
  5326. * characters. */
  5327. if (status == FAILURE || c > 0xFFU) {
  5328. c = '?';
  5329. }
  5330. ZSTR_VAL(str)[ZSTR_LEN(str)++] = c;
  5331. }
  5332. ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
  5333. if (ZSTR_LEN(str) < len) {
  5334. str = zend_string_truncate(str, ZSTR_LEN(str), 0);
  5335. }
  5336. return str;
  5337. }
  5338. /* }}} */
  5339. /* {{{ Encodes an ISO-8859-1 string to UTF-8 */
  5340. PHP_FUNCTION(utf8_encode)
  5341. {
  5342. char *arg;
  5343. size_t arg_len;
  5344. ZEND_PARSE_PARAMETERS_START(1, 1)
  5345. Z_PARAM_STRING(arg, arg_len)
  5346. ZEND_PARSE_PARAMETERS_END();
  5347. RETURN_STR(php_utf8_encode(arg, arg_len));
  5348. }
  5349. /* }}} */
  5350. /* {{{ Converts a UTF-8 encoded string to ISO-8859-1 */
  5351. PHP_FUNCTION(utf8_decode)
  5352. {
  5353. char *arg;
  5354. size_t arg_len;
  5355. ZEND_PARSE_PARAMETERS_START(1, 1)
  5356. Z_PARAM_STRING(arg, arg_len)
  5357. ZEND_PARSE_PARAMETERS_END();
  5358. RETURN_STR(php_utf8_decode(arg, arg_len));
  5359. }
  5360. /* }}} */