php_mbregex.c 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2016 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
  16. +----------------------------------------------------------------------+
  17. */
  18. /* $Id$ */
  19. #ifdef HAVE_CONFIG_H
  20. #include "config.h"
  21. #endif
  22. #include "php.h"
  23. #include "php_ini.h"
  24. #if HAVE_MBREGEX
  25. #include "ext/standard/php_smart_str.h"
  26. #include "ext/standard/info.h"
  27. #include "php_mbregex.h"
  28. #include "mbstring.h"
  29. #include "php_onig_compat.h" /* must come prior to the oniguruma header */
  30. #include <oniguruma.h>
  31. #undef UChar
  32. ZEND_EXTERN_MODULE_GLOBALS(mbstring)
/* State shared by the mbregex functions; fields are reached through MBREX(). */
struct _zend_mb_regex_globals {
	/* Encoding that current_mbctype is reset to at request shutdown. */
	OnigEncoding default_mbctype;
	/* Encoding passed to the pattern compiler by the matching functions. */
	OnigEncoding current_mbctype;
	/* Cache of compiled regexes keyed by pattern text; entries are destroyed
	 * via php_mb_regex_free_cache() and the table is emptied at request shutdown. */
	HashTable ht_rc;
	/* Released and reset to NULL at request shutdown.
	 * NOTE(review): presumably the subject of the mb_ereg_search family - confirm. */
	zval *search_str;
	/* NOTE(review): not referenced anywhere in the visible part of this file. */
	zval *search_str_val;
	/* Reset to 0 at request shutdown. */
	unsigned int search_pos;
	/* Starts out NULL; not otherwise touched in the visible part of this file. */
	php_mb_regex_t *search_re;
	/* Freed and reset to NULL at request shutdown. */
	OnigRegion *search_regs;
	/* Options applied when a caller supplies no option string. */
	OnigOptionType regex_default_options;
	/* Syntax applied when a caller supplies no option string. */
	OnigSyntaxType *regex_default_syntax;
};
/* Access field g of the regex globals hanging off the mbstring module globals. */
#define MBREX(g) (MBSTRG(mb_regex_globals)->g)
  46. /* {{{ static void php_mb_regex_free_cache() */
  47. static void php_mb_regex_free_cache(php_mb_regex_t **pre)
  48. {
  49. onig_free(*pre);
  50. }
  51. /* }}} */
  52. /* {{{ _php_mb_regex_globals_ctor */
  53. static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals TSRMLS_DC)
  54. {
  55. pglobals->default_mbctype = ONIG_ENCODING_UTF8;
  56. pglobals->current_mbctype = ONIG_ENCODING_UTF8;
  57. zend_hash_init(&(pglobals->ht_rc), 0, NULL, (void (*)(void *)) php_mb_regex_free_cache, 1);
  58. pglobals->search_str = (zval*) NULL;
  59. pglobals->search_re = (php_mb_regex_t*)NULL;
  60. pglobals->search_pos = 0;
  61. pglobals->search_regs = (OnigRegion*)NULL;
  62. pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
  63. pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
  64. return SUCCESS;
  65. }
  66. /* }}} */
  67. /* {{{ _php_mb_regex_globals_dtor */
  68. static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals TSRMLS_DC)
  69. {
  70. zend_hash_destroy(&pglobals->ht_rc);
  71. }
  72. /* }}} */
  73. /* {{{ php_mb_regex_globals_alloc */
  74. zend_mb_regex_globals *php_mb_regex_globals_alloc(TSRMLS_D)
  75. {
  76. zend_mb_regex_globals *pglobals = pemalloc(
  77. sizeof(zend_mb_regex_globals), 1);
  78. if (!pglobals) {
  79. return NULL;
  80. }
  81. if (SUCCESS != _php_mb_regex_globals_ctor(pglobals TSRMLS_CC)) {
  82. pefree(pglobals, 1);
  83. return NULL;
  84. }
  85. return pglobals;
  86. }
  87. /* }}} */
  88. /* {{{ php_mb_regex_globals_free */
  89. void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals TSRMLS_DC)
  90. {
  91. if (!pglobals) {
  92. return;
  93. }
  94. _php_mb_regex_globals_dtor(pglobals TSRMLS_CC);
  95. pefree(pglobals, 1);
  96. }
  97. /* }}} */
  98. /* {{{ PHP_MINIT_FUNCTION(mb_regex) */
  99. PHP_MINIT_FUNCTION(mb_regex)
  100. {
  101. onig_init();
  102. return SUCCESS;
  103. }
  104. /* }}} */
  105. /* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */
  106. PHP_MSHUTDOWN_FUNCTION(mb_regex)
  107. {
  108. onig_end();
  109. return SUCCESS;
  110. }
  111. /* }}} */
  112. /* {{{ PHP_RINIT_FUNCTION(mb_regex) */
  113. PHP_RINIT_FUNCTION(mb_regex)
  114. {
  115. return MBSTRG(mb_regex_globals) ? SUCCESS: FAILURE;
  116. }
  117. /* }}} */
  118. /* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
  119. PHP_RSHUTDOWN_FUNCTION(mb_regex)
  120. {
  121. MBREX(current_mbctype) = MBREX(default_mbctype);
  122. if (MBREX(search_str) != NULL) {
  123. zval_ptr_dtor(&MBREX(search_str));
  124. MBREX(search_str) = (zval *)NULL;
  125. }
  126. MBREX(search_pos) = 0;
  127. if (MBREX(search_regs) != NULL) {
  128. onig_region_free(MBREX(search_regs), 1);
  129. MBREX(search_regs) = (OnigRegion *)NULL;
  130. }
  131. zend_hash_clean(&MBREX(ht_rc));
  132. return SUCCESS;
  133. }
  134. /* }}} */
  135. /* {{{ PHP_MINFO_FUNCTION(mb_regex) */
  136. PHP_MINFO_FUNCTION(mb_regex)
  137. {
  138. char buf[32];
  139. php_info_print_table_start();
  140. php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
  141. snprintf(buf, sizeof(buf), "%d.%d.%d",
  142. ONIGURUMA_VERSION_MAJOR,
  143. ONIGURUMA_VERSION_MINOR,
  144. ONIGURUMA_VERSION_TEENY);
  145. #ifdef PHP_ONIG_BUNDLED
  146. #ifdef USE_COMBINATION_EXPLOSION_CHECK
  147. php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On");
  148. #else /* USE_COMBINATION_EXPLOSION_CHECK */
  149. php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off");
  150. #endif /* USE_COMBINATION_EXPLOSION_CHECK */
  151. #endif /* PHP_BUNDLED_ONIG */
  152. php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
  153. php_info_print_table_end();
  154. }
  155. /* }}} */
  156. /*
  157. * encoding name resolver
  158. */
  159. /* {{{ encoding name map */
/* One row of the encoding table: a set of names and the Oniguruma encoding they denote. */
typedef struct _php_mb_regex_enc_name_map_t {
	/* Several names packed into one buffer, each NUL-terminated; an empty
	 * name marks the end of the list. */
	const char *names;
	/* Encoding shared by all names in this row. */
	OnigEncoding code;
} php_mb_regex_enc_name_map_t;
  164. php_mb_regex_enc_name_map_t enc_name_map[] = {
  165. #ifdef ONIG_ENCODING_EUC_JP
  166. {
  167. "EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
  168. ONIG_ENCODING_EUC_JP
  169. },
  170. #endif
  171. #ifdef ONIG_ENCODING_UTF8
  172. {
  173. "UTF-8\0UTF8\0",
  174. ONIG_ENCODING_UTF8
  175. },
  176. #endif
  177. #ifdef ONIG_ENCODING_UTF16_BE
  178. {
  179. "UTF-16\0UTF-16BE\0",
  180. ONIG_ENCODING_UTF16_BE
  181. },
  182. #endif
  183. #ifdef ONIG_ENCODING_UTF16_LE
  184. {
  185. "UTF-16LE\0",
  186. ONIG_ENCODING_UTF16_LE
  187. },
  188. #endif
  189. #ifdef ONIG_ENCODING_UTF32_BE
  190. {
  191. "UCS-4\0UTF-32\0UTF-32BE\0",
  192. ONIG_ENCODING_UTF32_BE
  193. },
  194. #endif
  195. #ifdef ONIG_ENCODING_UTF32_LE
  196. {
  197. "UCS-4LE\0UTF-32LE\0",
  198. ONIG_ENCODING_UTF32_LE
  199. },
  200. #endif
  201. #ifdef ONIG_ENCODING_SJIS
  202. {
  203. "SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
  204. ONIG_ENCODING_SJIS
  205. },
  206. #endif
  207. #ifdef ONIG_ENCODING_BIG5
  208. {
  209. "BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
  210. ONIG_ENCODING_BIG5
  211. },
  212. #endif
  213. #ifdef ONIG_ENCODING_EUC_CN
  214. {
  215. "EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
  216. ONIG_ENCODING_EUC_CN
  217. },
  218. #endif
  219. #ifdef ONIG_ENCODING_EUC_TW
  220. {
  221. "EUC-TW\0EUCTW\0EUC_TW\0",
  222. ONIG_ENCODING_EUC_TW
  223. },
  224. #endif
  225. #ifdef ONIG_ENCODING_EUC_KR
  226. {
  227. "EUC-KR\0EUCKR\0EUC_KR\0",
  228. ONIG_ENCODING_EUC_KR
  229. },
  230. #endif
  231. #if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
  232. {
  233. "KOI8\0KOI-8\0",
  234. ONIG_ENCODING_KOI8
  235. },
  236. #endif
  237. #ifdef ONIG_ENCODING_KOI8_R
  238. {
  239. "KOI8R\0KOI8-R\0KOI-8R\0",
  240. ONIG_ENCODING_KOI8_R
  241. },
  242. #endif
  243. #ifdef ONIG_ENCODING_ISO_8859_1
  244. {
  245. "ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
  246. ONIG_ENCODING_ISO_8859_1
  247. },
  248. #endif
  249. #ifdef ONIG_ENCODING_ISO_8859_2
  250. {
  251. "ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
  252. ONIG_ENCODING_ISO_8859_2
  253. },
  254. #endif
  255. #ifdef ONIG_ENCODING_ISO_8859_3
  256. {
  257. "ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
  258. ONIG_ENCODING_ISO_8859_3
  259. },
  260. #endif
  261. #ifdef ONIG_ENCODING_ISO_8859_4
  262. {
  263. "ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
  264. ONIG_ENCODING_ISO_8859_4
  265. },
  266. #endif
  267. #ifdef ONIG_ENCODING_ISO_8859_5
  268. {
  269. "ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
  270. ONIG_ENCODING_ISO_8859_5
  271. },
  272. #endif
  273. #ifdef ONIG_ENCODING_ISO_8859_6
  274. {
  275. "ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
  276. ONIG_ENCODING_ISO_8859_6
  277. },
  278. #endif
  279. #ifdef ONIG_ENCODING_ISO_8859_7
  280. {
  281. "ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
  282. ONIG_ENCODING_ISO_8859_7
  283. },
  284. #endif
  285. #ifdef ONIG_ENCODING_ISO_8859_8
  286. {
  287. "ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
  288. ONIG_ENCODING_ISO_8859_8
  289. },
  290. #endif
  291. #ifdef ONIG_ENCODING_ISO_8859_9
  292. {
  293. "ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
  294. ONIG_ENCODING_ISO_8859_9
  295. },
  296. #endif
  297. #ifdef ONIG_ENCODING_ISO_8859_10
  298. {
  299. "ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
  300. ONIG_ENCODING_ISO_8859_10
  301. },
  302. #endif
  303. #ifdef ONIG_ENCODING_ISO_8859_11
  304. {
  305. "ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
  306. ONIG_ENCODING_ISO_8859_11
  307. },
  308. #endif
  309. #ifdef ONIG_ENCODING_ISO_8859_13
  310. {
  311. "ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
  312. ONIG_ENCODING_ISO_8859_13
  313. },
  314. #endif
  315. #ifdef ONIG_ENCODING_ISO_8859_14
  316. {
  317. "ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
  318. ONIG_ENCODING_ISO_8859_14
  319. },
  320. #endif
  321. #ifdef ONIG_ENCODING_ISO_8859_15
  322. {
  323. "ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
  324. ONIG_ENCODING_ISO_8859_15
  325. },
  326. #endif
  327. #ifdef ONIG_ENCODING_ISO_8859_16
  328. {
  329. "ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
  330. ONIG_ENCODING_ISO_8859_16
  331. },
  332. #endif
  333. #ifdef ONIG_ENCODING_ASCII
  334. {
  335. "ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
  336. ONIG_ENCODING_ASCII
  337. },
  338. #endif
  339. { NULL, ONIG_ENCODING_UNDEF }
  340. };
  341. /* }}} */
/* {{{ _php_mb_regex_name2mbctype */
  343. static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
  344. {
  345. const char *p;
  346. php_mb_regex_enc_name_map_t *mapping;
  347. if (pname == NULL || !*pname) {
  348. return ONIG_ENCODING_UNDEF;
  349. }
  350. for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
  351. for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
  352. if (strcasecmp(p, pname) == 0) {
  353. return mapping->code;
  354. }
  355. }
  356. }
  357. return ONIG_ENCODING_UNDEF;
  358. }
  359. /* }}} */
/* {{{ _php_mb_regex_mbctype2name */
  361. static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
  362. {
  363. php_mb_regex_enc_name_map_t *mapping;
  364. for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
  365. if (mapping->code == mbctype) {
  366. return mapping->names;
  367. }
  368. }
  369. return NULL;
  370. }
  371. /* }}} */
  372. /* {{{ php_mb_regex_set_mbctype */
  373. int php_mb_regex_set_mbctype(const char *encname TSRMLS_DC)
  374. {
  375. OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
  376. if (mbctype == ONIG_ENCODING_UNDEF) {
  377. return FAILURE;
  378. }
  379. MBREX(current_mbctype) = mbctype;
  380. return SUCCESS;
  381. }
  382. /* }}} */
  383. /* {{{ php_mb_regex_set_default_mbctype */
  384. int php_mb_regex_set_default_mbctype(const char *encname TSRMLS_DC)
  385. {
  386. OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
  387. if (mbctype == ONIG_ENCODING_UNDEF) {
  388. return FAILURE;
  389. }
  390. MBREX(default_mbctype) = mbctype;
  391. return SUCCESS;
  392. }
  393. /* }}} */
  394. /* {{{ php_mb_regex_get_mbctype */
  395. const char *php_mb_regex_get_mbctype(TSRMLS_D)
  396. {
  397. return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
  398. }
  399. /* }}} */
  400. /* {{{ php_mb_regex_get_default_mbctype */
  401. const char *php_mb_regex_get_default_mbctype(TSRMLS_D)
  402. {
  403. return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
  404. }
  405. /* }}} */
  406. /*
  407. * regex cache
  408. */
  409. /* {{{ php_mbregex_compile_pattern */
  410. static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax TSRMLS_DC)
  411. {
  412. int err_code = 0;
  413. int found = 0;
  414. php_mb_regex_t *retval = NULL, **rc = NULL;
  415. OnigErrorInfo err_info;
  416. OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  417. found = zend_hash_find(&MBREX(ht_rc), (char *)pattern, patlen+1, (void **) &rc);
  418. if (found == FAILURE || (*rc)->options != options || (*rc)->enc != enc || (*rc)->syntax != syntax) {
  419. if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
  420. onig_error_code_to_str(err_str, err_code, &err_info);
  421. php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex compile err: %s", err_str);
  422. retval = NULL;
  423. goto out;
  424. }
  425. zend_hash_update(&MBREX(ht_rc), (char *) pattern, patlen + 1, (void *) &retval, sizeof(retval), NULL);
  426. } else if (found == SUCCESS) {
  427. retval = *rc;
  428. }
  429. out:
  430. return retval;
  431. }
  432. /* }}} */
  433. /* {{{ _php_mb_regex_get_option_string */
  434. static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
  435. {
  436. size_t len_left = len;
  437. size_t len_req = 0;
  438. char *p = str;
  439. char c;
  440. if ((option & ONIG_OPTION_IGNORECASE) != 0) {
  441. if (len_left > 0) {
  442. --len_left;
  443. *(p++) = 'i';
  444. }
  445. ++len_req;
  446. }
  447. if ((option & ONIG_OPTION_EXTEND) != 0) {
  448. if (len_left > 0) {
  449. --len_left;
  450. *(p++) = 'x';
  451. }
  452. ++len_req;
  453. }
  454. if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) ==
  455. (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) {
  456. if (len_left > 0) {
  457. --len_left;
  458. *(p++) = 'p';
  459. }
  460. ++len_req;
  461. } else {
  462. if ((option & ONIG_OPTION_MULTILINE) != 0) {
  463. if (len_left > 0) {
  464. --len_left;
  465. *(p++) = 'm';
  466. }
  467. ++len_req;
  468. }
  469. if ((option & ONIG_OPTION_SINGLELINE) != 0) {
  470. if (len_left > 0) {
  471. --len_left;
  472. *(p++) = 's';
  473. }
  474. ++len_req;
  475. }
  476. }
  477. if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
  478. if (len_left > 0) {
  479. --len_left;
  480. *(p++) = 'l';
  481. }
  482. ++len_req;
  483. }
  484. if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
  485. if (len_left > 0) {
  486. --len_left;
  487. *(p++) = 'n';
  488. }
  489. ++len_req;
  490. }
  491. c = 0;
  492. if (syntax == ONIG_SYNTAX_JAVA) {
  493. c = 'j';
  494. } else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
  495. c = 'u';
  496. } else if (syntax == ONIG_SYNTAX_GREP) {
  497. c = 'g';
  498. } else if (syntax == ONIG_SYNTAX_EMACS) {
  499. c = 'c';
  500. } else if (syntax == ONIG_SYNTAX_RUBY) {
  501. c = 'r';
  502. } else if (syntax == ONIG_SYNTAX_PERL) {
  503. c = 'z';
  504. } else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
  505. c = 'b';
  506. } else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
  507. c = 'd';
  508. }
  509. if (c != 0) {
  510. if (len_left > 0) {
  511. --len_left;
  512. *(p++) = c;
  513. }
  514. ++len_req;
  515. }
  516. if (len_left > 0) {
  517. --len_left;
  518. *(p++) = '\0';
  519. }
  520. ++len_req;
  521. if (len < len_req) {
  522. return len_req;
  523. }
  524. return 0;
  525. }
  526. /* }}} */
  527. /* {{{ _php_mb_regex_init_options */
  528. static void
  529. _php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval)
  530. {
  531. int n;
  532. char c;
  533. int optm = 0;
  534. *syntax = ONIG_SYNTAX_RUBY;
  535. if (parg != NULL) {
  536. n = 0;
  537. while(n < narg) {
  538. c = parg[n++];
  539. switch (c) {
  540. case 'i':
  541. optm |= ONIG_OPTION_IGNORECASE;
  542. break;
  543. case 'x':
  544. optm |= ONIG_OPTION_EXTEND;
  545. break;
  546. case 'm':
  547. optm |= ONIG_OPTION_MULTILINE;
  548. break;
  549. case 's':
  550. optm |= ONIG_OPTION_SINGLELINE;
  551. break;
  552. case 'p':
  553. optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
  554. break;
  555. case 'l':
  556. optm |= ONIG_OPTION_FIND_LONGEST;
  557. break;
  558. case 'n':
  559. optm |= ONIG_OPTION_FIND_NOT_EMPTY;
  560. break;
  561. case 'j':
  562. *syntax = ONIG_SYNTAX_JAVA;
  563. break;
  564. case 'u':
  565. *syntax = ONIG_SYNTAX_GNU_REGEX;
  566. break;
  567. case 'g':
  568. *syntax = ONIG_SYNTAX_GREP;
  569. break;
  570. case 'c':
  571. *syntax = ONIG_SYNTAX_EMACS;
  572. break;
  573. case 'r':
  574. *syntax = ONIG_SYNTAX_RUBY;
  575. break;
  576. case 'z':
  577. *syntax = ONIG_SYNTAX_PERL;
  578. break;
  579. case 'b':
  580. *syntax = ONIG_SYNTAX_POSIX_BASIC;
  581. break;
  582. case 'd':
  583. *syntax = ONIG_SYNTAX_POSIX_EXTENDED;
  584. break;
  585. case 'e':
  586. if (eval != NULL) *eval = 1;
  587. break;
  588. default:
  589. break;
  590. }
  591. }
  592. if (option != NULL) *option|=optm;
  593. }
  594. }
  595. /* }}} */
  596. /*
  597. * php functions
  598. */
  599. /* {{{ proto string mb_regex_encoding([string encoding])
  600. Returns the current encoding for regex as a string. */
  601. PHP_FUNCTION(mb_regex_encoding)
  602. {
  603. size_t argc = ZEND_NUM_ARGS();
  604. char *encoding;
  605. int encoding_len;
  606. OnigEncoding mbctype;
  607. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &encoding, &encoding_len) == FAILURE) {
  608. return;
  609. }
  610. if (argc == 0) {
  611. const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
  612. if (retval == NULL) {
  613. RETURN_FALSE;
  614. }
  615. RETURN_STRING((char *)retval, 1);
  616. } else if (argc == 1) {
  617. mbctype = _php_mb_regex_name2mbctype(encoding);
  618. if (mbctype == ONIG_ENCODING_UNDEF) {
  619. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
  620. RETURN_FALSE;
  621. }
  622. MBREX(current_mbctype) = mbctype;
  623. RETURN_TRUE;
  624. }
  625. }
  626. /* }}} */
  627. /* {{{ _php_mb_regex_ereg_exec */
  628. static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
  629. {
  630. zval **arg_pattern, *array;
  631. char *string;
  632. int string_len;
  633. php_mb_regex_t *re;
  634. OnigRegion *regs = NULL;
  635. int i, match_len, beg, end;
  636. OnigOptionType options;
  637. char *str;
  638. array = NULL;
  639. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zs|z", &arg_pattern, &string, &string_len, &array) == FAILURE) {
  640. RETURN_FALSE;
  641. }
  642. options = MBREX(regex_default_options);
  643. if (icase) {
  644. options |= ONIG_OPTION_IGNORECASE;
  645. }
  646. /* compile the regular expression from the supplied regex */
  647. if (Z_TYPE_PP(arg_pattern) != IS_STRING) {
  648. /* we convert numbers to integers and treat them as a string */
  649. if (Z_TYPE_PP(arg_pattern) == IS_DOUBLE) {
  650. convert_to_long_ex(arg_pattern); /* get rid of decimal places */
  651. }
  652. convert_to_string_ex(arg_pattern);
  653. /* don't bother doing an extended regex with just a number */
  654. }
  655. if (!Z_STRVAL_PP(arg_pattern) || Z_STRLEN_PP(arg_pattern) == 0) {
  656. php_error_docref(NULL TSRMLS_CC, E_WARNING, "empty pattern");
  657. RETVAL_FALSE;
  658. goto out;
  659. }
  660. re = php_mbregex_compile_pattern(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC);
  661. if (re == NULL) {
  662. RETVAL_FALSE;
  663. goto out;
  664. }
  665. regs = onig_region_new();
  666. /* actually execute the regular expression */
  667. if (onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) {
  668. RETVAL_FALSE;
  669. goto out;
  670. }
  671. match_len = 1;
  672. str = string;
  673. if (array != NULL) {
  674. match_len = regs->end[0] - regs->beg[0];
  675. zval_dtor(array);
  676. array_init(array);
  677. for (i = 0; i < regs->num_regs; i++) {
  678. beg = regs->beg[i];
  679. end = regs->end[i];
  680. if (beg >= 0 && beg < end && end <= string_len) {
  681. add_index_stringl(array, i, (char *)&str[beg], end - beg, 1);
  682. } else {
  683. add_index_bool(array, i, 0);
  684. }
  685. }
  686. }
  687. if (match_len == 0) {
  688. match_len = 1;
  689. }
  690. RETVAL_LONG(match_len);
  691. out:
  692. if (regs != NULL) {
  693. onig_region_free(regs, 1);
  694. }
  695. }
  696. /* }}} */
  697. /* {{{ proto int mb_ereg(string pattern, string string [, array registers])
  698. Regular expression match for multibyte string */
  699. PHP_FUNCTION(mb_ereg)
  700. {
  701. _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  702. }
  703. /* }}} */
  704. /* {{{ proto int mb_eregi(string pattern, string string [, array registers])
  705. Case-insensitive regular expression match for multibyte string */
  706. PHP_FUNCTION(mb_eregi)
  707. {
  708. _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  709. }
  710. /* }}} */
  711. /* {{{ _php_mb_regex_ereg_replace_exec */
  712. static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable)
  713. {
  714. zval **arg_pattern_zval;
  715. char *arg_pattern;
  716. int arg_pattern_len;
  717. char *replace;
  718. int replace_len;
  719. zend_fcall_info arg_replace_fci;
  720. zend_fcall_info_cache arg_replace_fci_cache;
  721. char *string;
  722. int string_len;
  723. char *p;
  724. php_mb_regex_t *re;
  725. OnigSyntaxType *syntax;
  726. OnigRegion *regs = NULL;
  727. smart_str out_buf = { 0 };
  728. smart_str eval_buf = { 0 };
  729. smart_str *pbuf;
  730. int i, err, eval, n;
  731. OnigUChar *pos;
  732. OnigUChar *string_lim;
  733. char *description = NULL;
  734. char pat_buf[6];
  735. const mbfl_encoding *enc;
  736. {
  737. const char *current_enc_name;
  738. current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
  739. if (current_enc_name == NULL ||
  740. (enc = mbfl_name2encoding(current_enc_name)) == NULL) {
  741. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
  742. RETURN_FALSE;
  743. }
  744. }
  745. eval = 0;
  746. {
  747. char *option_str = NULL;
  748. int option_str_len = 0;
  749. if (!is_callable) {
  750. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zss|s",
  751. &arg_pattern_zval,
  752. &replace, &replace_len,
  753. &string, &string_len,
  754. &option_str, &option_str_len) == FAILURE) {
  755. RETURN_FALSE;
  756. }
  757. } else {
  758. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zfs|s",
  759. &arg_pattern_zval,
  760. &arg_replace_fci, &arg_replace_fci_cache,
  761. &string, &string_len,
  762. &option_str, &option_str_len) == FAILURE) {
  763. RETURN_FALSE;
  764. }
  765. }
  766. if (option_str != NULL) {
  767. _php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
  768. } else {
  769. options |= MBREX(regex_default_options);
  770. syntax = MBREX(regex_default_syntax);
  771. }
  772. }
  773. if (Z_TYPE_PP(arg_pattern_zval) == IS_STRING) {
  774. arg_pattern = Z_STRVAL_PP(arg_pattern_zval);
  775. arg_pattern_len = Z_STRLEN_PP(arg_pattern_zval);
  776. } else {
  777. /* FIXME: this code is not multibyte aware! */
  778. convert_to_long_ex(arg_pattern_zval);
  779. pat_buf[0] = (char)Z_LVAL_PP(arg_pattern_zval);
  780. pat_buf[1] = '\0';
  781. pat_buf[2] = '\0';
  782. pat_buf[3] = '\0';
  783. pat_buf[4] = '\0';
  784. pat_buf[5] = '\0';
  785. arg_pattern = pat_buf;
  786. arg_pattern_len = 1;
  787. }
  788. /* create regex pattern buffer */
  789. re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax TSRMLS_CC);
  790. if (re == NULL) {
  791. RETURN_FALSE;
  792. }
  793. if (eval || is_callable) {
  794. pbuf = &eval_buf;
  795. description = zend_make_compiled_string_description("mbregex replace" TSRMLS_CC);
  796. } else {
  797. pbuf = &out_buf;
  798. description = NULL;
  799. }
  800. if (is_callable) {
  801. if (eval) {
  802. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Option 'e' cannot be used with replacement callback");
  803. RETURN_FALSE;
  804. }
  805. }
  806. /* do the actual work */
  807. err = 0;
  808. pos = (OnigUChar *)string;
  809. string_lim = (OnigUChar*)(string + string_len);
  810. regs = onig_region_new();
  811. while (err >= 0) {
  812. err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
  813. if (err <= -2) {
  814. OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  815. onig_error_code_to_str(err_str, err);
  816. php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
  817. break;
  818. }
  819. if (err >= 0) {
  820. #if moriyoshi_0
  821. if (regs->beg[0] == regs->end[0]) {
  822. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
  823. break;
  824. }
  825. #endif
  826. /* copy the part of the string before the match */
  827. smart_str_appendl(&out_buf, pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));
  828. if (!is_callable) {
  829. /* copy replacement and backrefs */
  830. i = 0;
  831. p = replace;
  832. while (i < replace_len) {
  833. int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);
  834. n = -1;
  835. if ((replace_len - i) >= 2 && fwd == 1 &&
  836. p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
  837. n = p[1] - '0';
  838. }
  839. if (n >= 0 && n < regs->num_regs) {
  840. if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && regs->end[n] <= string_len) {
  841. smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
  842. }
  843. p += 2;
  844. i += 2;
  845. } else {
  846. smart_str_appendl(pbuf, p, fwd);
  847. p += fwd;
  848. i += fwd;
  849. }
  850. }
  851. }
  852. if (eval) {
  853. zval v;
  854. /* null terminate buffer */
  855. smart_str_0(&eval_buf);
  856. /* do eval */
  857. if (zend_eval_stringl(eval_buf.c, eval_buf.len, &v, description TSRMLS_CC) == FAILURE) {
  858. efree(description);
  859. php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, eval_buf.c);
  860. /* zend_error() does not return in this case */
  861. }
  862. /* result of eval */
  863. convert_to_string(&v);
  864. smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
  865. /* Clean up */
  866. eval_buf.len = 0;
  867. zval_dtor(&v);
  868. } else if (is_callable) {
  869. zval *retval_ptr = NULL;
  870. zval **args[1];
  871. zval *subpats;
  872. int i;
  873. MAKE_STD_ZVAL(subpats);
  874. array_init(subpats);
  875. for (i = 0; i < regs->num_regs; i++) {
  876. add_next_index_stringl(subpats, string + regs->beg[i], regs->end[i] - regs->beg[i], 1);
  877. }
  878. args[0] = &subpats;
  879. /* null terminate buffer */
  880. smart_str_0(&eval_buf);
  881. arg_replace_fci.param_count = 1;
  882. arg_replace_fci.params = args;
  883. arg_replace_fci.retval_ptr_ptr = &retval_ptr;
  884. if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache TSRMLS_CC) == SUCCESS && arg_replace_fci.retval_ptr_ptr && retval_ptr) {
  885. convert_to_string_ex(&retval_ptr);
  886. smart_str_appendl(&out_buf, Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
  887. eval_buf.len = 0;
  888. zval_ptr_dtor(&retval_ptr);
  889. } else {
  890. if (!EG(exception)) {
  891. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
  892. }
  893. }
  894. zval_ptr_dtor(&subpats);
  895. }
  896. n = regs->end[0];
  897. if ((pos - (OnigUChar *)string) < n) {
  898. pos = (OnigUChar *)string + n;
  899. } else {
  900. if (pos < string_lim) {
  901. smart_str_appendl(&out_buf, pos, 1);
  902. }
  903. pos++;
  904. }
  905. } else { /* nomatch */
  906. /* stick that last bit of string on our output */
  907. if (string_lim - pos > 0) {
  908. smart_str_appendl(&out_buf, pos, string_lim - pos);
  909. }
  910. }
  911. onig_region_free(regs, 0);
  912. }
  913. if (description) {
  914. efree(description);
  915. }
  916. if (regs != NULL) {
  917. onig_region_free(regs, 1);
  918. }
  919. smart_str_free(&eval_buf);
  920. if (err <= -2) {
  921. smart_str_free(&out_buf);
  922. RETVAL_FALSE;
  923. } else {
  924. smart_str_appendc(&out_buf, '\0');
  925. RETVAL_STRINGL((char *)out_buf.c, out_buf.len - 1, 0);
  926. }
  927. }
  928. /* }}} */
  929. /* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option])
  930. Replace regular expression for multibyte string */
  931. PHP_FUNCTION(mb_ereg_replace)
  932. {
  933. _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
  934. }
  935. /* }}} */
  936. /* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string)
  937. Case insensitive replace regular expression for multibyte string */
  938. PHP_FUNCTION(mb_eregi_replace)
  939. {
  940. _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE, 0);
  941. }
  942. /* }}} */
  943. /* {{{ proto string mb_ereg_replace_callback(string pattern, string callback, string string [, string option])
  944. regular expression for multibyte string using replacement callback */
  945. PHP_FUNCTION(mb_ereg_replace_callback)
  946. {
  947. _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
  948. }
  949. /* }}} */
  950. /* {{{ proto array mb_split(string pattern, string string [, int limit])
  951. split multibyte string into array by regular expression */
  952. PHP_FUNCTION(mb_split)
  953. {
  954. char *arg_pattern;
  955. int arg_pattern_len;
  956. php_mb_regex_t *re;
  957. OnigRegion *regs = NULL;
  958. char *string;
  959. OnigUChar *pos, *chunk_pos;
  960. int string_len;
  961. int n, err;
  962. long count = -1;
  963. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
  964. RETURN_FALSE;
  965. }
  966. if (count > 0) {
  967. count--;
  968. }
  969. /* create regex pattern buffer */
  970. if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
  971. RETURN_FALSE;
  972. }
  973. array_init(return_value);
  974. chunk_pos = pos = (OnigUChar *)string;
  975. err = 0;
  976. regs = onig_region_new();
  977. /* churn through str, generating array entries as we go */
  978. while (count != 0 && (pos - (OnigUChar *)string) < string_len) {
  979. int beg, end;
  980. err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0);
  981. if (err < 0) {
  982. break;
  983. }
  984. beg = regs->beg[0], end = regs->end[0];
  985. /* add it to the array */
  986. if ((pos - (OnigUChar *)string) < end) {
  987. if (beg < string_len && beg >= (chunk_pos - (OnigUChar *)string)) {
  988. add_next_index_stringl(return_value, (char *)chunk_pos, ((OnigUChar *)(string + beg) - chunk_pos), 1);
  989. --count;
  990. } else {
  991. err = -2;
  992. break;
  993. }
  994. /* point at our new starting point */
  995. chunk_pos = pos = (OnigUChar *)string + end;
  996. } else {
  997. pos++;
  998. }
  999. onig_region_free(regs, 0);
  1000. }
  1001. onig_region_free(regs, 1);
  1002. /* see if we encountered an error */
  1003. if (err <= -2) {
  1004. OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  1005. onig_error_code_to_str(err_str, err);
  1006. php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
  1007. zval_dtor(return_value);
  1008. RETURN_FALSE;
  1009. }
  1010. /* otherwise we just have one last element to add to the array */
  1011. n = ((OnigUChar *)(string + string_len) - chunk_pos);
  1012. if (n > 0) {
  1013. add_next_index_stringl(return_value, (char *)chunk_pos, n, 1);
  1014. } else {
  1015. add_next_index_stringl(return_value, "", 0, 1);
  1016. }
  1017. }
  1018. /* }}} */
  1019. /* {{{ proto bool mb_ereg_match(string pattern, string string [,string option])
  1020. Regular expression match for multibyte string */
  1021. PHP_FUNCTION(mb_ereg_match)
  1022. {
  1023. char *arg_pattern;
  1024. int arg_pattern_len;
  1025. char *string;
  1026. int string_len;
  1027. php_mb_regex_t *re;
  1028. OnigSyntaxType *syntax;
  1029. OnigOptionType option = 0;
  1030. int err;
  1031. {
  1032. char *option_str = NULL;
  1033. int option_str_len = 0;
  1034. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s",
  1035. &arg_pattern, &arg_pattern_len, &string, &string_len,
  1036. &option_str, &option_str_len)==FAILURE) {
  1037. RETURN_FALSE;
  1038. }
  1039. if (option_str != NULL) {
  1040. _php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
  1041. } else {
  1042. option |= MBREX(regex_default_options);
  1043. syntax = MBREX(regex_default_syntax);
  1044. }
  1045. }
  1046. if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
  1047. RETURN_FALSE;
  1048. }
  1049. /* match */
  1050. err = onig_match(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0);
  1051. if (err >= 0) {
  1052. RETVAL_TRUE;
  1053. } else {
  1054. RETVAL_FALSE;
  1055. }
  1056. }
  1057. /* }}} */
  1058. /* regex search */
  1059. /* {{{ _php_mb_regex_ereg_search_exec */
  1060. static void
  1061. _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
  1062. {
  1063. size_t argc = ZEND_NUM_ARGS();
  1064. char *arg_pattern, *arg_options;
  1065. int arg_pattern_len, arg_options_len;
  1066. int n, i, err, pos, len, beg, end;
  1067. OnigOptionType option;
  1068. OnigUChar *str;
  1069. OnigSyntaxType *syntax;
  1070. if (zend_parse_parameters(argc TSRMLS_CC, "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
  1071. return;
  1072. }
  1073. option = MBREX(regex_default_options);
  1074. if (argc == 2) {
  1075. option = 0;
  1076. _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
  1077. }
  1078. if (argc > 0) {
  1079. /* create regex pattern buffer */
  1080. if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
  1081. RETURN_FALSE;
  1082. }
  1083. }
  1084. pos = MBREX(search_pos);
  1085. str = NULL;
  1086. len = 0;
  1087. if (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING){
  1088. str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
  1089. len = Z_STRLEN_P(MBREX(search_str));
  1090. }
  1091. if (MBREX(search_re) == NULL) {
  1092. php_error_docref(NULL TSRMLS_CC, E_WARNING, "No regex given");
  1093. RETURN_FALSE;
  1094. }
  1095. if (str == NULL) {
  1096. php_error_docref(NULL TSRMLS_CC, E_WARNING, "No string given");
  1097. RETURN_FALSE;
  1098. }
  1099. if (MBREX(search_regs)) {
  1100. onig_region_free(MBREX(search_regs), 1);
  1101. }
  1102. MBREX(search_regs) = onig_region_new();
  1103. err = onig_search(MBREX(search_re), str, str + len, str + pos, str + len, MBREX(search_regs), 0);
  1104. if (err == ONIG_MISMATCH) {
  1105. MBREX(search_pos) = len;
  1106. RETVAL_FALSE;
  1107. } else if (err <= -2) {
  1108. OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  1109. onig_error_code_to_str(err_str, err);
  1110. php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
  1111. RETVAL_FALSE;
  1112. } else {
  1113. switch (mode) {
  1114. case 1:
  1115. array_init(return_value);
  1116. beg = MBREX(search_regs)->beg[0];
  1117. end = MBREX(search_regs)->end[0];
  1118. add_next_index_long(return_value, beg);
  1119. add_next_index_long(return_value, end - beg);
  1120. break;
  1121. case 2:
  1122. array_init(return_value);
  1123. n = MBREX(search_regs)->num_regs;
  1124. for (i = 0; i < n; i++) {
  1125. beg = MBREX(search_regs)->beg[i];
  1126. end = MBREX(search_regs)->end[i];
  1127. if (beg >= 0 && beg <= end && end <= len) {
  1128. add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
  1129. } else {
  1130. add_index_bool(return_value, i, 0);
  1131. }
  1132. }
  1133. break;
  1134. default:
  1135. RETVAL_TRUE;
  1136. break;
  1137. }
  1138. end = MBREX(search_regs)->end[0];
  1139. if (pos <= end) {
  1140. MBREX(search_pos) = end;
  1141. } else {
  1142. MBREX(search_pos) = pos + 1;
  1143. }
  1144. }
  1145. if (err < 0) {
  1146. onig_region_free(MBREX(search_regs), 1);
  1147. MBREX(search_regs) = (OnigRegion *)NULL;
  1148. }
  1149. }
  1150. /* }}} */
  1151. /* {{{ proto bool mb_ereg_search([string pattern[, string option]])
  1152. Regular expression search for multibyte string */
  1153. PHP_FUNCTION(mb_ereg_search)
  1154. {
  1155. _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  1156. }
  1157. /* }}} */
  1158. /* {{{ proto array mb_ereg_search_pos([string pattern[, string option]])
  1159. Regular expression search for multibyte string */
  1160. PHP_FUNCTION(mb_ereg_search_pos)
  1161. {
  1162. _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  1163. }
  1164. /* }}} */
  1165. /* {{{ proto array mb_ereg_search_regs([string pattern[, string option]])
  1166. Regular expression search for multibyte string */
  1167. PHP_FUNCTION(mb_ereg_search_regs)
  1168. {
  1169. _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
  1170. }
  1171. /* }}} */
  1172. /* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]])
  1173. Initialize string and regular expression for search. */
  1174. PHP_FUNCTION(mb_ereg_search_init)
  1175. {
  1176. size_t argc = ZEND_NUM_ARGS();
  1177. zval *arg_str;
  1178. char *arg_pattern = NULL, *arg_options = NULL;
  1179. int arg_pattern_len = 0, arg_options_len = 0;
  1180. OnigSyntaxType *syntax = NULL;
  1181. OnigOptionType option;
  1182. if (zend_parse_parameters(argc TSRMLS_CC, "z|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
  1183. return;
  1184. }
  1185. if (argc > 1 && arg_pattern_len == 0) {
  1186. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty pattern");
  1187. RETURN_FALSE;
  1188. }
  1189. option = MBREX(regex_default_options);
  1190. syntax = MBREX(regex_default_syntax);
  1191. if (argc == 3) {
  1192. option = 0;
  1193. _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
  1194. }
  1195. if (argc > 1) {
  1196. /* create regex pattern buffer */
  1197. if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
  1198. RETURN_FALSE;
  1199. }
  1200. }
  1201. if (MBREX(search_str) != NULL) {
  1202. zval_ptr_dtor(&MBREX(search_str));
  1203. MBREX(search_str) = (zval *)NULL;
  1204. }
  1205. MBREX(search_str) = arg_str;
  1206. Z_ADDREF_P(MBREX(search_str));
  1207. SEPARATE_ZVAL_IF_NOT_REF(&MBREX(search_str));
  1208. MBREX(search_pos) = 0;
  1209. if (MBREX(search_regs) != NULL) {
  1210. onig_region_free(MBREX(search_regs), 1);
  1211. MBREX(search_regs) = (OnigRegion *) NULL;
  1212. }
  1213. RETURN_TRUE;
  1214. }
  1215. /* }}} */
  1216. /* {{{ proto array mb_ereg_search_getregs(void)
  1217. Get matched substring of the last time */
  1218. PHP_FUNCTION(mb_ereg_search_getregs)
  1219. {
  1220. int n, i, len, beg, end;
  1221. OnigUChar *str;
  1222. if (MBREX(search_regs) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && Z_STRVAL_P(MBREX(search_str)) != NULL) {
  1223. array_init(return_value);
  1224. str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
  1225. len = Z_STRLEN_P(MBREX(search_str));
  1226. n = MBREX(search_regs)->num_regs;
  1227. for (i = 0; i < n; i++) {
  1228. beg = MBREX(search_regs)->beg[i];
  1229. end = MBREX(search_regs)->end[i];
  1230. if (beg >= 0 && beg <= end && end <= len) {
  1231. add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
  1232. } else {
  1233. add_index_bool(return_value, i, 0);
  1234. }
  1235. }
  1236. } else {
  1237. RETVAL_FALSE;
  1238. }
  1239. }
  1240. /* }}} */
  1241. /* {{{ proto int mb_ereg_search_getpos(void)
  1242. Get search start position */
  1243. PHP_FUNCTION(mb_ereg_search_getpos)
  1244. {
  1245. RETVAL_LONG(MBREX(search_pos));
  1246. }
  1247. /* }}} */
  1248. /* {{{ proto bool mb_ereg_search_setpos(int position)
  1249. Set search start position */
  1250. PHP_FUNCTION(mb_ereg_search_setpos)
  1251. {
  1252. long position;
  1253. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &position) == FAILURE) {
  1254. return;
  1255. }
  1256. if (position < 0 || (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && position > Z_STRLEN_P(MBREX(search_str)))) {
  1257. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Position is out of range");
  1258. MBREX(search_pos) = 0;
  1259. RETURN_FALSE;
  1260. }
  1261. MBREX(search_pos) = position;
  1262. RETURN_TRUE;
  1263. }
  1264. /* }}} */
  1265. /* {{{ php_mb_regex_set_options */
  1266. static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC)
  1267. {
  1268. if (prev_options != NULL) {
  1269. *prev_options = MBREX(regex_default_options);
  1270. }
  1271. if (prev_syntax != NULL) {
  1272. *prev_syntax = MBREX(regex_default_syntax);
  1273. }
  1274. MBREX(regex_default_options) = options;
  1275. MBREX(regex_default_syntax) = syntax;
  1276. }
  1277. /* }}} */
  1278. /* {{{ proto string mb_regex_set_options([string options])
  1279. Set or get the default options for mbregex functions */
  1280. PHP_FUNCTION(mb_regex_set_options)
  1281. {
  1282. OnigOptionType opt;
  1283. OnigSyntaxType *syntax;
  1284. char *string = NULL;
  1285. int string_len;
  1286. char buf[16];
  1287. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s",
  1288. &string, &string_len) == FAILURE) {
  1289. RETURN_FALSE;
  1290. }
  1291. if (string != NULL) {
  1292. opt = 0;
  1293. syntax = NULL;
  1294. _php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
  1295. _php_mb_regex_set_options(opt, syntax, NULL, NULL TSRMLS_CC);
  1296. } else {
  1297. opt = MBREX(regex_default_options);
  1298. syntax = MBREX(regex_default_syntax);
  1299. }
  1300. _php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
  1301. RETVAL_STRING(buf, 1);
  1302. }
  1303. /* }}} */
  1304. #endif /* HAVE_MBREGEX */
  1305. /*
  1306. * Local variables:
  1307. * tab-width: 4
  1308. * c-basic-offset: 4
  1309. * End:
  1310. * vim600: fdm=marker
  1311. * vim: noet sw=4 ts=4
  1312. */