12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639 |
- /*
- +----------------------------------------------------------------------+
- | Copyright (c) The PHP Group |
- +----------------------------------------------------------------------+
- | This source file is subject to version 3.01 of the PHP license, |
- | that is bundled with this package in the file LICENSE, and is |
- | available through the world-wide-web at the following url: |
- | https://www.php.net/license/3_01.txt |
- | If you did not receive a copy of the PHP license and are unable to |
- | obtain it through the world-wide-web, please send a note to |
- | license@php.net so we can mail you a copy immediately. |
- +----------------------------------------------------------------------+
- | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
- +----------------------------------------------------------------------+
- */
- #include "libmbfl/config.h"
- #include "php.h"
- #include "php_ini.h"
- #ifdef HAVE_MBREGEX
- #include "zend_smart_str.h"
- #include "ext/standard/info.h"
- #include "php_mbregex.h"
- #include "mbstring.h"
- #include "libmbfl/filters/mbfilter_utf8.h"
- #include "php_onig_compat.h" /* must come prior to the oniguruma header */
- #include <oniguruma.h>
- #undef UChar
/*
 * Compatibility shims, active when ONIGURUMA_VERSION_INT is undefined or
 * below 60800. The match-parameter calls used further down are mapped onto
 * the plain search/match entry points and the parameter setters become
 * no-ops, so callers can be written against a single interface.
 */
#if !defined(ONIGURUMA_VERSION_INT) || ONIGURUMA_VERSION_INT < 60800
typedef void OnigMatchParam;
# define onig_new_match_param() (NULL)
# define onig_initialize_match_param(x) (void)(x)
# define onig_set_match_stack_limit_size_of_match_param(x, y)
# define onig_set_retry_limit_in_match_of_match_param(x, y)
# define onig_free_match_param(x)
# define onig_search_with_param(reg, str, end, start, range, region, option, mp) \
		onig_search(reg, str, end, start, range, region, option)
# define onig_match_with_param(re, str, end, at, region, option, mp) \
		onig_match(re, str, end, at, region, option)
#endif
- ZEND_EXTERN_MODULE_GLOBALS(mbstring)
- struct _zend_mb_regex_globals {
- OnigEncoding default_mbctype;
- OnigEncoding current_mbctype;
- const mbfl_encoding *current_mbctype_mbfl_encoding;
- HashTable ht_rc;
- zval search_str;
- zval *search_str_val;
- size_t search_pos;
- php_mb_regex_t *search_re;
- OnigRegion *search_regs;
- OnigOptionType regex_default_options;
- OnigSyntaxType *regex_default_syntax;
- };
- #define MBREX(g) (MBSTRG(mb_regex_globals)->g)
- /* {{{ static void php_mb_regex_free_cache() */
- static void php_mb_regex_free_cache(zval *el) {
- onig_free((php_mb_regex_t *)Z_PTR_P(el));
- }
- /* }}} */
- /* {{{ _php_mb_regex_globals_ctor */
- static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals)
- {
- pglobals->default_mbctype = ONIG_ENCODING_UTF8;
- pglobals->current_mbctype = ONIG_ENCODING_UTF8;
- pglobals->current_mbctype_mbfl_encoding = &mbfl_encoding_utf8;
- ZVAL_UNDEF(&pglobals->search_str);
- pglobals->search_re = (php_mb_regex_t*)NULL;
- pglobals->search_pos = 0;
- pglobals->search_regs = (OnigRegion*)NULL;
- pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
- pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
- return SUCCESS;
- }
- /* }}} */
- /* {{{ php_mb_regex_globals_alloc */
- zend_mb_regex_globals *php_mb_regex_globals_alloc(void)
- {
- zend_mb_regex_globals *pglobals = pemalloc(
- sizeof(zend_mb_regex_globals), 1);
- if (SUCCESS != _php_mb_regex_globals_ctor(pglobals)) {
- pefree(pglobals, 1);
- return NULL;
- }
- return pglobals;
- }
- /* }}} */
- /* {{{ php_mb_regex_globals_free */
- void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals)
- {
- if (!pglobals) {
- return;
- }
- pefree(pglobals, 1);
- }
- /* }}} */
- /* {{{ PHP_MINIT_FUNCTION(mb_regex) */
- PHP_MINIT_FUNCTION(mb_regex)
- {
- char version[256];
- onig_init();
- snprintf(version, sizeof(version), "%d.%d.%d",
- ONIGURUMA_VERSION_MAJOR, ONIGURUMA_VERSION_MINOR, ONIGURUMA_VERSION_TEENY);
- REGISTER_STRING_CONSTANT("MB_ONIGURUMA_VERSION", version, CONST_CS | CONST_PERSISTENT);
- return SUCCESS;
- }
- /* }}} */
- /* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */
- PHP_MSHUTDOWN_FUNCTION(mb_regex)
- {
- onig_end();
- return SUCCESS;
- }
- /* }}} */
- /* {{{ PHP_RINIT_FUNCTION(mb_regex) */
- PHP_RINIT_FUNCTION(mb_regex)
- {
- if (!MBSTRG(mb_regex_globals)) return FAILURE;
- zend_hash_init(&MBREX(ht_rc), 0, NULL, php_mb_regex_free_cache, 0);
- return SUCCESS;
- }
- /* }}} */
- /* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
- PHP_RSHUTDOWN_FUNCTION(mb_regex)
- {
- MBREX(current_mbctype) = MBREX(default_mbctype);
- MBREX(current_mbctype_mbfl_encoding) = mbfl_name2encoding(php_mb_regex_get_default_mbctype());
- if (!Z_ISUNDEF(MBREX(search_str))) {
- zval_ptr_dtor(&MBREX(search_str));
- ZVAL_UNDEF(&MBREX(search_str));
- }
- MBREX(search_pos) = 0;
- MBREX(search_re) = NULL;
- if (MBREX(search_regs) != NULL) {
- onig_region_free(MBREX(search_regs), 1);
- MBREX(search_regs) = (OnigRegion *)NULL;
- }
- zend_hash_destroy(&MBREX(ht_rc));
- return SUCCESS;
- }
- /* }}} */
- /* {{{ PHP_MINFO_FUNCTION(mb_regex) */
- PHP_MINFO_FUNCTION(mb_regex)
- {
- char buf[32];
- php_info_print_table_start();
- php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
- snprintf(buf, sizeof(buf), "%d.%d.%d",
- ONIGURUMA_VERSION_MAJOR,
- ONIGURUMA_VERSION_MINOR,
- ONIGURUMA_VERSION_TEENY);
- php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
- php_info_print_table_end();
- }
- /* }}} */
- /*
- * encoding name resolver
- */
- /* {{{ encoding name map */
- typedef struct _php_mb_regex_enc_name_map_t {
- const char *names;
- OnigEncoding code;
- } php_mb_regex_enc_name_map_t;
- static const php_mb_regex_enc_name_map_t enc_name_map[] = {
- #ifdef ONIG_ENCODING_EUC_JP
- {
- "EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
- ONIG_ENCODING_EUC_JP
- },
- #endif
- #ifdef ONIG_ENCODING_UTF8
- {
- "UTF-8\0UTF8\0",
- ONIG_ENCODING_UTF8
- },
- #endif
- #ifdef ONIG_ENCODING_UTF16_BE
- {
- "UTF-16\0UTF-16BE\0",
- ONIG_ENCODING_UTF16_BE
- },
- #endif
- #ifdef ONIG_ENCODING_UTF16_LE
- {
- "UTF-16LE\0",
- ONIG_ENCODING_UTF16_LE
- },
- #endif
- #ifdef ONIG_ENCODING_UTF32_BE
- {
- "UCS-4\0UTF-32\0UTF-32BE\0",
- ONIG_ENCODING_UTF32_BE
- },
- #endif
- #ifdef ONIG_ENCODING_UTF32_LE
- {
- "UCS-4LE\0UTF-32LE\0",
- ONIG_ENCODING_UTF32_LE
- },
- #endif
- #ifdef ONIG_ENCODING_SJIS
- {
- "SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
- ONIG_ENCODING_SJIS
- },
- #endif
- #ifdef ONIG_ENCODING_BIG5
- {
- "BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
- ONIG_ENCODING_BIG5
- },
- #endif
- #ifdef ONIG_ENCODING_EUC_CN
- {
- "EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
- ONIG_ENCODING_EUC_CN
- },
- #endif
- #ifdef ONIG_ENCODING_EUC_TW
- {
- "EUC-TW\0EUCTW\0EUC_TW\0",
- ONIG_ENCODING_EUC_TW
- },
- #endif
- #ifdef ONIG_ENCODING_EUC_KR
- {
- "EUC-KR\0EUCKR\0EUC_KR\0",
- ONIG_ENCODING_EUC_KR
- },
- #endif
- #if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
- {
- "KOI8\0KOI-8\0",
- ONIG_ENCODING_KOI8
- },
- #endif
- #ifdef ONIG_ENCODING_KOI8_R
- {
- "KOI8R\0KOI8-R\0KOI-8R\0",
- ONIG_ENCODING_KOI8_R
- },
- #endif
- #ifdef ONIG_ENCODING_ISO_8859_1
- {
- "ISO-8859-1\0ISO8859-1\0",
- ONIG_ENCODING_ISO_8859_1
- },
- #endif
- #ifdef ONIG_ENCODING_ISO_8859_2
- {
- "ISO-8859-2\0ISO8859-2\0",
- ONIG_ENCODING_ISO_8859_2
- },
- #endif
- #ifdef ONIG_ENCODING_ISO_8859_3
- {
- "ISO-8859-3\0ISO8859-3\0",
- ONIG_ENCODING_ISO_8859_3
- },
- #endif
- #ifdef ONIG_ENCODING_ISO_8859_4
- {
- "ISO-8859-4\0ISO8859-4\0",
- ONIG_ENCODING_ISO_8859_4
- },
- #endif
- #ifdef ONIG_ENCODING_ISO_8859_5
- {
- "ISO-8859-5\0ISO8859-5\0",
- ONIG_ENCODING_ISO_8859_5
- },
- #endif
- #ifdef ONIG_ENCODING_ISO_8859_6
- {
- "ISO-8859-6\0ISO8859-6\0",
- ONIG_ENCODING_ISO_8859_6
- },
- #endif
- #ifdef ONIG_ENCODING_ISO_8859_7
- {
- "ISO-8859-7\0ISO8859-7\0",
- ONIG_ENCODING_ISO_8859_7
- },
- #endif
- #ifdef ONIG_ENCODING_ISO_8859_8
- {
- "ISO-8859-8\0ISO8859-8\0",
- ONIG_ENCODING_ISO_8859_8
- },
- #endif
- #ifdef ONIG_ENCODING_ISO_8859_9
- {
- "ISO-8859-9\0ISO8859-9\0",
- ONIG_ENCODING_ISO_8859_9
- },
- #endif
- #ifdef ONIG_ENCODING_ISO_8859_10
- {
- "ISO-8859-10\0ISO8859-10\0",
- ONIG_ENCODING_ISO_8859_10
- },
- #endif
- #ifdef ONIG_ENCODING_ISO_8859_11
- {
- "ISO-8859-11\0ISO8859-11\0",
- ONIG_ENCODING_ISO_8859_11
- },
- #endif
- #ifdef ONIG_ENCODING_ISO_8859_13
- {
- "ISO-8859-13\0ISO8859-13\0",
- ONIG_ENCODING_ISO_8859_13
- },
- #endif
- #ifdef ONIG_ENCODING_ISO_8859_14
- {
- "ISO-8859-14\0ISO8859-14\0",
- ONIG_ENCODING_ISO_8859_14
- },
- #endif
- #ifdef ONIG_ENCODING_ISO_8859_15
- {
- "ISO-8859-15\0ISO8859-15\0",
- ONIG_ENCODING_ISO_8859_15
- },
- #endif
- #ifdef ONIG_ENCODING_ISO_8859_16
- {
- "ISO-8859-16\0ISO8859-16\0",
- ONIG_ENCODING_ISO_8859_16
- },
- #endif
- #ifdef ONIG_ENCODING_ASCII
- {
- "ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
- ONIG_ENCODING_ASCII
- },
- #endif
- { NULL, ONIG_ENCODING_UNDEF }
- };
- /* }}} */
- /* {{{ php_mb_regex_name2mbctype */
- static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
- {
- const char *p;
- const php_mb_regex_enc_name_map_t *mapping;
- if (pname == NULL || !*pname) {
- return ONIG_ENCODING_UNDEF;
- }
- for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
- for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
- if (strcasecmp(p, pname) == 0) {
- return mapping->code;
- }
- }
- }
- return ONIG_ENCODING_UNDEF;
- }
- /* }}} */
- /* {{{ php_mb_regex_mbctype2name */
- static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
- {
- const php_mb_regex_enc_name_map_t *mapping;
- for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
- if (mapping->code == mbctype) {
- return mapping->names;
- }
- }
- return NULL;
- }
- /* }}} */
- /* {{{ php_mb_regex_set_mbctype */
- int php_mb_regex_set_mbctype(const char *encname)
- {
- OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
- if (mbctype == ONIG_ENCODING_UNDEF) {
- return FAILURE;
- }
- MBREX(current_mbctype) = mbctype;
- MBREX(current_mbctype_mbfl_encoding) = mbfl_name2encoding(encname);
- return SUCCESS;
- }
- /* }}} */
- /* {{{ php_mb_regex_set_default_mbctype */
- int php_mb_regex_set_default_mbctype(const char *encname)
- {
- OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
- if (mbctype == ONIG_ENCODING_UNDEF) {
- return FAILURE;
- }
- MBREX(default_mbctype) = mbctype;
- return SUCCESS;
- }
- /* }}} */
- /* {{{ php_mb_regex_get_mbctype */
- const char *php_mb_regex_get_mbctype(void)
- {
- return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
- }
- /* }}} */
- /* {{{ php_mb_regex_get_mbctype_encoding */
- const mbfl_encoding *php_mb_regex_get_mbctype_encoding(void)
- {
- return MBREX(current_mbctype_mbfl_encoding);
- }
- /* }}} */
- /* {{{ php_mb_regex_get_default_mbctype */
- const char *php_mb_regex_get_default_mbctype(void)
- {
- return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
- }
- /* }}} */
- /*
- * regex cache
- */
- /* {{{ php_mbregex_compile_pattern */
- static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, size_t patlen, OnigOptionType options, OnigSyntaxType *syntax)
- {
- int err_code = 0;
- php_mb_regex_t *retval = NULL, *rc = NULL;
- OnigErrorInfo err_info;
- OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
- OnigEncoding enc = MBREX(current_mbctype);
- if (!php_mb_check_encoding(pattern, patlen, php_mb_regex_get_mbctype_encoding())) {
- php_error_docref(NULL, E_WARNING,
- "Pattern is not valid under %s encoding", _php_mb_regex_mbctype2name(enc));
- return NULL;
- }
- rc = zend_hash_str_find_ptr(&MBREX(ht_rc), (char *)pattern, patlen);
- if (!rc || onig_get_options(rc) != options || onig_get_encoding(rc) != enc || onig_get_syntax(rc) != syntax) {
- if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
- onig_error_code_to_str(err_str, err_code, &err_info);
- php_error_docref(NULL, E_WARNING, "mbregex compile err: %s", err_str);
- return NULL;
- }
- if (rc == MBREX(search_re)) {
- /* reuse the new rc? see bug #72399 */
- MBREX(search_re) = NULL;
- }
- zend_hash_str_update_ptr(&MBREX(ht_rc), (char *)pattern, patlen, retval);
- } else {
- retval = rc;
- }
- return retval;
- }
- /* }}} */
- /* {{{ _php_mb_regex_get_option_string */
- static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
- {
- size_t len_left = len;
- size_t len_req = 0;
- char *p = str;
- char c;
- if ((option & ONIG_OPTION_IGNORECASE) != 0) {
- if (len_left > 0) {
- --len_left;
- *(p++) = 'i';
- }
- ++len_req;
- }
- if ((option & ONIG_OPTION_EXTEND) != 0) {
- if (len_left > 0) {
- --len_left;
- *(p++) = 'x';
- }
- ++len_req;
- }
- if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) ==
- (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) {
- if (len_left > 0) {
- --len_left;
- *(p++) = 'p';
- }
- ++len_req;
- } else {
- if ((option & ONIG_OPTION_MULTILINE) != 0) {
- if (len_left > 0) {
- --len_left;
- *(p++) = 'm';
- }
- ++len_req;
- }
- if ((option & ONIG_OPTION_SINGLELINE) != 0) {
- if (len_left > 0) {
- --len_left;
- *(p++) = 's';
- }
- ++len_req;
- }
- }
- if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
- if (len_left > 0) {
- --len_left;
- *(p++) = 'l';
- }
- ++len_req;
- }
- if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
- if (len_left > 0) {
- --len_left;
- *(p++) = 'n';
- }
- ++len_req;
- }
- c = 0;
- if (syntax == ONIG_SYNTAX_JAVA) {
- c = 'j';
- } else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
- c = 'u';
- } else if (syntax == ONIG_SYNTAX_GREP) {
- c = 'g';
- } else if (syntax == ONIG_SYNTAX_EMACS) {
- c = 'c';
- } else if (syntax == ONIG_SYNTAX_RUBY) {
- c = 'r';
- } else if (syntax == ONIG_SYNTAX_PERL) {
- c = 'z';
- } else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
- c = 'b';
- } else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
- c = 'd';
- }
- if (c != 0) {
- if (len_left > 0) {
- --len_left;
- *(p++) = c;
- }
- ++len_req;
- }
- if (len_left > 0) {
- --len_left;
- *(p++) = '\0';
- }
- ++len_req;
- if (len < len_req) {
- return len_req;
- }
- return 0;
- }
- /* }}} */
- /* {{{ _php_mb_regex_init_options */
- static bool _php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option,
- OnigSyntaxType **syntax)
- {
- size_t n;
- char c;
- OnigOptionType optm = 0;
- *syntax = ONIG_SYNTAX_RUBY;
- if (parg != NULL) {
- n = 0;
- while(n < narg) {
- c = parg[n++];
- switch (c) {
- case 'i':
- optm |= ONIG_OPTION_IGNORECASE;
- break;
- case 'x':
- optm |= ONIG_OPTION_EXTEND;
- break;
- case 'm':
- optm |= ONIG_OPTION_MULTILINE;
- break;
- case 's':
- optm |= ONIG_OPTION_SINGLELINE;
- break;
- case 'p':
- optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
- break;
- case 'l':
- optm |= ONIG_OPTION_FIND_LONGEST;
- break;
- case 'n':
- optm |= ONIG_OPTION_FIND_NOT_EMPTY;
- break;
- case 'j':
- *syntax = ONIG_SYNTAX_JAVA;
- break;
- case 'u':
- *syntax = ONIG_SYNTAX_GNU_REGEX;
- break;
- case 'g':
- *syntax = ONIG_SYNTAX_GREP;
- break;
- case 'c':
- *syntax = ONIG_SYNTAX_EMACS;
- break;
- case 'r':
- *syntax = ONIG_SYNTAX_RUBY;
- break;
- case 'z':
- *syntax = ONIG_SYNTAX_PERL;
- break;
- case 'b':
- *syntax = ONIG_SYNTAX_POSIX_BASIC;
- break;
- case 'd':
- *syntax = ONIG_SYNTAX_POSIX_EXTENDED;
- break;
- default:
- zend_value_error("Option \"%c\" is not supported", c);
- return false;
- }
- }
- if (option != NULL) *option|=optm;
- }
- return true;
- }
- /* }}} */
- /*
- * Callbacks for named subpatterns
- */
- /* {{{ struct mb_ereg_groups_iter_arg */
- typedef struct mb_regex_groups_iter_args {
- zval *groups;
- char *search_str;
- size_t search_len;
- OnigRegion *region;
- } mb_regex_groups_iter_args;
- /* }}} */
- /* {{{ mb_ereg_groups_iter */
- static int
- mb_regex_groups_iter(const OnigUChar* name, const OnigUChar* name_end, int ngroup_num, int* group_nums, regex_t* reg, void* parg)
- {
- mb_regex_groups_iter_args *args = (mb_regex_groups_iter_args *) parg;
- int gn, beg, end;
- /*
- * In case of duplicate groups, keep only the last succeeding one
- * to be consistent with preg_match with the PCRE_DUPNAMES option.
- */
- gn = onig_name_to_backref_number(reg, name, name_end, args->region);
- beg = args->region->beg[gn];
- end = args->region->end[gn];
- if (beg >= 0 && beg < end && end <= args->search_len) {
- add_assoc_stringl_ex(args->groups, (char *)name, name_end - name, &args->search_str[beg], end - beg);
- } else {
- add_assoc_bool_ex(args->groups, (char *)name, name_end - name, 0);
- }
- return 0;
- }
- /* }}} */
- /*
- * Helper for _php_mb_regex_ereg_replace_exec
- */
- /* {{{ mb_regex_substitute */
- static inline void mb_regex_substitute(
- smart_str *pbuf,
- const char *subject,
- size_t subject_len,
- char *replace,
- size_t replace_len,
- php_mb_regex_t *regexp,
- OnigRegion *regs,
- const mbfl_encoding *enc
- ) {
- char *p, *sp, *eos;
- int no; /* bakreference group number */
- int clen; /* byte-length of the current character */
- p = replace;
- eos = replace + replace_len;
- while (p < eos) {
- clen = (int) php_mb_mbchar_bytes_ex(p, enc);
- if (clen != 1 || p == eos || p[0] != '\\') {
- /* skip anything that's not an ascii backslash */
- smart_str_appendl(pbuf, p, clen);
- p += clen;
- continue;
- }
- sp = p; /* save position */
- clen = (int) php_mb_mbchar_bytes_ex(++p, enc);
- if (clen != 1 || p == eos) {
- /* skip backslash followed by multibyte char */
- smart_str_appendl(pbuf, sp, p - sp);
- continue;
- }
- no = -1;
- switch (p[0]) {
- case '0':
- no = 0;
- p++;
- break;
- case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- if (!onig_noname_group_capture_is_active(regexp)) {
- /*
- * FIXME:
- * Oniguruma throws a compile error if numbered backrefs are used with named groups in the pattern.
- * For now we just ignore them, but in the future we might want to raise a warning
- * and abort the whole replace operation.
- */
- p++;
- smart_str_appendl(pbuf, sp, p - sp);
- continue;
- }
- no = p[0] - '0';
- p++;
- break;
- case 'k':
- {
- clen = (int) php_mb_mbchar_bytes_ex(++p, enc);
- if (clen != 1 || p == eos || (p[0] != '<' && p[0] != '\'')) {
- /* not a backref delimiter */
- p += clen;
- smart_str_appendl(pbuf, sp, p - sp);
- continue;
- }
- /* try to consume everything until next delimiter */
- char delim = p[0] == '<' ? '>' : '\'';
- char *name, *name_end;
- char maybe_num = 1;
- name_end = name = p + 1;
- while (name_end < eos) {
- clen = (int) php_mb_mbchar_bytes_ex(name_end, enc);
- if (clen != 1) {
- name_end += clen;
- maybe_num = 0;
- continue;
- }
- if (name_end[0] == delim) break;
- if (maybe_num && !isdigit(name_end[0])) maybe_num = 0;
- name_end++;
- }
- p = name_end + 1;
- if (name_end - name < 1 || name_end >= eos) {
- /* the backref was empty or we failed to find the end delimiter */
- smart_str_appendl(pbuf, sp, p - sp);
- continue;
- }
- /* we have either a name or a number */
- if (maybe_num) {
- if (!onig_noname_group_capture_is_active(regexp)) {
- /* see above note on mixing numbered & named backrefs */
- smart_str_appendl(pbuf, sp, p - sp);
- continue;
- }
- if (name_end - name == 1) {
- no = name[0] - '0';
- break;
- }
- if (name[0] == '0') {
- /* 01 is not a valid number */
- break;
- }
- no = (int) strtoul(name, NULL, 10);
- break;
- }
- no = onig_name_to_backref_number(regexp, (OnigUChar *)name, (OnigUChar *)name_end, regs);
- break;
- }
- default:
- /* We're not treating \ as an escape character and will interpret something like
- * \\1 as \ followed by \1, rather than \\ followed by 1. This is because this
- * function has not supported escaping of backslashes historically. */
- smart_str_appendl(pbuf, sp, p - sp);
- continue;
- }
- if (no < 0 || no >= regs->num_regs) {
- /* invalid group number reference, keep the escape sequence in the output */
- smart_str_appendl(pbuf, sp, p - sp);
- continue;
- }
- if (regs->beg[no] >= 0 && regs->beg[no] < regs->end[no] && (size_t)regs->end[no] <= subject_len) {
- smart_str_appendl(pbuf, subject + regs->beg[no], regs->end[no] - regs->beg[no]);
- }
- }
- if (p < eos) {
- smart_str_appendl(pbuf, p, eos - p);
- }
- }
- /* }}} */
- /*
- * php functions
- */
- /* {{{ Returns the current encoding for regex as a string. */
- PHP_FUNCTION(mb_regex_encoding)
- {
- char *encoding = NULL;
- size_t encoding_len;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!", &encoding, &encoding_len) == FAILURE) {
- RETURN_THROWS();
- }
- if (!encoding) {
- const char *retval = php_mb_regex_get_mbctype();
- ZEND_ASSERT(retval != NULL);
- RETURN_STRING(retval);
- } else {
- if (php_mb_regex_set_mbctype(encoding) == FAILURE) {
- zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", encoding);
- RETURN_THROWS();
- }
- /* TODO Make function return previous encoding? */
- RETURN_TRUE;
- }
- }
- /* }}} */
- /* {{{ _php_mb_onig_search */
- static int _php_mb_onig_search(regex_t* reg, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start,
- const OnigUChar* range, OnigRegion* region, OnigOptionType option) {
- OnigMatchParam *mp = onig_new_match_param();
- int err;
- onig_initialize_match_param(mp);
- if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_stack_limit))) {
- onig_set_match_stack_limit_size_of_match_param(mp, (unsigned int)MBSTRG(regex_stack_limit));
- }
- if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_retry_limit))) {
- onig_set_retry_limit_in_match_of_match_param(mp, (unsigned int)MBSTRG(regex_retry_limit));
- }
- /* search */
- err = onig_search_with_param(reg, str, end, start, range, region, option, mp);
- onig_free_match_param(mp);
- return err;
- }
- /* }}} */
- /* {{{ _php_mb_regex_ereg_exec */
- static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
- {
- zval *array = NULL;
- char *arg_pattern, *string;
- size_t arg_pattern_len, string_len;
- php_mb_regex_t *re;
- OnigRegion *regs = NULL;
- int i, beg, end;
- OnigOptionType options;
- char *str;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|z", &arg_pattern, &arg_pattern_len, &string, &string_len, &array) == FAILURE) {
- RETURN_THROWS();
- }
- if (arg_pattern_len == 0) {
- zend_argument_value_error(1, "must not be empty");
- RETURN_THROWS();
- }
- if (array != NULL) {
- array = zend_try_array_init(array);
- if (!array) {
- RETURN_THROWS();
- }
- }
- if (!php_mb_check_encoding(
- string,
- string_len,
- php_mb_regex_get_mbctype_encoding()
- )) {
- RETURN_FALSE;
- }
- options = MBREX(regex_default_options);
- if (icase) {
- options |= ONIG_OPTION_IGNORECASE;
- }
- re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(regex_default_syntax));
- if (re == NULL) {
- RETVAL_FALSE;
- goto out;
- }
- regs = onig_region_new();
- /* actually execute the regular expression */
- if (_php_mb_onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) {
- RETVAL_FALSE;
- goto out;
- }
- str = string;
- if (array != NULL) {
- for (i = 0; i < regs->num_regs; i++) {
- beg = regs->beg[i];
- end = regs->end[i];
- if (beg >= 0 && beg < end && (size_t)end <= string_len) {
- add_index_stringl(array, i, (char *)&str[beg], end - beg);
- } else {
- add_index_bool(array, i, 0);
- }
- }
- if (onig_number_of_names(re) > 0) {
- mb_regex_groups_iter_args args = {array, string, string_len, regs};
- onig_foreach_name(re, mb_regex_groups_iter, &args);
- }
- }
- RETVAL_TRUE;
- out:
- if (regs != NULL) {
- onig_region_free(regs, 1);
- }
- }
- /* }}} */
- /* {{{ Regular expression match for multibyte string */
- PHP_FUNCTION(mb_ereg)
- {
- _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
- }
- /* }}} */
- /* {{{ Case-insensitive regular expression match for multibyte string */
- PHP_FUNCTION(mb_eregi)
- {
- _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
- }
- /* }}} */
- /* {{{ _php_mb_regex_ereg_replace_exec */
- static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable)
- {
- char *arg_pattern;
- size_t arg_pattern_len;
- char *replace;
- size_t replace_len;
- zend_fcall_info arg_replace_fci;
- zend_fcall_info_cache arg_replace_fci_cache;
- char *string;
- size_t string_len;
- php_mb_regex_t *re;
- OnigSyntaxType *syntax;
- OnigRegion *regs = NULL;
- smart_str out_buf = {0};
- smart_str eval_buf = {0};
- smart_str *pbuf;
- int err, n;
- OnigUChar *pos;
- OnigUChar *string_lim;
- char *description = NULL;
- const mbfl_encoding *enc = php_mb_regex_get_mbctype_encoding();
- ZEND_ASSERT(enc != NULL);
- {
- char *option_str = NULL;
- size_t option_str_len = 0;
- if (!is_callable) {
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|s!",
- &arg_pattern, &arg_pattern_len,
- &replace, &replace_len,
- &string, &string_len,
- &option_str, &option_str_len) == FAILURE) {
- RETURN_THROWS();
- }
- } else {
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "sfs|s!",
- &arg_pattern, &arg_pattern_len,
- &arg_replace_fci, &arg_replace_fci_cache,
- &string, &string_len,
- &option_str, &option_str_len) == FAILURE) {
- RETURN_THROWS();
- }
- }
- if (!php_mb_check_encoding(string, string_len, enc)) {
- RETURN_NULL();
- }
- if (option_str != NULL) {
- /* Initialize option and in case of failure it means there is a value error */
- if (!_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax)) {
- RETURN_THROWS();
- }
- } else {
- options |= MBREX(regex_default_options);
- syntax = MBREX(regex_default_syntax);
- }
- }
- /* create regex pattern buffer */
- re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, syntax);
- if (re == NULL) {
- RETURN_FALSE;
- }
- if (is_callable) {
- pbuf = &eval_buf;
- description = zend_make_compiled_string_description("mbregex replace");
- } else {
- pbuf = &out_buf;
- description = NULL;
- }
- /* do the actual work */
- err = 0;
- pos = (OnigUChar *)string;
- string_lim = (OnigUChar*)(string + string_len);
- regs = onig_region_new();
- while (err >= 0) {
- err = _php_mb_onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
- if (err <= -2) {
- OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
- onig_error_code_to_str(err_str, err);
- php_error_docref(NULL, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
- break;
- }
- if (err >= 0) {
- /* copy the part of the string before the match */
- smart_str_appendl(&out_buf, (char *)pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));
- if (!is_callable) {
- mb_regex_substitute(pbuf, string, string_len, replace, replace_len, re, regs, enc);
- }
- if (is_callable) {
- zval args[1];
- zval subpats, retval;
- int i;
- array_init(&subpats);
- for (i = 0; i < regs->num_regs; i++) {
- add_next_index_stringl(&subpats, string + regs->beg[i], regs->end[i] - regs->beg[i]);
- }
- if (onig_number_of_names(re) > 0) {
- mb_regex_groups_iter_args args = {&subpats, string, string_len, regs};
- onig_foreach_name(re, mb_regex_groups_iter, &args);
- }
- ZVAL_COPY_VALUE(&args[0], &subpats);
- /* null terminate buffer */
- smart_str_0(&eval_buf);
- arg_replace_fci.param_count = 1;
- arg_replace_fci.params = args;
- arg_replace_fci.retval = &retval;
- if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache) == SUCCESS &&
- !Z_ISUNDEF(retval)) {
- convert_to_string(&retval);
- smart_str_appendl(&out_buf, Z_STRVAL(retval), Z_STRLEN(retval));
- smart_str_free(&eval_buf);
- zval_ptr_dtor(&retval);
- } else {
- if (!EG(exception)) {
- zend_throw_error(NULL, "Unable to call custom replacement function");
- zval_ptr_dtor(&subpats);
- RETURN_THROWS();
- }
- }
- zval_ptr_dtor(&subpats);
- }
- n = regs->end[0];
- if ((pos - (OnigUChar *)string) < n) {
- pos = (OnigUChar *)string + n;
- } else {
- if (pos < string_lim) {
- smart_str_appendl(&out_buf, (char *)pos, 1);
- }
- pos++;
- }
- } else { /* nomatch */
- /* stick that last bit of string on our output */
- if (string_lim - pos > 0) {
- smart_str_appendl(&out_buf, (char *)pos, string_lim - pos);
- }
- }
- onig_region_free(regs, 0);
- }
- if (description) {
- efree(description);
- }
- if (regs != NULL) {
- onig_region_free(regs, 1);
- }
- smart_str_free(&eval_buf);
- if (err <= -2) {
- smart_str_free(&out_buf);
- RETVAL_FALSE;
- } else if (out_buf.s) {
- smart_str_0(&out_buf);
- RETVAL_STR(out_buf.s);
- } else {
- RETVAL_EMPTY_STRING();
- }
- }
- /* }}} */
- /* {{{ Replace regular expression for multibyte string */
- PHP_FUNCTION(mb_ereg_replace)
- {
- _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
- }
- /* }}} */
- /* {{{ Case insensitive replace regular expression for multibyte string */
- PHP_FUNCTION(mb_eregi_replace)
- {
- _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE, 0);
- }
- /* }}} */
- /* {{{ regular expression for multibyte string using replacement callback */
- PHP_FUNCTION(mb_ereg_replace_callback)
- {
- _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
- }
- /* }}} */
- /* {{{ split multibyte string into array by regular expression */
- PHP_FUNCTION(mb_split)
- {
- char *arg_pattern;
- size_t arg_pattern_len;
- php_mb_regex_t *re;
- OnigRegion *regs = NULL;
- char *string;
- OnigUChar *pos, *chunk_pos;
- size_t string_len;
- int err;
- zend_long count = -1;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
- RETURN_THROWS();
- }
- if (count > 0) {
- count--;
- }
- if (!php_mb_check_encoding(string, string_len, php_mb_regex_get_mbctype_encoding())) {
- RETURN_FALSE;
- }
- /* create regex pattern buffer */
- if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(regex_default_syntax))) == NULL) {
- RETURN_FALSE;
- }
- array_init(return_value);
- chunk_pos = pos = (OnigUChar *)string;
- err = 0;
- regs = onig_region_new();
- /* churn through str, generating array entries as we go */
- while (count != 0 && (size_t)(pos - (OnigUChar *)string) < string_len) {
- size_t beg, end;
- err = _php_mb_onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0);
- if (err < 0) {
- break;
- }
- beg = regs->beg[0], end = regs->end[0];
- /* add it to the array */
- if ((size_t)(pos - (OnigUChar *)string) < end) {
- if (beg < string_len && beg >= (size_t)(chunk_pos - (OnigUChar *)string)) {
- add_next_index_stringl(return_value, (char *)chunk_pos, ((OnigUChar *)(string + beg) - chunk_pos));
- --count;
- } else {
- err = -2;
- break;
- }
- /* point at our new starting point */
- chunk_pos = pos = (OnigUChar *)string + end;
- } else {
- pos++;
- }
- onig_region_free(regs, 0);
- }
- onig_region_free(regs, 1);
- /* see if we encountered an error */
- // ToDo investigate if this can actually/should happen ...
- if (err <= -2) {
- OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
- onig_error_code_to_str(err_str, err);
- php_error_docref(NULL, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
- zend_array_destroy(Z_ARR_P(return_value));
- RETURN_FALSE;
- }
- /* otherwise we just have one last element to add to the array */
- if ((OnigUChar *)(string + string_len) > chunk_pos) {
- size_t n = ((OnigUChar *)(string + string_len) - chunk_pos);
- add_next_index_stringl(return_value, (char *)chunk_pos, n);
- } else {
- add_next_index_stringl(return_value, "", 0);
- }
- }
- /* }}} */
- /* {{{ Regular expression match for multibyte string */
- PHP_FUNCTION(mb_ereg_match)
- {
- char *arg_pattern;
- size_t arg_pattern_len;
- char *string;
- size_t string_len;
- php_mb_regex_t *re;
- OnigSyntaxType *syntax;
- OnigOptionType option = 0;
- int err;
- OnigMatchParam *mp;
- {
- char *option_str = NULL;
- size_t option_str_len = 0;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s!",
- &arg_pattern, &arg_pattern_len, &string, &string_len,
- &option_str, &option_str_len)==FAILURE) {
- RETURN_THROWS();
- }
- if (option_str != NULL) {
- if(!_php_mb_regex_init_options(option_str, option_str_len, &option, &syntax)) {
- RETURN_THROWS();
- }
- } else {
- option |= MBREX(regex_default_options);
- syntax = MBREX(regex_default_syntax);
- }
- }
- if (!php_mb_check_encoding(string, string_len, php_mb_regex_get_mbctype_encoding())) {
- RETURN_FALSE;
- }
- if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) {
- RETURN_FALSE;
- }
- mp = onig_new_match_param();
- onig_initialize_match_param(mp);
- if (MBSTRG(regex_stack_limit) > 0 && MBSTRG(regex_stack_limit) < UINT_MAX) {
- onig_set_match_stack_limit_size_of_match_param(mp, (unsigned int)MBSTRG(regex_stack_limit));
- }
- if (MBSTRG(regex_retry_limit) > 0 && MBSTRG(regex_retry_limit) < UINT_MAX) {
- onig_set_retry_limit_in_match_of_match_param(mp, (unsigned int)MBSTRG(regex_retry_limit));
- }
- /* match */
- err = onig_match_with_param(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0, mp);
- onig_free_match_param(mp);
- if (err >= 0) {
- RETVAL_TRUE;
- } else {
- RETVAL_FALSE;
- }
- }
- /* }}} */
- /* regex search */
- /* {{{ _php_mb_regex_ereg_search_exec */
- static void _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
- {
- char *arg_pattern = NULL, *arg_options = NULL;
- size_t arg_pattern_len, arg_options_len;
- int err;
- size_t n, i, pos, len;
- /* Stored as int* in the OnigRegion struct */
- int beg, end;
- OnigOptionType option = 0;
- OnigUChar *str;
- OnigSyntaxType *syntax;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!s!", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
- RETURN_THROWS();
- }
- if (arg_options) {
- _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax);
- } else {
- option |= MBREX(regex_default_options);
- syntax = MBREX(regex_default_syntax);
- }
- if (MBREX(search_regs)) {
- onig_region_free(MBREX(search_regs), 1);
- MBREX(search_regs) = NULL;
- }
- if (arg_pattern) {
- /* create regex pattern buffer */
- if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) {
- RETURN_FALSE;
- }
- }
- pos = MBREX(search_pos);
- str = NULL;
- len = 0;
- if (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING){
- str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
- len = Z_STRLEN(MBREX(search_str));
- }
- if (MBREX(search_re) == NULL) {
- zend_throw_error(NULL, "No pattern was provided");
- RETURN_THROWS();
- }
- if (str == NULL) {
- zend_throw_error(NULL, "No string was provided");
- RETURN_THROWS();
- }
- MBREX(search_regs) = onig_region_new();
- err = _php_mb_onig_search(MBREX(search_re), str, str + len, str + pos, str + len, MBREX(search_regs), 0);
- if (err == ONIG_MISMATCH) {
- MBREX(search_pos) = len;
- RETVAL_FALSE;
- } else if (err <= -2) {
- OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
- onig_error_code_to_str(err_str, err);
- php_error_docref(NULL, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
- RETVAL_FALSE;
- } else {
- switch (mode) {
- case 1:
- array_init(return_value);
- beg = MBREX(search_regs)->beg[0];
- end = MBREX(search_regs)->end[0];
- add_next_index_long(return_value, beg);
- add_next_index_long(return_value, end - beg);
- break;
- case 2:
- array_init(return_value);
- n = MBREX(search_regs)->num_regs;
- for (i = 0; i < n; i++) {
- beg = MBREX(search_regs)->beg[i];
- end = MBREX(search_regs)->end[i];
- if (beg >= 0 && beg <= end && end <= len) {
- add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
- } else {
- add_index_bool(return_value, i, 0);
- }
- }
- if (onig_number_of_names(MBREX(search_re)) > 0) {
- mb_regex_groups_iter_args args = {
- return_value,
- Z_STRVAL(MBREX(search_str)),
- Z_STRLEN(MBREX(search_str)),
- MBREX(search_regs)
- };
- onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args);
- }
- break;
- default:
- RETVAL_TRUE;
- break;
- }
- end = MBREX(search_regs)->end[0];
- if (pos <= end) {
- MBREX(search_pos) = end;
- } else {
- MBREX(search_pos) = pos + 1;
- }
- }
- if (err < 0) {
- onig_region_free(MBREX(search_regs), 1);
- MBREX(search_regs) = (OnigRegion *)NULL;
- }
- }
- /* }}} */
- /* {{{ Regular expression search for multibyte string */
- PHP_FUNCTION(mb_ereg_search)
- {
- _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
- }
- /* }}} */
- /* {{{ Regular expression search for multibyte string */
- PHP_FUNCTION(mb_ereg_search_pos)
- {
- _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
- }
- /* }}} */
- /* {{{ Regular expression search for multibyte string */
- PHP_FUNCTION(mb_ereg_search_regs)
- {
- _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
- }
- /* }}} */
- /* {{{ Initialize string and regular expression for search. */
- PHP_FUNCTION(mb_ereg_search_init)
- {
- zend_string *arg_str;
- char *arg_pattern = NULL, *arg_options = NULL;
- size_t arg_pattern_len = 0, arg_options_len = 0;
- OnigSyntaxType *syntax = NULL;
- OnigOptionType option;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|s!s!", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
- RETURN_THROWS();
- }
- if (arg_pattern && arg_pattern_len == 0) {
- zend_argument_value_error(2, "must not be empty");
- RETURN_THROWS();
- }
- if (arg_options) {
- option = 0;
- _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax);
- } else {
- option = MBREX(regex_default_options);
- syntax = MBREX(regex_default_syntax);
- }
- if (arg_pattern) {
- /* create regex pattern buffer */
- if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) {
- RETURN_FALSE;
- }
- }
- if (!Z_ISNULL(MBREX(search_str))) {
- zval_ptr_dtor(&MBREX(search_str));
- }
- ZVAL_STR_COPY(&MBREX(search_str), arg_str);
- if (php_mb_check_encoding(ZSTR_VAL(arg_str), ZSTR_LEN(arg_str), php_mb_regex_get_mbctype_encoding())) {
- MBREX(search_pos) = 0;
- RETVAL_TRUE;
- } else {
- MBREX(search_pos) = ZSTR_LEN(arg_str);
- RETVAL_FALSE;
- }
- if (MBREX(search_regs) != NULL) {
- onig_region_free(MBREX(search_regs), 1);
- MBREX(search_regs) = NULL;
- }
- }
- /* }}} */
- /* {{{ Get matched substring of the last time */
- PHP_FUNCTION(mb_ereg_search_getregs)
- {
- size_t n, i, len;
- /* Stored as int* in the OnigRegion struct */
- int beg, end;
- OnigUChar *str;
- if (zend_parse_parameters_none() == FAILURE) {
- RETURN_THROWS();
- }
- if (MBREX(search_regs) != NULL && Z_TYPE(MBREX(search_str)) == IS_STRING) {
- array_init(return_value);
- str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
- len = Z_STRLEN(MBREX(search_str));
- n = MBREX(search_regs)->num_regs;
- for (i = 0; i < n; i++) {
- beg = MBREX(search_regs)->beg[i];
- end = MBREX(search_regs)->end[i];
- if (beg >= 0 && beg <= end && end <= len) {
- add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
- } else {
- add_index_bool(return_value, i, 0);
- }
- }
- if (onig_number_of_names(MBREX(search_re)) > 0) {
- mb_regex_groups_iter_args args = {
- return_value,
- Z_STRVAL(MBREX(search_str)),
- len,
- MBREX(search_regs)
- };
- onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args);
- }
- } else {
- // TODO This seems to be some logical error, promote to Error
- RETVAL_FALSE;
- }
- }
- /* }}} */
- /* {{{ Get search start position */
- PHP_FUNCTION(mb_ereg_search_getpos)
- {
- if (zend_parse_parameters_none() == FAILURE) {
- RETURN_THROWS();
- }
- RETVAL_LONG(MBREX(search_pos));
- }
- /* }}} */
- /* {{{ Set search start position */
- PHP_FUNCTION(mb_ereg_search_setpos)
- {
- zend_long position;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &position) == FAILURE) {
- RETURN_THROWS();
- }
- /* Accept negative position if length of search string can be determined */
- if ((position < 0) && (!Z_ISUNDEF(MBREX(search_str))) && (Z_TYPE(MBREX(search_str)) == IS_STRING)) {
- position += Z_STRLEN(MBREX(search_str));
- }
- if (position < 0 || (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING && (size_t)position > Z_STRLEN(MBREX(search_str)))) {
- zend_argument_value_error(1, "is out of range");
- RETURN_THROWS();
- }
- MBREX(search_pos) = position;
- // TODO Return void
- RETURN_TRUE;
- }
- /* }}} */
- /* {{{ php_mb_regex_set_options */
- static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax)
- {
- if (prev_options != NULL) {
- *prev_options = MBREX(regex_default_options);
- }
- if (prev_syntax != NULL) {
- *prev_syntax = MBREX(regex_default_syntax);
- }
- MBREX(regex_default_options) = options;
- MBREX(regex_default_syntax) = syntax;
- }
- /* }}} */
- /* {{{ Set or get the default options for mbregex functions */
- PHP_FUNCTION(mb_regex_set_options)
- {
- OnigOptionType opt, prev_opt;
- OnigSyntaxType *syntax, *prev_syntax;
- char *string = NULL;
- size_t string_len;
- char buf[16];
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!",
- &string, &string_len) == FAILURE) {
- RETURN_THROWS();
- }
- if (string != NULL) {
- opt = 0;
- syntax = NULL;
- if(!_php_mb_regex_init_options(string, string_len, &opt, &syntax)) {
- RETURN_THROWS();
- }
- _php_mb_regex_set_options(opt, syntax, &prev_opt, &prev_syntax);
- opt = prev_opt;
- syntax = prev_syntax;
- } else {
- opt = MBREX(regex_default_options);
- syntax = MBREX(regex_default_syntax);
- }
- _php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
- RETVAL_STRING(buf);
- }
- /* }}} */
- #endif /* HAVE_MBREGEX */
|