12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060 |
- #include "php.h"
- #include "php_ini.h"
- #include "php_globals.h"
- #include "php_pcre.h"
- #include "ext/standard/info.h"
- #include "ext/standard/php_smart_str.h"
- #if HAVE_PCRE || HAVE_BUNDLED_PCRE
- #include "ext/standard/php_string.h"
/* Result layout selectors for global matching; they travel in the low byte
 * of the flags argument. */
#define PREG_PATTERN_ORDER			1
#define PREG_SET_ORDER				2

/* Ask for every capture to be reported together with its subject offset. */
#define PREG_OFFSET_CAPTURE			(1<<8)

/* Flag bits named for preg_split(). */
#define PREG_SPLIT_NO_EMPTY			(1<<0)
#define PREG_SPLIT_DELIM_CAPTURE	(1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE	(1<<2)

/* Internal option bit recorded when a pattern carries the "e" modifier. */
#define PREG_REPLACE_EVAL			(1<<0)

/* Flag bit named for preg_grep(). */
#define PREG_GREP_INVERT			(1<<0)

/* Number of cached compiled patterns at which the cache gets thinned out. */
#define PCRE_CACHE_SIZE 4096

/* Fall back to PCRE_NOTEMPTY when the PCRE headers do not provide
 * PCRE_NOTEMPTY_ATSTART. */
#ifndef PCRE_NOTEMPTY_ATSTART
# define PCRE_NOTEMPTY_ATSTART PCRE_NOTEMPTY
#endif
- enum {
- PHP_PCRE_NO_ERROR = 0,
- PHP_PCRE_INTERNAL_ERROR,
- PHP_PCRE_BACKTRACK_LIMIT_ERROR,
- PHP_PCRE_RECURSION_LIMIT_ERROR,
- PHP_PCRE_BAD_UTF8_ERROR,
- PHP_PCRE_BAD_UTF8_OFFSET_ERROR
- };
- ZEND_DECLARE_MODULE_GLOBALS(pcre)
- static void pcre_handle_exec_error(int pcre_code TSRMLS_DC)
- {
- int preg_code = 0;
- switch (pcre_code) {
- case PCRE_ERROR_MATCHLIMIT:
- preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
- break;
- case PCRE_ERROR_RECURSIONLIMIT:
- preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
- break;
- case PCRE_ERROR_BADUTF8:
- preg_code = PHP_PCRE_BAD_UTF8_ERROR;
- break;
- case PCRE_ERROR_BADUTF8_OFFSET:
- preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
- break;
- default:
- preg_code = PHP_PCRE_INTERNAL_ERROR;
- break;
- }
- PCRE_G(error_code) = preg_code;
- }
- static void php_free_pcre_cache(void *data)
- {
- pcre_cache_entry *pce = (pcre_cache_entry *) data;
- if (!pce) return;
- pefree(pce->re, 1);
- if (pce->extra) pefree(pce->extra, 1);
- #if HAVE_SETLOCALE
- if ((void*)pce->tables) pefree((void*)pce->tables, 1);
- pefree(pce->locale, 1);
- #endif
- }
- static PHP_GINIT_FUNCTION(pcre)
- {
- zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
- pcre_globals->backtrack_limit = 0;
- pcre_globals->recursion_limit = 0;
- pcre_globals->error_code = PHP_PCRE_NO_ERROR;
- }
- static PHP_GSHUTDOWN_FUNCTION(pcre)
- {
- zend_hash_destroy(&pcre_globals->pcre_cache);
- }
- PHP_INI_BEGIN()
- STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
- STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
- PHP_INI_END()
/* {{{ PHP_MINFO_FUNCTION
 * Emit the extension's section of the info output: a two-row table (support
 * status and the linked PCRE library version) followed by the INI entries. */
static PHP_MINFO_FUNCTION(pcre)
{
	const char *library_version = pcre_version();

	php_info_print_table_start();
	php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
	php_info_print_table_row(2, "PCRE Library Version", library_version );
	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
/* }}} */
- static PHP_MINIT_FUNCTION(pcre)
- {
- REGISTER_INI_ENTRIES();
- REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
- REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
- return SUCCESS;
- }
- static PHP_MSHUTDOWN_FUNCTION(pcre)
- {
- UNREGISTER_INI_ENTRIES();
- return SUCCESS;
- }
- static int pcre_clean_cache(void *data, void *arg TSRMLS_DC)
- {
- pcre_cache_entry *pce = (pcre_cache_entry *) data;
- int *num_clean = (int *)arg;
- if (*num_clean > 0 && !pce->refcount) {
- (*num_clean)--;
- return ZEND_HASH_APPLY_REMOVE;
- } else {
- return ZEND_HASH_APPLY_KEEP;
- }
- }
- static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_DC)
- {
- pcre_extra *extra = pce->extra;
- int name_cnt = 0, name_size, ni = 0;
- int rc;
- char *name_table;
- unsigned short name_idx;
- char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
- rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
- if (rc < 0) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
- efree(subpat_names);
- return NULL;
- }
- if (name_cnt > 0) {
- int rc1, rc2;
- rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
- rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
- rc = rc2 ? rc2 : rc1;
- if (rc < 0) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
- efree(subpat_names);
- return NULL;
- }
- while (ni++ < name_cnt) {
- name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
- subpat_names[name_idx] = name_table + 2;
- if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
- efree(subpat_names);
- return NULL;
- }
- name_table += name_size;
- }
- }
- return subpat_names;
- }
- static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char *start)
- {
- int unit_len;
- if (pce->compile_options & PCRE_UTF8) {
- char *end = start;
-
- while ((*++end & 0xC0) == 0x80);
- unit_len = end - start;
- } else {
- unit_len = 1;
- }
- return unit_len;
- }
- PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC)
- {
- pcre *re = NULL;
- pcre_extra *extra;
- int coptions = 0;
- int soptions = 0;
- const char *error;
- int erroffset;
- char delimiter;
- char start_delimiter;
- char end_delimiter;
- char *p, *pp;
- char *pattern;
- int do_study = 0;
- int poptions = 0;
- int count = 0;
- unsigned const char *tables = NULL;
- #if HAVE_SETLOCALE
- char *locale;
- #endif
- pcre_cache_entry *pce;
- pcre_cache_entry new_entry;
- char *tmp = NULL;
- #if HAVE_SETLOCALE
- # if defined(PHP_WIN32) && defined(ZTS)
- _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
- # endif
- locale = setlocale(LC_CTYPE, NULL);
- #endif
-
- if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) {
-
- if (pcre_fullinfo(pce->re, NULL, PCRE_INFO_CAPTURECOUNT, &count) == PCRE_ERROR_BADMAGIC) {
- zend_hash_clean(&PCRE_G(pcre_cache));
- } else {
- #if HAVE_SETLOCALE
- if (!strcmp(pce->locale, locale)) {
- #endif
- return pce;
- #if HAVE_SETLOCALE
- }
- #endif
- }
- }
- p = regex;
-
- while (isspace((int)*(unsigned char *)p)) p++;
- if (*p == 0) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING,
- p < regex + regex_len ? "Null byte in regex" : "Empty regular expression");
- return NULL;
- }
-
- delimiter = *p++;
- if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
- php_error_docref(NULL TSRMLS_CC,E_WARNING, "Delimiter must not be alphanumeric or backslash");
- return NULL;
- }
- start_delimiter = delimiter;
- if ((pp = strchr("([{< )]}> )]}>", delimiter)))
- delimiter = pp[5];
- end_delimiter = delimiter;
- pp = p;
- if (start_delimiter == end_delimiter) {
-
- while (*pp != 0) {
- if (*pp == '\\' && pp[1] != 0) pp++;
- else if (*pp == delimiter)
- break;
- pp++;
- }
- } else {
-
- int brackets = 1;
- while (*pp != 0) {
- if (*pp == '\\' && pp[1] != 0) pp++;
- else if (*pp == end_delimiter && --brackets <= 0)
- break;
- else if (*pp == start_delimiter)
- brackets++;
- pp++;
- }
- }
- if (*pp == 0) {
- if (pp < regex + regex_len) {
- php_error_docref(NULL TSRMLS_CC,E_WARNING, "Null byte in regex");
- } else if (start_delimiter == end_delimiter) {
- php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter);
- } else {
- php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
- }
- return NULL;
- }
-
- pattern = estrndup(p, pp-p);
-
- pp++;
-
- while (pp < regex + regex_len) {
- switch (*pp++) {
-
- case 'i': coptions |= PCRE_CASELESS; break;
- case 'm': coptions |= PCRE_MULTILINE; break;
- case 's': coptions |= PCRE_DOTALL; break;
- case 'x': coptions |= PCRE_EXTENDED; break;
-
- case 'A': coptions |= PCRE_ANCHORED; break;
- case 'D': coptions |= PCRE_DOLLAR_ENDONLY;break;
- case 'S': do_study = 1; break;
- case 'U': coptions |= PCRE_UNGREEDY; break;
- case 'X': coptions |= PCRE_EXTRA; break;
- case 'u': coptions |= PCRE_UTF8;
-
- #ifdef PCRE_UCP
- coptions |= PCRE_UCP;
- #endif
- break;
- case 'J': coptions |= PCRE_DUPNAMES; break;
-
- case 'e': poptions |= PREG_REPLACE_EVAL; break;
- case ' ':
- case '\n':
- break;
- default:
- if (pp[-1]) {
- php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown modifier '%c'", pp[-1]);
- } else {
- php_error_docref(NULL TSRMLS_CC,E_WARNING, "Null byte in regex");
- }
- efree(pattern);
- return NULL;
- }
- }
- #if HAVE_SETLOCALE
- if (strcmp(locale, "C"))
- tables = pcre_maketables();
- #endif
-
- re = pcre_compile(pattern,
- coptions,
- &error,
- &erroffset,
- tables);
- if (re == NULL) {
- php_error_docref(NULL TSRMLS_CC,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
- efree(pattern);
- if (tables) {
- pefree((void*)tables, 1);
- }
- return NULL;
- }
-
- if (do_study) {
- extra = pcre_study(re, soptions, &error);
- if (extra) {
- extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
- }
- if (error != NULL) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern");
- }
- } else {
- extra = NULL;
- }
- efree(pattern);
-
- if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
- int num_clean = PCRE_CACHE_SIZE / 8;
- zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC);
- }
-
- new_entry.re = re;
- new_entry.extra = extra;
- new_entry.preg_options = poptions;
- new_entry.compile_options = coptions;
- #if HAVE_SETLOCALE
- new_entry.locale = pestrdup(locale, 1);
- new_entry.tables = tables;
- #endif
- new_entry.refcount = 0;
-
- if (IS_INTERNED(regex)) {
- regex = tmp = estrndup(regex, regex_len);
- }
- zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry,
- sizeof(pcre_cache_entry), (void**)&pce);
- if (tmp) {
- efree(tmp);
- }
- return pce;
- }
- PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC)
- {
- pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
- if (extra) {
- *extra = pce ? pce->extra : NULL;
- }
- if (preg_options) {
- *preg_options = pce ? pce->preg_options : 0;
- }
- return pce ? pce->re : NULL;
- }
- PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC)
- {
- pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
- if (extra) {
- *extra = pce ? pce->extra : NULL;
- }
- if (preg_options) {
- *preg_options = pce ? pce->preg_options : 0;
- }
- if (compile_options) {
- *compile_options = pce ? pce->compile_options : 0;
- }
- return pce ? pce->re : NULL;
- }
- static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
- {
- zval *match_pair;
- ALLOC_ZVAL(match_pair);
- array_init(match_pair);
- INIT_PZVAL(match_pair);
-
- add_next_index_stringl(match_pair, str, len, 1);
- add_next_index_long(match_pair, offset);
- if (name) {
- zval_add_ref(&match_pair);
- zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL);
- }
- zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL);
- }
- static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
- {
-
- char *regex;
- char *subject;
- int regex_len;
- int subject_len;
- pcre_cache_entry *pce;
- zval *subpats = NULL;
- long flags = 0;
- long start_offset = 0;
- if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|zll", ®ex, ®ex_len,
- &subject, &subject_len, &subpats, &flags, &start_offset) == FAILURE) {
- RETURN_FALSE;
- }
-
- if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
- RETURN_FALSE;
- }
- pce->refcount++;
- php_pcre_match_impl(pce, subject, subject_len, return_value, subpats,
- global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC);
- pce->refcount--;
- }
- PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
- zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC)
- {
- zval *result_set,
- **match_sets = NULL;
- pcre_extra *extra = pce->extra;
- pcre_extra extra_data;
- int exoptions = 0;
- int count = 0;
- int *offsets;
- int num_subpats;
- int size_offsets;
- int matched;
- int g_notempty = 0;
- const char **stringlist;
- char **subpat_names;
- int i, rc;
- int subpats_order;
- int offset_capture;
- unsigned char *mark = NULL;
- zval *marks = NULL;
-
- if (subpats != NULL) {
- zval_dtor(subpats);
- array_init(subpats);
- }
- subpats_order = global ? PREG_PATTERN_ORDER : 0;
- if (use_flags) {
- offset_capture = flags & PREG_OFFSET_CAPTURE;
-
- if (flags & 0xff) {
- subpats_order = flags & 0xff;
- }
- if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
- (!global && subpats_order != 0)) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid flags specified");
- return;
- }
- } else {
- offset_capture = 0;
- }
-
- if (start_offset < 0) {
- start_offset = subject_len + start_offset;
- if (start_offset < 0) {
- start_offset = 0;
- }
- }
- if (extra == NULL) {
- extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
- extra = &extra_data;
- }
- extra->match_limit = PCRE_G(backtrack_limit);
- extra->match_limit_recursion = PCRE_G(recursion_limit);
- #ifdef PCRE_EXTRA_MARK
- extra->mark = &mark;
- extra->flags |= PCRE_EXTRA_MARK;
- #endif
-
- rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
- if (rc < 0) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
- RETURN_FALSE;
- }
- num_subpats++;
- size_offsets = num_subpats * 3;
-
- subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
- if (!subpat_names) {
- RETURN_FALSE;
- }
- offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
- memset(offsets, 0, size_offsets*sizeof(int));
-
- if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
- match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0);
- for (i=0; i<num_subpats; i++) {
- ALLOC_ZVAL(match_sets[i]);
- array_init(match_sets[i]);
- INIT_PZVAL(match_sets[i]);
- }
- }
- matched = 0;
- PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
- do {
-
- count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
- exoptions|g_notempty, offsets, size_offsets);
-
- exoptions |= PCRE_NO_UTF8_CHECK;
-
- if (count == 0) {
- php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
- count = size_offsets/3;
- }
-
- if (count > 0) {
- matched++;
-
- if (subpats != NULL) {
-
- if ((offsets[1] - offsets[0] < 0) || pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
- efree(subpat_names);
- efree(offsets);
- if (match_sets) efree(match_sets);
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed");
- RETURN_FALSE;
- }
- if (global) {
- if (subpats && subpats_order == PREG_PATTERN_ORDER) {
-
- for (i = 0; i < count; i++) {
- if (offset_capture) {
- add_offset_pair(match_sets[i], (char *)stringlist[i],
- offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
- } else {
- add_next_index_stringl(match_sets[i], (char *)stringlist[i],
- offsets[(i<<1)+1] - offsets[i<<1], 1);
- }
- }
-
- if (mark) {
- if (!marks) {
- MAKE_STD_ZVAL(marks);
- array_init(marks);
- }
- add_index_string(marks, matched - 1, (char *) mark, 1);
- }
-
- if (count < num_subpats) {
- for (; i < num_subpats; i++) {
- add_next_index_string(match_sets[i], "", 1);
- }
- }
- } else {
-
- ALLOC_ZVAL(result_set);
- array_init(result_set);
- INIT_PZVAL(result_set);
-
- for (i = 0; i < count; i++) {
- if (offset_capture) {
- add_offset_pair(result_set, (char *)stringlist[i],
- offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
- } else {
- if (subpat_names[i]) {
- add_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i],
- offsets[(i<<1)+1] - offsets[i<<1], 1);
- }
- add_next_index_stringl(result_set, (char *)stringlist[i],
- offsets[(i<<1)+1] - offsets[i<<1], 1);
- }
- }
-
- if (mark) {
- add_assoc_string(result_set, "MARK", (char *) mark, 1);
- }
-
- zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL);
- }
- } else {
-
- for (i = 0; i < count; i++) {
- if (offset_capture) {
- add_offset_pair(subpats, (char *)stringlist[i],
- offsets[(i<<1)+1] - offsets[i<<1],
- offsets[i<<1], subpat_names[i]);
- } else {
- if (subpat_names[i]) {
- add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
- offsets[(i<<1)+1] - offsets[i<<1], 1);
- }
- add_next_index_stringl(subpats, (char *)stringlist[i],
- offsets[(i<<1)+1] - offsets[i<<1], 1);
- }
- }
-
- if (mark) {
- add_assoc_string(subpats, "MARK", (char *) mark, 1);
- }
- }
- pcre_free((void *) stringlist);
- }
- } else if (count == PCRE_ERROR_NOMATCH) {
-
- if (g_notempty != 0 && start_offset < subject_len) {
- int unit_len = calculate_unit_length(pce, subject + start_offset);
- offsets[0] = start_offset;
- offsets[1] = start_offset + unit_len;
- } else
- break;
- } else {
- pcre_handle_exec_error(count TSRMLS_CC);
- break;
- }
-
- g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
-
- start_offset = offsets[1];
- } while (global);
-
- if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
- for (i = 0; i < num_subpats; i++) {
- if (subpat_names[i]) {
- zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i],
- strlen(subpat_names[i])+1, &match_sets[i], sizeof(zval *), NULL);
- Z_ADDREF_P(match_sets[i]);
- }
- zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL);
- }
- efree(match_sets);
- if (marks) {
- add_assoc_zval(subpats, "MARK", marks);
- }
- }
- efree(offsets);
- efree(subpat_names);
-
- if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
- RETVAL_LONG(matched);
- } else {
- RETVAL_FALSE;
- }
- }
- static PHP_FUNCTION(preg_match)
- {
- php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
- }
- static PHP_FUNCTION(preg_match_all)
- {
- php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
- }
/* {{{ preg_get_backref
 * Try to parse a back-reference at *str. The first character is the
 * introducer (callers pass a string starting with '\\' or '$'); it is
 * followed by one or two decimal digits. The braced form "${N}" / "${NN}" is
 * accepted only when the introducer is '$'.
 *
 * str      in/out: on success advanced past the reference, else untouched
 * backref  out: the parsed number; may already have been written when the
 *          function fails on a missing closing brace
 *
 * Returns 1 when a reference was parsed, 0 otherwise. Strings shorter than
 * two characters, including the empty string, never match. */
static int preg_get_backref(char **str, int *backref)
{
	char *walk = *str;
	int in_brace = 0;

	/* Need the introducer plus at least one more character; testing walk[0]
	 * first keeps an empty string from being read past its terminator. */
	if (walk[0] == 0 || walk[1] == 0) {
		return 0;
	}

	if (walk[0] == '$' && walk[1] == '{') {
		in_brace = 1;
		walk++;
	}
	walk++;

	/* First digit is mandatory. */
	if (*walk < '0' || *walk > '9') {
		return 0;
	}
	*backref = *walk - '0';
	walk++;

	/* Second digit is optional. */
	if (*walk >= '0' && *walk <= '9') {
		*backref = *backref * 10 + (*walk - '0');
		walk++;
	}

	if (in_brace) {
		if (*walk != '}') {
			return 0;
		}
		walk++;
	}

	*str = walk;
	return 1;
}
/* }}} */
- static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark, char **result TSRMLS_DC)
- {
- zval *retval_ptr;
- zval **args[1];
- zval *subpats;
- int result_len;
- int i;
- MAKE_STD_ZVAL(subpats);
- array_init(subpats);
- for (i = 0; i < count; i++) {
- if (subpat_names[i]) {
- add_assoc_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1], 1);
- }
- add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
- }
- if (mark) {
- add_assoc_string(subpats, "MARK", (char *) mark, 1);
- }
- args[0] = &subpats;
- if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) {
- convert_to_string_ex(&retval_ptr);
- *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
- result_len = Z_STRLEN_P(retval_ptr);
- zval_ptr_dtor(&retval_ptr);
- } else {
- if (!EG(exception)) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
- }
- result_len = offsets[1] - offsets[0];
- *result = estrndup(&subject[offsets[0]], result_len);
- }
- zval_ptr_dtor(&subpats);
- return result_len;
- }
- static int preg_do_eval(char *eval_str, int eval_str_len, char *subject,
- int *offsets, int count, char **result TSRMLS_DC)
- {
- zval retval;
- char *eval_str_end,
- *match,
- *esc_match,
- *walk,
- *segment,
- walk_last;
- int match_len;
- int esc_match_len;
- int result_len;
- int backref;
- char *compiled_string_description;
- smart_str code = {0};
- eval_str_end = eval_str + eval_str_len;
- walk = segment = eval_str;
- walk_last = 0;
- while (walk < eval_str_end) {
-
- if ('\\' == *walk || '$' == *walk) {
- smart_str_appendl(&code, segment, walk - segment);
- if (walk_last == '\\') {
- code.c[code.len-1] = *walk++;
- segment = walk;
- walk_last = 0;
- continue;
- }
- segment = walk;
- if (preg_get_backref(&walk, &backref)) {
- if (backref < count) {
-
- match = subject + offsets[backref<<1];
- match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
- if (match_len) {
- esc_match = php_addslashes(match, match_len, &esc_match_len, 0 TSRMLS_CC);
- } else {
- esc_match = match;
- esc_match_len = 0;
- }
- } else {
- esc_match = "";
- esc_match_len = 0;
- }
- smart_str_appendl(&code, esc_match, esc_match_len);
- segment = walk;
-
- if (esc_match_len)
- efree(esc_match);
- continue;
- }
- }
- walk++;
- walk_last = walk[-1];
- }
- smart_str_appendl(&code, segment, walk - segment);
- smart_str_0(&code);
- compiled_string_description = zend_make_compiled_string_description("regexp code" TSRMLS_CC);
-
- if (zend_eval_stringl(code.c, code.len, &retval, compiled_string_description TSRMLS_CC) == FAILURE) {
- efree(compiled_string_description);
- php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, code.c);
-
- }
- efree(compiled_string_description);
- convert_to_string(&retval);
-
- *result = estrndup(Z_STRVAL(retval), Z_STRLEN(retval));
- result_len = Z_STRLEN(retval);
-
- zval_dtor(&retval);
- smart_str_free(&code);
- return result_len;
- }
- PHPAPI char *php_pcre_replace(char *regex, int regex_len,
- char *subject, int subject_len,
- zval *replace_val, int is_callable_replace,
- int *result_len, int limit, int *replace_count TSRMLS_DC)
- {
- pcre_cache_entry *pce;
- char *result;
-
- if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
- return NULL;
- }
- pce->refcount++;
- result = php_pcre_replace_impl(pce, subject, subject_len, replace_val,
- is_callable_replace, result_len, limit, replace_count TSRMLS_CC);
- pce->refcount--;
- return result;
- }
- PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val,
- int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC)
- {
- pcre_extra *extra = pce->extra;
- pcre_extra extra_data;
- int exoptions = 0;
- int count = 0;
- int *offsets;
- char **subpat_names;
- int num_subpats;
- int size_offsets;
- size_t new_len;
- size_t alloc_len;
- int eval_result_len=0;
- int match_len;
- int backref;
- int eval;
- int start_offset;
- int g_notempty=0;
- int replace_len=0;
- char *result,
- *replace=NULL,
- *new_buf,
- *walkbuf,
- *walk,
- *match,
- *piece,
- *replace_end=NULL,
- *eval_result,
- walk_last;
- int rc;
- unsigned char *mark = NULL;
- if (extra == NULL) {
- extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
- extra = &extra_data;
- }
- extra->match_limit = PCRE_G(backtrack_limit);
- extra->match_limit_recursion = PCRE_G(recursion_limit);
- #ifdef PCRE_EXTRA_MARK
- extra->mark = &mark;
- extra->flags |= PCRE_EXTRA_MARK;
- #endif
- eval = pce->preg_options & PREG_REPLACE_EVAL;
- if (is_callable_replace) {
- if (eval) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier /e cannot be used with replacement callback");
- return NULL;
- }
- } else {
- replace = Z_STRVAL_P(replace_val);
- replace_len = Z_STRLEN_P(replace_val);
- replace_end = replace + replace_len;
- }
- if (eval) {
- php_error_docref(NULL TSRMLS_CC, E_DEPRECATED, "The /e modifier is deprecated, use preg_replace_callback instead");
- }
-
- rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
- if (rc < 0) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
- return NULL;
- }
- num_subpats++;
- size_offsets = num_subpats * 3;
-
- subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
- if (!subpat_names) {
- return NULL;
- }
- offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
- result = safe_emalloc(subject_len, 2*sizeof(char), 1);
- alloc_len = 2 * (size_t)subject_len + 1;
-
- match = NULL;
- *result_len = 0;
- start_offset = 0;
- PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
- while (1) {
-
- count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
- exoptions|g_notempty, offsets, size_offsets);
-
- exoptions |= PCRE_NO_UTF8_CHECK;
-
- if (count == 0) {
- php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
- count = size_offsets/3;
- }
- piece = subject + start_offset;
- if (count > 0 && (offsets[1] - offsets[0] >= 0) && (limit == -1 || limit > 0)) {
- if (replace_count) {
- ++*replace_count;
- }
-
- match = subject + offsets[0];
- new_len = *result_len + offsets[0] - start_offset;
-
- if (eval) {
- eval_result_len = preg_do_eval(replace, replace_len, subject,
- offsets, count, &eval_result TSRMLS_CC);
- new_len += eval_result_len;
- } else if (is_callable_replace) {
-
- eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark, &eval_result TSRMLS_CC);
- new_len += eval_result_len;
- } else {
- walk = replace;
- walk_last = 0;
- while (walk < replace_end) {
- if ('\\' == *walk || '$' == *walk) {
- if (walk_last == '\\') {
- walk++;
- walk_last = 0;
- continue;
- }
- if (preg_get_backref(&walk, &backref)) {
- if (backref < count)
- new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
- continue;
- }
- }
- new_len++;
- walk++;
- walk_last = walk[-1];
- }
- }
- if (new_len + 1 > alloc_len) {
- new_buf = safe_emalloc(2, new_len + 1, alloc_len);
- alloc_len = 1 + alloc_len + 2 * (size_t)new_len;
- memcpy(new_buf, result, *result_len);
- efree(result);
- result = new_buf;
- }
-
- memcpy(&result[*result_len], piece, match-piece);
- *result_len += match-piece;
-
- walkbuf = result + *result_len;
-
- if (eval || is_callable_replace) {
- memcpy(walkbuf, eval_result, eval_result_len);
- *result_len += eval_result_len;
- STR_FREE(eval_result);
- } else {
- walk = replace;
- walk_last = 0;
- while (walk < replace_end) {
- if ('\\' == *walk || '$' == *walk) {
- if (walk_last == '\\') {
- *(walkbuf-1) = *walk++;
- walk_last = 0;
- continue;
- }
- if (preg_get_backref(&walk, &backref)) {
- if (backref < count) {
- match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
- memcpy(walkbuf, subject + offsets[backref<<1], match_len);
- walkbuf += match_len;
- }
- continue;
- }
- }
- *walkbuf++ = *walk++;
- walk_last = walk[-1];
- }
- *walkbuf = '\0';
-
- *result_len += walkbuf - (result + *result_len);
- }
- if (limit != -1)
- limit--;
- } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
-
- if (g_notempty != 0 && start_offset < subject_len) {
- int unit_len = calculate_unit_length(pce, piece);
- offsets[0] = start_offset;
- offsets[1] = start_offset + unit_len;
- memcpy(&result[*result_len], piece, unit_len);
- *result_len += unit_len;
- } else {
- new_len = *result_len + subject_len - start_offset;
- if (new_len + 1 > alloc_len) {
- new_buf = safe_emalloc(new_len, sizeof(char), 1);
- alloc_len = (size_t)new_len + 1;
- memcpy(new_buf, result, *result_len);
- efree(result);
- result = new_buf;
- }
-
- memcpy(&result[*result_len], piece, subject_len - start_offset);
- *result_len += subject_len - start_offset;
- result[*result_len] = '\0';
- break;
- }
- } else {
- pcre_handle_exec_error(count TSRMLS_CC);
- efree(result);
- result = NULL;
- break;
- }
-
- g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
-
- start_offset = offsets[1];
- }
- efree(offsets);
- efree(subpat_names);
- if(result && (size_t)(*result_len) > INT_MAX) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Result is too big, max is %d", INT_MAX);
- efree(result);
- result = NULL;
- }
- return result;
- }
- static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, int *result_len, int limit, int is_callable_replace, int *replace_count TSRMLS_DC)
- {
- zval **regex_entry,
- **replace_entry = NULL,
- *replace_value,
- empty_replace;
- char *subject_value,
- *result;
- int subject_len;
-
- convert_to_string_ex(subject);
-
- ZVAL_STRINGL(&empty_replace, "", 0, 0);
-
- if (Z_TYPE_P(regex) == IS_ARRAY) {
-
- subject_value = estrndup(Z_STRVAL_PP(subject), Z_STRLEN_PP(subject));
- subject_len = Z_STRLEN_PP(subject);
- *result_len = subject_len;
- zend_hash_internal_pointer_reset(Z_ARRVAL_P(regex));
- replace_value = replace;
- if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace)
- zend_hash_internal_pointer_reset(Z_ARRVAL_P(replace));
-
- while (zend_hash_get_current_data(Z_ARRVAL_P(regex), (void **)®ex_entry) == SUCCESS) {
-
- convert_to_string_ex(regex_entry);
-
- if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
-
- if (zend_hash_get_current_data(Z_ARRVAL_P(replace), (void **)&replace_entry) == SUCCESS) {
- if (!is_callable_replace) {
- convert_to_string_ex(replace_entry);
- }
- replace_value = *replace_entry;
- zend_hash_move_forward(Z_ARRVAL_P(replace));
- } else {
-
- replace_value = &empty_replace;
- }
- }
-
- if ((result = php_pcre_replace(Z_STRVAL_PP(regex_entry),
- Z_STRLEN_PP(regex_entry),
- subject_value,
- subject_len,
- replace_value,
- is_callable_replace,
- result_len,
- limit,
- replace_count TSRMLS_CC)) != NULL) {
- efree(subject_value);
- subject_value = result;
- subject_len = *result_len;
- } else {
- efree(subject_value);
- return NULL;
- }
- zend_hash_move_forward(Z_ARRVAL_P(regex));
- }
- return subject_value;
- } else {
- result = php_pcre_replace(Z_STRVAL_P(regex),
- Z_STRLEN_P(regex),
- Z_STRVAL_PP(subject),
- Z_STRLEN_PP(subject),
- replace,
- is_callable_replace,
- result_len,
- limit,
- replace_count TSRMLS_CC);
- return result;
- }
- }
- static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_replace, int is_filter)
- {
- zval **regex,
- **replace,
- **subject,
- **subject_entry,
- **zcount = NULL;
- char *result;
- int result_len;
- int limit_val = -1;
- long limit = -1;
- char *string_key;
- uint string_key_len;
- ulong num_key;
- char *callback_name;
- int replace_count=0, old_replace_count;
-
- if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZZ|lZ", ®ex, &replace, &subject, &limit, &zcount) == FAILURE) {
- return;
- }
- if (!is_callable_replace && Z_TYPE_PP(replace) == IS_ARRAY && Z_TYPE_PP(regex) != IS_ARRAY) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
- RETURN_FALSE;
- }
- SEPARATE_ZVAL(replace);
- if (Z_TYPE_PP(replace) != IS_ARRAY && (Z_TYPE_PP(replace) != IS_OBJECT || !is_callable_replace)) {
- convert_to_string_ex(replace);
- }
- if (is_callable_replace) {
- if (!zend_is_callable(*replace, 0, &callback_name TSRMLS_CC)) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name);
- efree(callback_name);
- MAKE_COPY_ZVAL(subject, return_value);
- return;
- }
- efree(callback_name);
- }
- SEPARATE_ZVAL(regex);
- SEPARATE_ZVAL(subject);
- if (ZEND_NUM_ARGS() > 3) {
- limit_val = limit;
- }
- if (Z_TYPE_PP(regex) != IS_ARRAY)
- convert_to_string_ex(regex);
-
- if (Z_TYPE_PP(subject) == IS_ARRAY) {
- array_init(return_value);
- zend_hash_internal_pointer_reset(Z_ARRVAL_PP(subject));
-
- while (zend_hash_get_current_data(Z_ARRVAL_PP(subject), (void **)&subject_entry) == SUCCESS) {
- SEPARATE_ZVAL(subject_entry);
- old_replace_count = replace_count;
- if ((result = php_replace_in_subject(*regex, *replace, subject_entry, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
- if (!is_filter || replace_count > old_replace_count) {
-
- switch(zend_hash_get_current_key_ex(Z_ARRVAL_PP(subject), &string_key, &string_key_len, &num_key, 0, NULL))
- {
- case HASH_KEY_IS_STRING:
- add_assoc_stringl_ex(return_value, string_key, string_key_len, result, result_len, 0);
- break;
- case HASH_KEY_IS_LONG:
- add_index_stringl(return_value, num_key, result, result_len, 0);
- break;
- }
- } else {
- efree(result);
- }
- }
- zend_hash_move_forward(Z_ARRVAL_PP(subject));
- }
- } else {
- old_replace_count = replace_count;
- if ((result = php_replace_in_subject(*regex, *replace, subject, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
- if (!is_filter || replace_count > old_replace_count) {
- RETVAL_STRINGL(result, result_len, 0);
- } else {
- efree(result);
- }
- }
- }
- if (ZEND_NUM_ARGS() > 4) {
- zval_dtor(*zcount);
- ZVAL_LONG(*zcount, replace_count);
- }
- }
- static PHP_FUNCTION(preg_replace)
- {
- preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
- }
- static PHP_FUNCTION(preg_replace_callback)
- {
- preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1, 0);
- }
- static PHP_FUNCTION(preg_filter)
- {
- preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
- }
- static PHP_FUNCTION(preg_split)
- {
- char *regex;
- char *subject;
- int regex_len;
- int subject_len;
- long limit_val = -1;
- long flags = 0;
- pcre_cache_entry *pce;
-
- if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ll", ®ex, ®ex_len,
- &subject, &subject_len, &limit_val, &flags) == FAILURE) {
- RETURN_FALSE;
- }
-
- if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
- RETURN_FALSE;
- }
- pce->refcount++;
- php_pcre_split_impl(pce, subject, subject_len, return_value, limit_val, flags TSRMLS_CC);
- pce->refcount--;
- }
- PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
- long limit_val, long flags TSRMLS_DC)
- {
- pcre_extra *extra = NULL;
- pcre *re_bump = NULL;
- pcre_extra *extra_bump = NULL;
- pcre_extra extra_data;
- int *offsets;
- int size_offsets;
- int exoptions = 0;
- int count = 0;
- int start_offset;
- int next_offset;
- int g_notempty = 0;
- char *last_match;
- int rc;
- int no_empty;
- int delim_capture;
- int offset_capture;
- no_empty = flags & PREG_SPLIT_NO_EMPTY;
- delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
- offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
- if (limit_val == 0) {
- limit_val = -1;
- }
- if (extra == NULL) {
- extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
- extra = &extra_data;
- }
- extra->match_limit = PCRE_G(backtrack_limit);
- extra->match_limit_recursion = PCRE_G(recursion_limit);
- #ifdef PCRE_EXTRA_MARK
- extra->flags &= ~PCRE_EXTRA_MARK;
- #endif
-
- array_init(return_value);
-
- rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
- if (rc < 0) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
- RETURN_FALSE;
- }
- size_offsets = (size_offsets + 1) * 3;
- offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
-
- start_offset = 0;
- next_offset = 0;
- last_match = subject;
- PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
-
- while ((limit_val == -1 || limit_val > 1)) {
- count = pcre_exec(pce->re, extra, subject,
- subject_len, start_offset,
- exoptions|g_notempty, offsets, size_offsets);
-
- exoptions |= PCRE_NO_UTF8_CHECK;
-
- if (count == 0) {
- php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
- count = size_offsets/3;
- }
-
- if (count > 0 && (offsets[1] - offsets[0] >= 0)) {
- if (!no_empty || &subject[offsets[0]] != last_match) {
- if (offset_capture) {
-
- add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL);
- } else {
-
- add_next_index_stringl(return_value, last_match,
- &subject[offsets[0]]-last_match, 1);
- }
-
- if (limit_val != -1)
- limit_val--;
- }
- last_match = &subject[offsets[1]];
- next_offset = offsets[1];
- if (delim_capture) {
- int i, match_len;
- for (i = 1; i < count; i++) {
- match_len = offsets[(i<<1)+1] - offsets[i<<1];
-
- if (!no_empty || match_len > 0) {
- if (offset_capture) {
- add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
- } else {
- add_next_index_stringl(return_value,
- &subject[offsets[i<<1]],
- match_len, 1);
- }
- }
- }
- }
- } else if (count == PCRE_ERROR_NOMATCH) {
-
- if (g_notempty != 0 && start_offset < subject_len) {
- if (pce->compile_options & PCRE_UTF8) {
- if (re_bump == NULL) {
- int dummy;
- if ((re_bump = pcre_get_compiled_regex("/./us", &extra_bump, &dummy TSRMLS_CC)) == NULL) {
- RETURN_FALSE;
- }
- }
- count = pcre_exec(re_bump, extra_bump, subject,
- subject_len, start_offset,
- exoptions, offsets, size_offsets);
- if (count < 1) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
- RETURN_FALSE;
- }
- } else {
- offsets[0] = start_offset;
- offsets[1] = start_offset + 1;
- }
- } else
- break;
- } else {
- pcre_handle_exec_error(count TSRMLS_CC);
- break;
- }
-
- g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
-
- start_offset = offsets[1];
- }
- start_offset = last_match - subject;
- if (!no_empty || start_offset < subject_len)
- {
- if (offset_capture) {
-
- add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
- } else {
-
- add_next_index_stringl(return_value, last_match, subject + subject_len - last_match, 1);
- }
- }
-
- efree(offsets);
- }
- static PHP_FUNCTION(preg_quote)
- {
- int in_str_len;
- char *in_str;
- char *in_str_end;
- int delim_len = 0;
- char *delim = NULL;
- char *out_str,
- *p,
- *q,
- delim_char=0,
- c;
- zend_bool quote_delim = 0;
-
- if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str, &in_str_len,
- &delim, &delim_len) == FAILURE) {
- return;
- }
- in_str_end = in_str + in_str_len;
-
- if (in_str == in_str_end) {
- RETURN_EMPTY_STRING();
- }
- if (delim && *delim) {
- delim_char = delim[0];
- quote_delim = 1;
- }
-
- out_str = safe_emalloc_string(4, in_str_len, 1);
-
- for(p = in_str, q = out_str; p != in_str_end; p++) {
- c = *p;
- switch(c) {
- case '.':
- case '\\':
- case '+':
- case '*':
- case '?':
- case '[':
- case '^':
- case ']':
- case '$':
- case '(':
- case ')':
- case '{':
- case '}':
- case '=':
- case '!':
- case '>':
- case '<':
- case '|':
- case ':':
- case '-':
- *q++ = '\\';
- *q++ = c;
- break;
- case '\0':
- *q++ = '\\';
- *q++ = '0';
- *q++ = '0';
- *q++ = '0';
- break;
- default:
- if (quote_delim && c == delim_char)
- *q++ = '\\';
- *q++ = c;
- break;
- }
- }
- *q = '\0';
-
- RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0);
- }
- static PHP_FUNCTION(preg_grep)
- {
- char *regex;
- int regex_len;
- zval *input;
- long flags = 0;
- pcre_cache_entry *pce;
-
- if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa|l", ®ex, ®ex_len,
- &input, &flags) == FAILURE) {
- return;
- }
-
- if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
- RETURN_FALSE;
- }
- pce->refcount++;
- php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC);
- pce->refcount--;
- }
- PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, long flags TSRMLS_DC)
- {
- zval **entry;
- pcre_extra *extra = pce->extra;
- pcre_extra extra_data;
- int *offsets;
- int size_offsets;
- int count = 0;
- char *string_key;
- uint string_key_len;
- ulong num_key;
- zend_bool invert;
- int rc;
- invert = flags & PREG_GREP_INVERT ? 1 : 0;
- if (extra == NULL) {
- extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
- extra = &extra_data;
- }
- extra->match_limit = PCRE_G(backtrack_limit);
- extra->match_limit_recursion = PCRE_G(recursion_limit);
- #ifdef PCRE_EXTRA_MARK
- extra->flags &= ~PCRE_EXTRA_MARK;
- #endif
-
- rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
- if (rc < 0) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
- RETURN_FALSE;
- }
- size_offsets = (size_offsets + 1) * 3;
- offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
-
- array_init(return_value);
- PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
-
- zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
- while (zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) {
- zval subject = **entry;
- if (Z_TYPE_PP(entry) != IS_STRING) {
- zval_copy_ctor(&subject);
- convert_to_string(&subject);
- }
-
- count = pcre_exec(pce->re, extra, Z_STRVAL(subject),
- Z_STRLEN(subject), 0,
- 0, offsets, size_offsets);
-
- if (count == 0) {
- php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
- count = size_offsets/3;
- } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
- pcre_handle_exec_error(count TSRMLS_CC);
- break;
- }
-
- if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
- Z_ADDREF_PP(entry);
-
- switch (zend_hash_get_current_key_ex(Z_ARRVAL_P(input), &string_key, &string_key_len, &num_key, 0, NULL))
- {
- case HASH_KEY_IS_STRING:
- zend_hash_update(Z_ARRVAL_P(return_value), string_key,
- string_key_len, entry, sizeof(zval *), NULL);
- break;
- case HASH_KEY_IS_LONG:
- zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry,
- sizeof(zval *), NULL);
- break;
- }
- }
- if (Z_TYPE_PP(entry) != IS_STRING) {
- zval_dtor(&subject);
- }
- zend_hash_move_forward(Z_ARRVAL_P(input));
- }
- zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
-
- efree(offsets);
- }
- static PHP_FUNCTION(preg_last_error)
- {
- if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) {
- return;
- }
- RETURN_LONG(PCRE_G(error_code));
- }
- ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
- ZEND_ARG_INFO(0, pattern)
- ZEND_ARG_INFO(0, subject)
- ZEND_ARG_INFO(1, subpatterns)
- ZEND_ARG_INFO(0, flags)
- ZEND_ARG_INFO(0, offset)
- ZEND_END_ARG_INFO()
- ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
- ZEND_ARG_INFO(0, pattern)
- ZEND_ARG_INFO(0, subject)
- ZEND_ARG_INFO(1, subpatterns)
- ZEND_ARG_INFO(0, flags)
- ZEND_ARG_INFO(0, offset)
- ZEND_END_ARG_INFO()
- ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
- ZEND_ARG_INFO(0, regex)
- ZEND_ARG_INFO(0, replace)
- ZEND_ARG_INFO(0, subject)
- ZEND_ARG_INFO(0, limit)
- ZEND_ARG_INFO(1, count)
- ZEND_END_ARG_INFO()
- ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
- ZEND_ARG_INFO(0, regex)
- ZEND_ARG_INFO(0, callback)
- ZEND_ARG_INFO(0, subject)
- ZEND_ARG_INFO(0, limit)
- ZEND_ARG_INFO(1, count)
- ZEND_END_ARG_INFO()
- ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
- ZEND_ARG_INFO(0, pattern)
- ZEND_ARG_INFO(0, subject)
- ZEND_ARG_INFO(0, limit)
- ZEND_ARG_INFO(0, flags)
- ZEND_END_ARG_INFO()
- ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
- ZEND_ARG_INFO(0, str)
- ZEND_ARG_INFO(0, delim_char)
- ZEND_END_ARG_INFO()
- ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
- ZEND_ARG_INFO(0, regex)
- ZEND_ARG_INFO(0, input)
- ZEND_ARG_INFO(0, flags)
- ZEND_END_ARG_INFO()
- ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
- ZEND_END_ARG_INFO()
- static const zend_function_entry pcre_functions[] = {
- PHP_FE(preg_match, arginfo_preg_match)
- PHP_FE(preg_match_all, arginfo_preg_match_all)
- PHP_FE(preg_replace, arginfo_preg_replace)
- PHP_FE(preg_replace_callback, arginfo_preg_replace_callback)
- PHP_FE(preg_filter, arginfo_preg_replace)
- PHP_FE(preg_split, arginfo_preg_split)
- PHP_FE(preg_quote, arginfo_preg_quote)
- PHP_FE(preg_grep, arginfo_preg_grep)
- PHP_FE(preg_last_error, arginfo_preg_last_error)
- PHP_FE_END
- };
- zend_module_entry pcre_module_entry = {
- STANDARD_MODULE_HEADER,
- "pcre",
- pcre_functions,
- PHP_MINIT(pcre),
- PHP_MSHUTDOWN(pcre),
- NULL,
- NULL,
- PHP_MINFO(pcre),
- NO_VERSION_YET,
- PHP_MODULE_GLOBALS(pcre),
- PHP_GINIT(pcre),
- PHP_GSHUTDOWN(pcre),
- NULL,
- STANDARD_MODULE_PROPERTIES_EX
- };
/* Emit the module lookup entry point only when COMPILE_DL_PCRE is defined. */
#ifdef COMPILE_DL_PCRE
ZEND_GET_MODULE(pcre)
#endif /* COMPILE_DL_PCRE */
- #endif
|