12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053 |
- /*
- +----------------------------------------------------------------------+
- | Copyright (c) The PHP Group |
- +----------------------------------------------------------------------+
- | This source file is subject to version 3.01 of the PHP license, |
- | that is bundled with this package in the file LICENSE, and is |
- | available through the world-wide-web at the following url: |
- | https://www.php.net/license/3_01.txt |
- | If you did not receive a copy of the PHP license and are unable to |
- | obtain it through the world-wide-web, please send a note to |
- | license@php.net so we can mail you a copy immediately. |
- +----------------------------------------------------------------------+
- | Author: Andrei Zmievski <andrei@php.net> |
- +----------------------------------------------------------------------+
- */
- #include "php.h"
- #include "php_ini.h"
- #include "php_globals.h"
- #include "php_pcre.h"
- #include "php_pcre_arginfo.h"
- #include "ext/standard/info.h"
- #include "ext/standard/basic_functions.h"
- #include "zend_smart_str.h"
- #include "SAPI.h"
- #include "ext/standard/php_string.h"
/* Match-result ordering modes (registered as userland constants in MINIT). */
#define PREG_PATTERN_ORDER 1
#define PREG_SET_ORDER 2
/* Match flags (registered as userland constants in MINIT). */
#define PREG_OFFSET_CAPTURE (1<<8)
#define PREG_UNMATCHED_AS_NULL (1<<9)
/* Split flags (registered as userland constants in MINIT). */
#define PREG_SPLIT_NO_EMPTY (1<<0)
#define PREG_SPLIT_DELIM_CAPTURE (1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE (1<<2)
/* Set when the 'e' pattern modifier is parsed; the pattern is then rejected. */
#define PREG_REPLACE_EVAL (1<<0)
/* Grep flag (registered as a userland constant in MINIT). */
#define PREG_GREP_INVERT (1<<0)
/* NOTE(review): presumably a per-pattern option bit marking JIT use -- confirm at use sites. */
#define PREG_JIT (1<<3)
/* NOTE(review): presumably the upper bound on compiled-pattern cache entries -- confirm at use sites. */
#define PCRE_CACHE_SIZE 4096
/* One cached compiled pattern, stored in PCRE_G(pcre_cache). */
struct _pcre_cache_entry {
	/* Compiled pattern; released with pcre2_code_free() by the cache destructors. */
	pcre2_code *re;
	/* NOTE(review): presumably PREG_* option bits derived from the modifiers -- confirm. */
	uint32_t preg_options;
	/* NOTE(review): presumably the number of capture groups in `re` -- confirm. */
	uint32_t capture_count;
	/* Number of name-table entries iterated by make_subpats_table(). */
	uint32_t name_count;
	/* PCRE2 compile option bits; PCRE2_UTF is tested by calculate_unit_length(). */
	uint32_t compile_options;
	/* Entries with a non-zero refcount are kept by pcre_clean_cache(). */
	uint32_t refcount;
};
PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
#ifdef HAVE_PCRE_JIT_SUPPORT
/* Start and maximum sizes passed to pcre2_jit_stack_create(). */
#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
#define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
/* JIT stack created in php_pcre_init_pcre2() and assigned to mctx by OnUpdateJit. */
ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
#endif
/* General context using (infallible) system allocator. */
ZEND_TLS pcre2_general_context *gctx = NULL;
/* These two are global per thread for now. Though it is possible to use these
per pattern. Either one can copy it and use in pce, or one does no global
contexts at all, but creates for every pce. */
ZEND_TLS pcre2_compile_context *cctx = NULL;
ZEND_TLS pcre2_match_context *mctx = NULL;
/* Match data preallocated in php_pcre_init_pcre2() with PHP_PCRE_PREALLOC_MDATA_SIZE. */
ZEND_TLS pcre2_match_data *mdata = NULL;
/* Reset to 0 in RINIT. NOTE(review): presumably marks `mdata` as currently in use -- confirm. */
ZEND_TLS bool mdata_used = 0;
/* Set to 1 by php_pcre_init_pcre2() once every context exists; 0 otherwise. */
ZEND_TLS uint8_t pcre2_init_ok = 0;
/* The mutex helpers are real only in thread-safe builds with JIT support;
 * in every other configuration they expand to nothing. */
#if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
static MUTEX_T pcre_mt = NULL;
/* Allocate the mutex once, from the main thread only. */
#define php_pcre_mutex_alloc() \
	if (tsrm_is_main_thread() && !pcre_mt) pcre_mt = tsrm_mutex_alloc();
/* Free the mutex from the main thread and reset the handle. */
#define php_pcre_mutex_free() \
	if (tsrm_is_main_thread() && pcre_mt) { tsrm_mutex_free(pcre_mt); pcre_mt = NULL; }
#define php_pcre_mutex_lock() tsrm_mutex_lock(pcre_mt);
#define php_pcre_mutex_unlock() tsrm_mutex_unlock(pcre_mt);
#else
#define php_pcre_mutex_alloc()
#define php_pcre_mutex_free()
#define php_pcre_mutex_lock()
#define php_pcre_mutex_unlock()
#endif
/* Locale character tables produced by pcre2_maketables(), keyed by the ctype locale string. */
ZEND_TLS HashTable char_tables;
- static void php_pcre_free_char_table(zval *data)
- {/*{{{*/
- void *ptr = Z_PTR_P(data);
- pefree(ptr, 1);
- }/*}}}*/
- static void pcre_handle_exec_error(int pcre_code) /* {{{ */
- {
- int preg_code = 0;
- switch (pcre_code) {
- case PCRE2_ERROR_MATCHLIMIT:
- preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
- break;
- case PCRE2_ERROR_RECURSIONLIMIT:
- preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
- break;
- case PCRE2_ERROR_BADUTFOFFSET:
- preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
- break;
- #ifdef HAVE_PCRE_JIT_SUPPORT
- case PCRE2_ERROR_JIT_STACKLIMIT:
- preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
- break;
- #endif
- default:
- if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
- preg_code = PHP_PCRE_BAD_UTF8_ERROR;
- } else {
- preg_code = PHP_PCRE_INTERNAL_ERROR;
- }
- break;
- }
- PCRE_G(error_code) = preg_code;
- }
- /* }}} */
- static const char *php_pcre_get_error_msg(php_pcre_error_code error_code) /* {{{ */
- {
- switch (error_code) {
- case PHP_PCRE_NO_ERROR:
- return "No error";
- case PHP_PCRE_INTERNAL_ERROR:
- return "Internal error";
- case PHP_PCRE_BAD_UTF8_ERROR:
- return "Malformed UTF-8 characters, possibly incorrectly encoded";
- case PHP_PCRE_BAD_UTF8_OFFSET_ERROR:
- return "The offset did not correspond to the beginning of a valid UTF-8 code point";
- case PHP_PCRE_BACKTRACK_LIMIT_ERROR:
- return "Backtrack limit exhausted";
- case PHP_PCRE_RECURSION_LIMIT_ERROR:
- return "Recursion limit exhausted";
- #ifdef HAVE_PCRE_JIT_SUPPORT
- case PHP_PCRE_JIT_STACKLIMIT_ERROR:
- return "JIT stack limit exhausted";
- #endif
- default:
- return "Unknown error";
- }
- }
- /* }}} */
- static void php_free_pcre_cache(zval *data) /* {{{ */
- {
- pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
- if (!pce) return;
- pcre2_code_free(pce->re);
- free(pce);
- }
- /* }}} */
- static void php_efree_pcre_cache(zval *data) /* {{{ */
- {
- pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
- if (!pce) return;
- pcre2_code_free(pce->re);
- efree(pce);
- }
- /* }}} */
- static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
- {
- return pemalloc(size, 1);
- }
/* PCRE2 release callback paired with php_pcre_malloc(); `data` is unused. */
static void php_pcre_free(void *block, void *data)
{
	pefree(block, 1);
}
- static void *php_pcre_emalloc(PCRE2_SIZE size, void *data)
- {
- return emalloc(size);
- }
/* PCRE2 release callback paired with php_pcre_emalloc(); `data` is unused. */
static void php_pcre_efree(void *block, void *data)
{
	efree(block);
}
/* Extra compile options applied to the shared compile context in
 * php_pcre_init_pcre2(); 0 when the library does not define the flag. */
#ifdef PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
/* pcre 10.38 needs PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK, disabled by default */
#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
#else
#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS 0
#endif
/* Size argument passed to pcre2_match_data_create() for the preallocated `mdata`. */
#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
- static void php_pcre_init_pcre2(uint8_t jit)
- {/*{{{*/
- if (!gctx) {
- gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
- if (!gctx) {
- pcre2_init_ok = 0;
- return;
- }
- }
- if (!cctx) {
- cctx = pcre2_compile_context_create(gctx);
- if (!cctx) {
- pcre2_init_ok = 0;
- return;
- }
- }
- pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);
- if (!mctx) {
- mctx = pcre2_match_context_create(gctx);
- if (!mctx) {
- pcre2_init_ok = 0;
- return;
- }
- }
- #ifdef HAVE_PCRE_JIT_SUPPORT
- if (jit && !jit_stack) {
- jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
- if (!jit_stack) {
- pcre2_init_ok = 0;
- return;
- }
- }
- #endif
- if (!mdata) {
- mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
- if (!mdata) {
- pcre2_init_ok = 0;
- return;
- }
- }
- pcre2_init_ok = 1;
- }/*}}}*/
- static void php_pcre_shutdown_pcre2(void)
- {/*{{{*/
- if (gctx) {
- pcre2_general_context_free(gctx);
- gctx = NULL;
- }
- if (cctx) {
- pcre2_compile_context_free(cctx);
- cctx = NULL;
- }
- if (mctx) {
- pcre2_match_context_free(mctx);
- mctx = NULL;
- }
- #ifdef HAVE_PCRE_JIT_SUPPORT
- /* Stack may only be destroyed when no cached patterns
- possibly associated with it do exist. */
- if (jit_stack) {
- pcre2_jit_stack_free(jit_stack);
- jit_stack = NULL;
- }
- #endif
- if (mdata) {
- pcre2_match_data_free(mdata);
- mdata = NULL;
- }
- pcre2_init_ok = 0;
- }/*}}}*/
- static PHP_GINIT_FUNCTION(pcre) /* {{{ */
- {
- php_pcre_mutex_alloc();
- /* If we're on the CLI SAPI, there will only be one request, so we don't need the
- * cache to survive after RSHUTDOWN. */
- pcre_globals->per_request_cache = strcmp(sapi_module.name, "cli") == 0;
- if (!pcre_globals->per_request_cache) {
- zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
- }
- pcre_globals->backtrack_limit = 0;
- pcre_globals->recursion_limit = 0;
- pcre_globals->error_code = PHP_PCRE_NO_ERROR;
- ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
- ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
- #ifdef HAVE_PCRE_JIT_SUPPORT
- pcre_globals->jit = 1;
- #endif
- php_pcre_init_pcre2(1);
- zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
- }
- /* }}} */
- static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
- {
- if (!pcre_globals->per_request_cache) {
- zend_hash_destroy(&pcre_globals->pcre_cache);
- }
- php_pcre_shutdown_pcre2();
- zend_hash_destroy(&char_tables);
- php_pcre_mutex_free();
- }
- /* }}} */
- static PHP_INI_MH(OnUpdateBacktrackLimit)
- {/*{{{*/
- OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
- if (mctx) {
- pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
- }
- return SUCCESS;
- }/*}}}*/
- static PHP_INI_MH(OnUpdateRecursionLimit)
- {/*{{{*/
- OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
- if (mctx) {
- pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
- }
- return SUCCESS;
- }/*}}}*/
- #ifdef HAVE_PCRE_JIT_SUPPORT
- static PHP_INI_MH(OnUpdateJit)
- {/*{{{*/
- OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
- if (PCRE_G(jit) && jit_stack) {
- pcre2_jit_stack_assign(mctx, NULL, jit_stack);
- } else {
- pcre2_jit_stack_assign(mctx, NULL, NULL);
- }
- return SUCCESS;
- }/*}}}*/
- #endif
/* INI settings of the extension. Each entry is bound to a field of
 * zend_pcre_globals and has a handler that also updates the PCRE2 match
 * context. The second string argument is the default value. */
PHP_INI_BEGIN()
	STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
	STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
#ifdef HAVE_PCRE_JIT_SUPPORT
	/* Only present when JIT support is compiled in. */
	STD_PHP_INI_ENTRY("pcre.jit", "1", PHP_INI_ALL, OnUpdateJit, jit, zend_pcre_globals, pcre_globals)
#endif
PHP_INI_END()
/* Fetch a string-valued pcre2_config() item.
 *
 * what: PCRE2_CONFIG_* selector for a string-valued option.
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must free(),
 * or NULL when the option is unavailable or invalid (pcre2_config() returns
 * a non-positive value) or when the allocation fails.
 */
static char *_pcre2_config_str(uint32_t what)
{/*{{{*/
	char *ret;
	/* With a NULL buffer pcre2_config() only reports the required length. */
	int len = pcre2_config(what, NULL);

	/* A negative value is an error code, not a length: never size a buffer from it. */
	if (len <= 0) {
		return NULL;
	}

	ret = malloc((size_t)len + 1);
	if (ret == NULL) {
		return NULL;
	}

	len = pcre2_config(what, ret);
	if (len <= 0) {
		free(ret);
		return NULL;
	}

	return ret;
}/*}}}*/
- /* {{{ PHP_MINFO_FUNCTION(pcre) */
- static PHP_MINFO_FUNCTION(pcre)
- {
- #ifdef HAVE_PCRE_JIT_SUPPORT
- uint32_t flag = 0;
- char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
- #endif
- char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
- char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
- php_info_print_table_start();
- php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
- php_info_print_table_row(2, "PCRE Library Version", version);
- free(version);
- php_info_print_table_row(2, "PCRE Unicode Version", unicode);
- free(unicode);
- #ifdef HAVE_PCRE_JIT_SUPPORT
- if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
- php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
- } else {
- php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
- }
- if (jit_target) {
- php_info_print_table_row(2, "PCRE JIT Target", jit_target);
- }
- free(jit_target);
- #else
- php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
- #endif
- #ifdef HAVE_PCRE_VALGRIND_SUPPORT
- php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
- #endif
- php_info_print_table_end();
- DISPLAY_INI_ENTRIES();
- }
- /* }}} */
- /* {{{ PHP_MINIT_FUNCTION(pcre) */
- static PHP_MINIT_FUNCTION(pcre)
- {
- char *version;
- #ifdef HAVE_PCRE_JIT_SUPPORT
- if (UNEXPECTED(!pcre2_init_ok)) {
- /* Retry. */
- php_pcre_init_pcre2(PCRE_G(jit));
- if (!pcre2_init_ok) {
- return FAILURE;
- }
- }
- #endif
- REGISTER_INI_ENTRIES();
- REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_UNMATCHED_AS_NULL", PREG_UNMATCHED_AS_NULL, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
- version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
- REGISTER_STRING_CONSTANT("PCRE_VERSION", version, CONST_CS | CONST_PERSISTENT);
- free(version);
- REGISTER_LONG_CONSTANT("PCRE_VERSION_MAJOR", PCRE2_MAJOR, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("PCRE_VERSION_MINOR", PCRE2_MINOR, CONST_CS | CONST_PERSISTENT);
- #ifdef HAVE_PCRE_JIT_SUPPORT
- REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 1, CONST_CS | CONST_PERSISTENT);
- #else
- REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 0, CONST_CS | CONST_PERSISTENT);
- #endif
- return SUCCESS;
- }
- /* }}} */
- /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
- static PHP_MSHUTDOWN_FUNCTION(pcre)
- {
- UNREGISTER_INI_ENTRIES();
- return SUCCESS;
- }
- /* }}} */
- /* {{{ PHP_RINIT_FUNCTION(pcre) */
- static PHP_RINIT_FUNCTION(pcre)
- {
- #ifdef HAVE_PCRE_JIT_SUPPORT
- if (UNEXPECTED(!pcre2_init_ok)) {
- /* Retry. */
- php_pcre_mutex_lock();
- php_pcre_init_pcre2(PCRE_G(jit));
- if (!pcre2_init_ok) {
- php_pcre_mutex_unlock();
- return FAILURE;
- }
- php_pcre_mutex_unlock();
- }
- mdata_used = 0;
- #endif
- PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
- PCRE_G(gctx_zmm) = pcre2_general_context_create(php_pcre_emalloc, php_pcre_efree, NULL);
- if (!PCRE_G(gctx_zmm)) {
- return FAILURE;
- }
- if (PCRE_G(per_request_cache)) {
- zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, php_efree_pcre_cache, 0);
- }
- return SUCCESS;
- }
- /* }}} */
- static PHP_RSHUTDOWN_FUNCTION(pcre)
- {
- pcre2_general_context_free(PCRE_G(gctx_zmm));
- PCRE_G(gctx_zmm) = NULL;
- if (PCRE_G(per_request_cache)) {
- zend_hash_destroy(&PCRE_G(pcre_cache));
- }
- zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
- zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
- ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
- ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
- return SUCCESS;
- }
- /* {{{ static pcre_clean_cache */
- static int pcre_clean_cache(zval *data, void *arg)
- {
- pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
- int *num_clean = (int *)arg;
- if (*num_clean > 0 && !pce->refcount) {
- (*num_clean)--;
- return ZEND_HASH_APPLY_REMOVE;
- } else {
- return ZEND_HASH_APPLY_KEEP;
- }
- }
- /* }}} */
- static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
- uint32_t i;
- for (i = 0; i < num_subpats; i++) {
- if (subpat_names[i]) {
- zend_string_release(subpat_names[i]);
- }
- }
- efree(subpat_names);
- }
- /* {{{ static make_subpats_table */
- static zend_string **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)
- {
- uint32_t name_cnt = pce->name_count, name_size, ni = 0;
- char *name_table;
- zend_string **subpat_names;
- int rc1, rc2;
- rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
- rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
- if (rc1 < 0 || rc2 < 0) {
- php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? rc1 : rc2);
- return NULL;
- }
- subpat_names = ecalloc(num_subpats, sizeof(zend_string *));
- while (ni++ < name_cnt) {
- unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
- const char *name = name_table + 2;
- subpat_names[name_idx] = zend_string_init(name, strlen(name), 0);
- if (is_numeric_string(ZSTR_VAL(subpat_names[name_idx]), ZSTR_LEN(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
- php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
- free_subpats_table(subpat_names, num_subpats);
- return NULL;
- }
- name_table += name_size;
- }
- return subpat_names;
- }
- /* }}} */
- /* {{{ static calculate_unit_length */
- /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
- static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, const char *start)
- {
- size_t unit_len;
- if (pce->compile_options & PCRE2_UTF) {
- const char *end = start;
- /* skip continuation bytes */
- while ((*++end & 0xC0) == 0x80);
- unit_len = end - start;
- } else {
- unit_len = 1;
- }
- return unit_len;
- }
- /* }}} */
- /* {{{ pcre_get_compiled_regex_cache */
- PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, int locale_aware)
- {
- pcre2_code *re = NULL;
- #if 10 == PCRE2_MAJOR && 37 == PCRE2_MINOR && !HAVE_BUNDLED_PCRE
- uint32_t coptions = PCRE2_NO_START_OPTIMIZE;
- #else
- uint32_t coptions = 0;
- #endif
- PCRE2_UCHAR error[128];
- PCRE2_SIZE erroffset;
- int errnumber;
- char delimiter;
- char start_delimiter;
- char end_delimiter;
- char *p, *pp;
- char *pattern;
- size_t pattern_len;
- uint32_t poptions = 0;
- const uint8_t *tables = NULL;
- zval *zv;
- pcre_cache_entry new_entry;
- int rc;
- zend_string *key;
- pcre_cache_entry *ret;
- if (locale_aware && BG(ctype_string)) {
- key = zend_string_concat2(
- ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)),
- ZSTR_VAL(regex), ZSTR_LEN(regex));
- } else {
- key = regex;
- }
- /* Try to lookup the cached regex entry, and if successful, just pass
- back the compiled pattern, otherwise go on and compile it. */
- zv = zend_hash_find(&PCRE_G(pcre_cache), key);
- if (zv) {
- if (key != regex) {
- zend_string_release_ex(key, 0);
- }
- return (pcre_cache_entry*)Z_PTR_P(zv);
- }
- p = ZSTR_VAL(regex);
- /* Parse through the leading whitespace, and display a warning if we
- get to the end without encountering a delimiter. */
- while (isspace((int)*(unsigned char *)p)) p++;
- if (*p == 0) {
- if (key != regex) {
- zend_string_release_ex(key, 0);
- }
- php_error_docref(NULL, E_WARNING,
- p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
- pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
- return NULL;
- }
- /* Get the delimiter and display a warning if it is alphanumeric
- or a backslash. */
- delimiter = *p++;
- if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
- if (key != regex) {
- zend_string_release_ex(key, 0);
- }
- php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
- pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
- return NULL;
- }
- start_delimiter = delimiter;
- if ((pp = strchr("([{< )]}> )]}>", delimiter)))
- delimiter = pp[5];
- end_delimiter = delimiter;
- pp = p;
- if (start_delimiter == end_delimiter) {
- /* We need to iterate through the pattern, searching for the ending delimiter,
- but skipping the backslashed delimiters. If the ending delimiter is not
- found, display a warning. */
- while (*pp != 0) {
- if (*pp == '\\' && pp[1] != 0) pp++;
- else if (*pp == delimiter)
- break;
- pp++;
- }
- } else {
- /* We iterate through the pattern, searching for the matching ending
- * delimiter. For each matching starting delimiter, we increment nesting
- * level, and decrement it for each matching ending delimiter. If we
- * reach the end of the pattern without matching, display a warning.
- */
- int brackets = 1; /* brackets nesting level */
- while (*pp != 0) {
- if (*pp == '\\' && pp[1] != 0) pp++;
- else if (*pp == end_delimiter && --brackets <= 0)
- break;
- else if (*pp == start_delimiter)
- brackets++;
- pp++;
- }
- }
- if (*pp == 0) {
- if (key != regex) {
- zend_string_release_ex(key, 0);
- }
- if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
- php_error_docref(NULL,E_WARNING, "Null byte in regex");
- } else if (start_delimiter == end_delimiter) {
- php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
- } else {
- php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
- }
- pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
- return NULL;
- }
- /* Make a copy of the actual pattern. */
- pattern_len = pp - p;
- pattern = estrndup(p, pattern_len);
- /* Move on to the options */
- pp++;
- /* Parse through the options, setting appropriate flags. Display
- a warning if we encounter an unknown modifier. */
- while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
- switch (*pp++) {
- /* Perl compatible options */
- case 'i': coptions |= PCRE2_CASELESS; break;
- case 'm': coptions |= PCRE2_MULTILINE; break;
- case 's': coptions |= PCRE2_DOTALL; break;
- case 'x': coptions |= PCRE2_EXTENDED; break;
- /* PCRE specific options */
- case 'A': coptions |= PCRE2_ANCHORED; break;
- case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break;
- case 'S': /* Pass. */ break;
- case 'X': /* Pass. */ break;
- case 'U': coptions |= PCRE2_UNGREEDY; break;
- case 'u': coptions |= PCRE2_UTF;
- /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
- characters, even in UTF-8 mode. However, this can be changed by setting
- the PCRE2_UCP option. */
- #ifdef PCRE2_UCP
- coptions |= PCRE2_UCP;
- #endif
- break;
- case 'J': coptions |= PCRE2_DUPNAMES; break;
- /* Custom preg options */
- case 'e': poptions |= PREG_REPLACE_EVAL; break;
- case ' ':
- case '\n':
- case '\r':
- break;
- default:
- if (pp[-1]) {
- php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
- } else {
- php_error_docref(NULL,E_WARNING, "Null byte in regex");
- }
- pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
- efree(pattern);
- if (key != regex) {
- zend_string_release_ex(key, 0);
- }
- return NULL;
- }
- }
- if (poptions & PREG_REPLACE_EVAL) {
- php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
- pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
- efree(pattern);
- if (key != regex) {
- zend_string_release_ex(key, 0);
- }
- return NULL;
- }
- if (key != regex) {
- tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(ctype_string));
- if (!tables) {
- zend_string *_k;
- tables = pcre2_maketables(gctx);
- if (UNEXPECTED(!tables)) {
- php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
- pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
- zend_string_release_ex(key, 0);
- efree(pattern);
- return NULL;
- }
- _k = zend_string_init(ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)), 1);
- GC_MAKE_PERSISTENT_LOCAL(_k);
- zend_hash_add_ptr(&char_tables, _k, (void *)tables);
- zend_string_release(_k);
- }
- }
- pcre2_set_character_tables(cctx, tables);
- /* Compile pattern and display a warning if compilation failed. */
- re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
- if (re == NULL) {
- if (key != regex) {
- zend_string_release_ex(key, 0);
- }
- pcre2_get_error_message(errnumber, error, sizeof(error));
- php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
- pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
- efree(pattern);
- return NULL;
- }
- #ifdef HAVE_PCRE_JIT_SUPPORT
- if (PCRE_G(jit)) {
- /* Enable PCRE JIT compiler */
- rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
- if (EXPECTED(rc >= 0)) {
- size_t jit_size = 0;
- if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
- poptions |= PREG_JIT;
- }
- } else if (rc == PCRE2_ERROR_NOMEMORY) {
- php_error_docref(NULL, E_WARNING,
- "Allocation of JIT memory failed, PCRE JIT will be disabled. "
- "This is likely caused by security restrictions. "
- "Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
- PCRE_G(jit) = 0;
- } else {
- pcre2_get_error_message(rc, error, sizeof(error));
- php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
- pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
- }
- }
- #endif
- efree(pattern);
- /*
- * If we reached cache limit, clean out the items from the head of the list;
- * these are supposedly the oldest ones (but not necessarily the least used
- * ones).
- */
- if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
- int num_clean = PCRE_CACHE_SIZE / 8;
- zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
- }
- /* Store the compiled pattern and extra info in the cache. */
- new_entry.re = re;
- new_entry.preg_options = poptions;
- new_entry.compile_options = coptions;
- new_entry.refcount = 0;
- rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
- if (rc < 0) {
- if (key != regex) {
- zend_string_release_ex(key, 0);
- }
- php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
- pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
- return NULL;
- }
- rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
- if (rc < 0) {
- if (key != regex) {
- zend_string_release_ex(key, 0);
- }
- php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
- pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
- return NULL;
- }
- /*
- * Interned strings are not duplicated when stored in HashTable,
- * but all the interned strings created during HTTP request are removed
- * at end of request. However PCRE_G(pcre_cache) must be consistent
- * on the next request as well. So we disable usage of interned strings
- * as hash keys especially for this table.
- * See bug #63180
- */
- if (!(GC_FLAGS(key) & IS_STR_PERMANENT) && !PCRE_G(per_request_cache)) {
- zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
- GC_MAKE_PERSISTENT_LOCAL(str);
- ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
- zend_string_release(str);
- } else {
- ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
- }
- if (key != regex) {
- zend_string_release_ex(key, 0);
- }
- return ret;
- }
- /* }}} */
- /* {{{ pcre_get_compiled_regex_cache */
- PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
- {
- return pcre_get_compiled_regex_cache_ex(regex, 1);
- }
- /* }}} */
- /* {{{ pcre_get_compiled_regex */
- PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
- {
- pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
- if (capture_count) {
- *capture_count = pce ? pce->capture_count : 0;
- }
- return pce ? pce->re : NULL;
- }
- /* }}} */
- /* {{{ pcre_get_compiled_regex_ex */
- PHPAPI pcre2_code* pcre_get_compiled_regex_ex(zend_string *regex, uint32_t *capture_count, uint32_t *preg_options, uint32_t *compile_options)
- {
- pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
- if (preg_options) {
- *preg_options = pce ? pce->preg_options : 0;
- }
- if (compile_options) {
- *compile_options = pce ? pce->compile_options : 0;
- }
- if (capture_count) {
- *capture_count = pce ? pce->capture_count : 0;
- }
- return pce ? pce->re : NULL;
- }
- /* }}} */
- /* XXX For the cases where it's only about match yes/no and no capture
- required, perhaps just a minimum sized data would suffice. */
- PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
- {/*{{{*/
- assert(NULL != re);
- if (EXPECTED(!mdata_used)) {
- int rc = 0;
- if (!capture_count) {
- /* As we deal with a non cached pattern, no other way to gather this info. */
- rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
- }
- if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
- mdata_used = 1;
- return mdata;
- }
- }
- return pcre2_match_data_create_from_pattern(re, gctx);
- }/*}}}*/
- PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
- {/*{{{*/
- if (UNEXPECTED(match_data != mdata)) {
- pcre2_match_data_free(match_data);
- } else {
- mdata_used = 0;
- }
- }/*}}}*/
- static void init_unmatched_null_pair(void) {
- zval val1, val2;
- ZVAL_NULL(&val1);
- ZVAL_LONG(&val2, -1);
- ZVAL_ARR(&PCRE_G(unmatched_null_pair), zend_new_pair(&val1, &val2));
- }
- static void init_unmatched_empty_pair(void) {
- zval val1, val2;
- ZVAL_EMPTY_STRING(&val1);
- ZVAL_LONG(&val2, -1);
- ZVAL_ARR(&PCRE_G(unmatched_empty_pair), zend_new_pair(&val1, &val2));
- }
- static zend_always_inline void populate_match_value_str(
- zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
- ZVAL_STRINGL_FAST(val, subject + start_offset, end_offset - start_offset);
- }
- static inline void populate_match_value(
- zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
- uint32_t unmatched_as_null) {
- if (PCRE2_UNSET == start_offset) {
- if (unmatched_as_null) {
- ZVAL_NULL(val);
- } else {
- ZVAL_EMPTY_STRING(val);
- }
- } else {
- populate_match_value_str(val, subject, start_offset, end_offset);
- }
- }
- static inline void add_named(
- zval *subpats, zend_string *name, zval *val, bool unmatched) {
- /* If the DUPNAMES option is used, multiple subpatterns might have the same name.
- * In this case we want to preserve the one that actually has a value. */
- if (!unmatched) {
- zend_hash_update(Z_ARRVAL_P(subpats), name, val);
- } else {
- if (!zend_hash_add(Z_ARRVAL_P(subpats), name, val)) {
- return;
- }
- }
- Z_TRY_ADDREF_P(val);
- }
- /* {{{ add_offset_pair */
- static inline void add_offset_pair(
- zval *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
- zend_string *name, uint32_t unmatched_as_null)
- {
- zval match_pair;
- /* Add (match, offset) to the return value */
- if (PCRE2_UNSET == start_offset) {
- if (unmatched_as_null) {
- if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
- init_unmatched_null_pair();
- }
- ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair));
- } else {
- if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
- init_unmatched_empty_pair();
- }
- ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair));
- }
- } else {
- zval val1, val2;
- populate_match_value_str(&val1, subject, start_offset, end_offset);
- ZVAL_LONG(&val2, start_offset);
- ZVAL_ARR(&match_pair, zend_new_pair(&val1, &val2));
- }
- if (name) {
- add_named(result, name, &match_pair, start_offset == PCRE2_UNSET);
- }
- zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
- }
- /* }}} */
- static void populate_subpat_array(
- zval *subpats, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
- uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
- bool offset_capture = (flags & PREG_OFFSET_CAPTURE) != 0;
- bool unmatched_as_null = (flags & PREG_UNMATCHED_AS_NULL) != 0;
- zval val;
- int i;
- if (subpat_names) {
- if (offset_capture) {
- for (i = 0; i < count; i++) {
- add_offset_pair(
- subpats, subject, offsets[2*i], offsets[2*i+1],
- subpat_names[i], unmatched_as_null);
- }
- if (unmatched_as_null) {
- for (i = count; i < num_subpats; i++) {
- add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, subpat_names[i], 1);
- }
- }
- } else {
- for (i = 0; i < count; i++) {
- populate_match_value(
- &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
- if (subpat_names[i]) {
- add_named(subpats, subpat_names[i], &val, offsets[2*i] == PCRE2_UNSET);
- }
- zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val);
- }
- if (unmatched_as_null) {
- for (i = count; i < num_subpats; i++) {
- ZVAL_NULL(&val);
- if (subpat_names[i]) {
- zend_hash_add(Z_ARRVAL_P(subpats), subpat_names[i], &val);
- }
- zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val);
- }
- }
- }
- } else {
- if (offset_capture) {
- for (i = 0; i < count; i++) {
- add_offset_pair(
- subpats, subject, offsets[2*i], offsets[2*i+1], NULL, unmatched_as_null);
- }
- if (unmatched_as_null) {
- for (i = count; i < num_subpats; i++) {
- add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, NULL, 1);
- }
- }
- } else {
- for (i = 0; i < count; i++) {
- populate_match_value(
- &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
- zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val);
- }
- if (unmatched_as_null) {
- for (i = count; i < num_subpats; i++) {
- add_next_index_null(subpats);
- }
- }
- }
- }
- /* Add MARK, if available */
- if (mark) {
- add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
- }
- }
- static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
- {
- /* parameters */
- zend_string *regex; /* Regular expression */
- zend_string *subject; /* String to match against */
- pcre_cache_entry *pce; /* Compiled regular expression */
- zval *subpats = NULL; /* Array for subpatterns */
- zend_long flags = 0; /* Match control flags */
- zend_long start_offset = 0; /* Where the new search starts */
- ZEND_PARSE_PARAMETERS_START(2, 5)
- Z_PARAM_STR(regex)
- Z_PARAM_STR(subject)
- Z_PARAM_OPTIONAL
- Z_PARAM_ZVAL(subpats)
- Z_PARAM_LONG(flags)
- Z_PARAM_LONG(start_offset)
- ZEND_PARSE_PARAMETERS_END();
- /* Compile regex or get it from cache. */
- if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
- RETURN_FALSE;
- }
- pce->refcount++;
- php_pcre_match_impl(pce, subject, return_value, subpats,
- global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
- pce->refcount--;
- }
- /* }}} */
- static zend_always_inline bool is_known_valid_utf8(
- zend_string *subject_str, PCRE2_SIZE start_offset) {
- if (!(GC_FLAGS(subject_str) & IS_STR_VALID_UTF8)) {
- /* We don't know whether the string is valid UTF-8 or not. */
- return 0;
- }
- if (start_offset == ZSTR_LEN(subject_str)) {
- /* Degenerate case: Offset points to end of string. */
- return 1;
- }
- /* Check that the offset does not point to an UTF-8 continuation byte. */
- return (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80;
- }
- /* {{{ php_pcre_match_impl() */
- PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
- zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset)
- {
- zval result_set, /* Holds a set of subpatterns after
- a global match */
- *match_sets = NULL; /* An array of sets of matches for each
- subpattern after a global match */
- uint32_t options; /* Execution options */
- int count; /* Count of matched subpatterns */
- PCRE2_SIZE *offsets; /* Array of subpattern offsets */
- uint32_t num_subpats; /* Number of captured subpatterns */
- int matched; /* Has anything matched */
- zend_string **subpat_names; /* Array for named subpatterns */
- size_t i;
- uint32_t subpats_order; /* Order of subpattern matches */
- uint32_t offset_capture; /* Capture match offsets: yes/no */
- uint32_t unmatched_as_null; /* Null non-matches: yes/no */
- PCRE2_SPTR mark = NULL; /* Target for MARK name */
- zval marks; /* Array of marks for PREG_PATTERN_ORDER */
- pcre2_match_data *match_data;
- PCRE2_SIZE start_offset2, orig_start_offset;
- char *subject = ZSTR_VAL(subject_str);
- size_t subject_len = ZSTR_LEN(subject_str);
- ZVAL_UNDEF(&marks);
- /* Overwrite the passed-in value for subpatterns with an empty array. */
- if (subpats != NULL) {
- subpats = zend_try_array_init(subpats);
- if (!subpats) {
- RETURN_THROWS();
- }
- }
- subpats_order = global ? PREG_PATTERN_ORDER : 0;
- if (use_flags) {
- offset_capture = flags & PREG_OFFSET_CAPTURE;
- unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
- /*
- * subpats_order is pre-set to pattern mode so we change it only if
- * necessary.
- */
- if (flags & 0xff) {
- subpats_order = flags & 0xff;
- }
- if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
- (!global && subpats_order != 0)) {
- zend_argument_value_error(4, "must be a PREG_* constant");
- RETURN_THROWS();
- }
- } else {
- offset_capture = 0;
- unmatched_as_null = 0;
- }
- /* Negative offset counts from the end of the string. */
- if (start_offset < 0) {
- if ((PCRE2_SIZE)-start_offset <= subject_len) {
- start_offset2 = subject_len + start_offset;
- } else {
- start_offset2 = 0;
- }
- } else {
- start_offset2 = (PCRE2_SIZE)start_offset;
- }
- if (start_offset2 > subject_len) {
- pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
- RETURN_FALSE;
- }
- /* Calculate the size of the offsets array, and allocate memory for it. */
- num_subpats = pce->capture_count + 1;
- /*
- * Build a mapping from subpattern numbers to their names. We will
- * allocate the table only if there are any named subpatterns.
- */
- subpat_names = NULL;
- if (subpats && pce->name_count > 0) {
- subpat_names = make_subpats_table(num_subpats, pce);
- if (!subpat_names) {
- RETURN_FALSE;
- }
- }
- /* Allocate match sets array and initialize the values. */
- if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
- match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
- for (i=0; i<num_subpats; i++) {
- array_init(&match_sets[i]);
- }
- }
- matched = 0;
- PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
- if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
- match_data = mdata;
- } else {
- match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
- if (!match_data) {
- PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
- if (subpat_names) {
- free_subpats_table(subpat_names, num_subpats);
- }
- if (match_sets) {
- efree(match_sets);
- }
- RETURN_FALSE;
- }
- }
- orig_start_offset = start_offset2;
- options =
- (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
- ? 0 : PCRE2_NO_UTF_CHECK;
- /* Execute the regular expression. */
- #ifdef HAVE_PCRE_JIT_SUPPORT
- if ((pce->preg_options & PREG_JIT) && options) {
- count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
- PCRE2_NO_UTF_CHECK, match_data, mctx);
- } else
- #endif
- count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
- options, match_data, mctx);
- while (1) {
- /* If something has matched */
- if (count >= 0) {
- /* Check for too many substrings condition. */
- if (UNEXPECTED(count == 0)) {
- php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
- count = num_subpats;
- }
- matched:
- matched++;
- offsets = pcre2_get_ovector_pointer(match_data);
- /* If subpatterns array has been passed, fill it in with values. */
- if (subpats != NULL) {
- /* Try to get the list of substrings and display a warning if failed. */
- if (offsets[1] < offsets[0]) {
- if (subpat_names) {
- free_subpats_table(subpat_names, num_subpats);
- }
- if (match_sets) efree(match_sets);
- php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
- RETURN_FALSE;
- }
- if (global) { /* global pattern matching */
- if (subpats && subpats_order == PREG_PATTERN_ORDER) {
- /* For each subpattern, insert it into the appropriate array. */
- if (offset_capture) {
- for (i = 0; i < count; i++) {
- add_offset_pair(
- &match_sets[i], subject, offsets[2*i], offsets[2*i+1],
- NULL, unmatched_as_null);
- }
- } else {
- for (i = 0; i < count; i++) {
- zval val;
- populate_match_value(
- &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
- zend_hash_next_index_insert_new(Z_ARRVAL(match_sets[i]), &val);
- }
- }
- mark = pcre2_get_mark(match_data);
- /* Add MARK, if available */
- if (mark) {
- if (Z_TYPE(marks) == IS_UNDEF) {
- array_init(&marks);
- }
- add_index_string(&marks, matched - 1, (char *) mark);
- }
- /*
- * If the number of captured subpatterns on this run is
- * less than the total possible number, pad the result
- * arrays with NULLs or empty strings.
- */
- if (count < num_subpats) {
- for (; i < num_subpats; i++) {
- if (offset_capture) {
- add_offset_pair(
- &match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
- NULL, unmatched_as_null);
- } else if (unmatched_as_null) {
- add_next_index_null(&match_sets[i]);
- } else {
- add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
- }
- }
- }
- } else {
- /* Allocate and populate the result set array */
- array_init_size(&result_set, count + (mark ? 1 : 0));
- mark = pcre2_get_mark(match_data);
- populate_subpat_array(
- &result_set, subject, offsets, subpat_names,
- num_subpats, count, mark, flags);
- /* And add it to the output array */
- zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
- }
- } else { /* single pattern matching */
- /* For each subpattern, insert it into the subpatterns array. */
- mark = pcre2_get_mark(match_data);
- populate_subpat_array(
- subpats, subject, offsets, subpat_names, num_subpats, count, mark, flags);
- break;
- }
- }
- /* Advance to the next piece. */
- start_offset2 = offsets[1];
- /* If we have matched an empty string, mimic what Perl's /g options does.
- This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
- the match again at the same point. If this fails (picked up above) we
- advance to the next character. */
- if (start_offset2 == offsets[0]) {
- count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
- PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
- if (count >= 0) {
- if (global) {
- goto matched;
- } else {
- break;
- }
- } else if (count == PCRE2_ERROR_NOMATCH) {
- /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
- this is not necessarily the end. We need to advance
- the start offset, and continue. Fudge the offset values
- to achieve this, unless we're already at the end of the string. */
- if (start_offset2 < subject_len) {
- size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
- start_offset2 += unit_len;
- } else {
- break;
- }
- } else {
- goto error;
- }
- }
- } else if (count == PCRE2_ERROR_NOMATCH) {
- break;
- } else {
- error:
- pcre_handle_exec_error(count);
- break;
- }
- if (!global) {
- break;
- }
- /* Execute the regular expression. */
- #ifdef HAVE_PCRE_JIT_SUPPORT
- if ((pce->preg_options & PREG_JIT)) {
- if (PCRE2_UNSET == start_offset2 || start_offset2 > subject_len) {
- pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
- break;
- }
- count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
- PCRE2_NO_UTF_CHECK, match_data, mctx);
- } else
- #endif
- count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
- PCRE2_NO_UTF_CHECK, match_data, mctx);
- }
- if (match_data != mdata) {
- pcre2_match_data_free(match_data);
- }
- /* Add the match sets to the output array and clean up */
- if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
- if (subpat_names) {
- for (i = 0; i < num_subpats; i++) {
- if (subpat_names[i]) {
- zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &match_sets[i]);
- Z_ADDREF(match_sets[i]);
- }
- zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
- }
- } else {
- for (i = 0; i < num_subpats; i++) {
- zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
- }
- }
- efree(match_sets);
- if (Z_TYPE(marks) != IS_UNDEF) {
- add_assoc_zval(subpats, "MARK", &marks);
- }
- }
- if (subpat_names) {
- free_subpats_table(subpat_names, num_subpats);
- }
- if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
- /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
- if ((pce->compile_options & PCRE2_UTF)
- && !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
- GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
- }
- RETVAL_LONG(matched);
- } else {
- RETVAL_FALSE;
- }
- }
- /* }}} */
- /* {{{ Perform a Perl-style regular expression match */
- PHP_FUNCTION(preg_match)
- {
- php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
- }
- /* }}} */
- /* {{{ Perform a Perl-style global regular expression match */
- PHP_FUNCTION(preg_match_all)
- {
- php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
- }
- /* }}} */
/* {{{ preg_get_backref
 * Tries to parse a backreference at *str, which must point at the
 * introducing character. Accepted forms are <c>N, <c>NN (any introducer
 * character followed by one or two decimal digits) and ${N} / ${NN}.
 *
 * On success stores the reference number in *backref, advances *str past
 * the reference and returns 1. On failure returns 0 and leaves *str
 * untouched; *backref may already have been written in that case. */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced;

	/* An introducer at the very end of the string cannot start a reference. */
	if (cursor[1] == '\0') {
		return 0;
	}

	/* Only '$' may be followed by a brace-delimited number. */
	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* First digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* Second digit is optional; at most two digits are consumed. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + (*cursor - '0');
		cursor++;
	}

	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
/* }}} */
- /* {{{ preg_do_repl_func */
- static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
- {
- zend_string *result_str;
- zval retval; /* Function return value */
- zval arg; /* Argument to pass to function */
- array_init_size(&arg, count + (mark ? 1 : 0));
- populate_subpat_array(&arg, subject, offsets, subpat_names, num_subpats, count, mark, flags);
- fci->retval = &retval;
- fci->param_count = 1;
- fci->params = &arg;
- if (zend_call_function(fci, fcc) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
- if (EXPECTED(Z_TYPE(retval) == IS_STRING)) {
- result_str = Z_STR(retval);
- } else {
- result_str = zval_get_string_func(&retval);
- zval_ptr_dtor(&retval);
- }
- } else {
- if (!EG(exception)) {
- php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
- }
- result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
- }
- zval_ptr_dtor(&arg);
- return result_str;
- }
- /* }}} */
- /* {{{ php_pcre_replace */
- PHPAPI zend_string *php_pcre_replace(zend_string *regex,
- zend_string *subject_str,
- const char *subject, size_t subject_len,
- zend_string *replace_str,
- size_t limit, size_t *replace_count)
- {
- pcre_cache_entry *pce; /* Compiled regular expression */
- zend_string *result; /* Function result */
- /* Abort on pending exception, e.g. thrown from __toString(). */
- if (UNEXPECTED(EG(exception))) {
- return NULL;
- }
- /* Compile regex or get it from cache. */
- if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
- return NULL;
- }
- pce->refcount++;
- result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
- limit, replace_count);
- pce->refcount--;
- return result;
- }
- /* }}} */
- /* {{{ php_pcre_replace_impl()
-  * Replaces up to `limit` matches of the compiled pattern `pce` in the
-  * `subject_len` bytes at `subject` with `replace_str`. References in the
-  * replacement (as recognised by preg_get_backref()) are expanded to the
-  * text of the corresponding capture group; a backslash in front of '\' or
-  * '$' makes that character literal.
-  *
-  * subject_str   - optional zend_string form of the subject; when it is
-  *                 non-NULL and nothing was replaced, it is returned through
-  *                 zend_string_copy() instead of building a new string.
-  * replace_count - optional counter, incremented once per replacement.
-  *
-  * Returns the resulting string, or NULL on error. On the internal error
-  * paths PCRE_G(error_code) is set directly; match errors are passed to
-  * pcre_handle_exec_error().
-  */
- PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
- {
- uint32_t options; /* Execution options */
- int count; /* Count of matched subpatterns */
- PCRE2_SIZE *offsets; /* Array of subpattern offsets */
- uint32_t num_subpats; /* Number of captured subpatterns */
- size_t new_len; /* Length of needed storage */
- size_t alloc_len; /* Actual allocated length */
- size_t match_len; /* Length of the current match */
- int backref; /* Backreference number */
- PCRE2_SIZE start_offset; /* Where the new search starts */
- size_t last_end_offset; /* Where the last search ended */
- char *walkbuf, /* Location of current replacement in the result */
- *walk, /* Used to walk the replacement string */
- walk_last; /* Last walked character */
- const char *match, /* The current match */
- *piece, /* The current piece of subject */
- *replace_end; /* End of replacement string */
- size_t result_len; /* Length of result */
- zend_string *result; /* Result of replacement */
- pcre2_match_data *match_data;
- /* Number of subpatterns: every capture group plus the whole match. */
- num_subpats = pce->capture_count + 1;
- alloc_len = 0;
- result = NULL;
- /* Initialize */
- match = NULL;
- start_offset = 0;
- last_end_offset = 0;
- result_len = 0;
- PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
- if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) { /* preallocated match data is free and large enough */
- match_data = mdata;
- } else {
- match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
- if (!match_data) {
- PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
- return NULL;
- }
- }
- options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK; /* only this first match may validate UTF; all later calls pass PCRE2_NO_UTF_CHECK */
- /* Execute the regular expression. */
- #ifdef HAVE_PCRE_JIT_SUPPORT
- if ((pce->preg_options & PREG_JIT) && options) { /* JIT path only when no UTF check is requested */
- count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
- PCRE2_NO_UTF_CHECK, match_data, mctx);
- } else
- #endif
- count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
- options, match_data, mctx);
- while (1) {
- piece = subject + last_end_offset;
- if (count >= 0 && limit > 0) { /* matched, and replacements are still allowed */
- bool simple_string;
- /* Check for too many substrings condition. */
- if (UNEXPECTED(count == 0)) {
- php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
- count = num_subpats;
- }
- matched: /* also entered by goto from the empty-match retry below */
- offsets = pcre2_get_ovector_pointer(match_data);
- if (UNEXPECTED(offsets[1] < offsets[0])) { /* match end before match start: treat as internal error */
- PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
- if (result) {
- zend_string_release_ex(result, 0);
- result = NULL;
- }
- break;
- }
- if (replace_count) {
- ++*replace_count;
- }
- /* Set the match location in subject */
- match = subject + offsets[0];
- new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
- walk = ZSTR_VAL(replace_str);
- replace_end = walk + ZSTR_LEN(replace_str);
- walk_last = 0;
- simple_string = 1;
- while (walk < replace_end) { /* first pass: only measure the expanded replacement */
- if ('\\' == *walk || '$' == *walk) {
- simple_string = 0;
- if (walk_last == '\\') {
- walk++;
- walk_last = 0;
- continue;
- }
- if (preg_get_backref(&walk, &backref)) {
- if (backref < count) /* references numbered count or higher add nothing */
- new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
- continue;
- }
- }
- new_len++;
- walk++;
- walk_last = walk[-1];
- }
- if (new_len >= alloc_len) { /* grow the buffer to about twice the needed length */
- alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
- if (result == NULL) {
- result = zend_string_alloc(alloc_len, 0);
- } else {
- result = zend_string_extend(result, alloc_len, 0);
- }
- }
- if (match-piece > 0) {
- /* copy the part of the string before the match */
- memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
- result_len += (match-piece);
- }
- if (simple_string) {
- /* copy replacement */
- memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1); /* +1 copies the terminating NUL too */
- result_len += ZSTR_LEN(replace_str);
- } else {
- /* copy replacement and backrefs */
- walkbuf = ZSTR_VAL(result) + result_len;
- walk = ZSTR_VAL(replace_str);
- walk_last = 0;
- while (walk < replace_end) { /* second pass: same walk as above, now writing */
- if ('\\' == *walk || '$' == *walk) {
- if (walk_last == '\\') {
- *(walkbuf-1) = *walk++; /* overwrite the backslash already copied with the escaped character */
- walk_last = 0;
- continue;
- }
- if (preg_get_backref(&walk, &backref)) {
- if (backref < count) {
- match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
- memcpy(walkbuf, subject + offsets[backref<<1], match_len);
- walkbuf += match_len;
- }
- continue;
- }
- }
- *walkbuf++ = *walk++;
- walk_last = walk[-1];
- }
- *walkbuf = '\0';
- /* increment the result length by how much we've added to the string */
- result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
- }
- limit--;
- /* Advance to the next piece. */
- start_offset = last_end_offset = offsets[1];
- /* If we have matched an empty string, mimic what Perl's /g option does.
- This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
- the match again at the same point. If this fails (picked up above) we
- advance to the next character. */
- if (start_offset == offsets[0]) {
- count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
- PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
- piece = subject + start_offset;
- if (count >= 0 && limit > 0) {
- goto matched;
- } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
- /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
- this is not necessarily the end. We need to advance
- the start offset, and continue. Fudge the offset values
- to achieve this, unless we're already at the end of the string. */
- if (start_offset < subject_len) {
- size_t unit_len = calculate_unit_length(pce, piece);
- start_offset += unit_len;
- } else {
- goto not_matched;
- }
- } else {
- goto error;
- }
- }
- } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
- not_matched: /* no further replacement: finish the result with the rest of the subject */
- if (!result && subject_str) { /* nothing was replaced: hand back the subject string itself */
- result = zend_string_copy(subject_str);
- break;
- }
- /* now we know exactly how long it is */
- alloc_len = result_len + subject_len - last_end_offset;
- if (NULL != result) {
- result = zend_string_realloc(result, alloc_len, 0);
- } else {
- result = zend_string_alloc(alloc_len, 0);
- }
- /* stick that last bit of string on our output */
- memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
- result_len += subject_len - last_end_offset;
- ZSTR_VAL(result)[result_len] = '\0';
- ZSTR_LEN(result) = result_len;
- break;
- } else {
- error: /* any other negative count is a match error; discard the partial result */
- pcre_handle_exec_error(count);
- if (result) {
- zend_string_release_ex(result, 0);
- result = NULL;
- }
- break;
- }
- #ifdef HAVE_PCRE_JIT_SUPPORT
- if (pce->preg_options & PREG_JIT) {
- count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
- PCRE2_NO_UTF_CHECK, match_data, mctx);
- } else
- #endif
- count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
- PCRE2_NO_UTF_CHECK, match_data, mctx);
- }
- if (match_data != mdata) { /* only a separately created block is freed */
- pcre2_match_data_free(match_data);
- }
- return result;
- }
- /* }}} */
- /* {{{ php_pcre_replace_func_impl()
-  *
-  * Callback-driven replacement. Matches pce->re against `subject` and, for
-  * every match (at most `limit` of them), splices the string returned by
-  * preg_do_repl_func() into the output in place of the matched text.
-  *
-  * subject_str    is only consulted when non-NULL: if nothing was replaced, a
-  *                new reference to it is returned instead of building a copy.
-  * replace_count  when non-NULL, is incremented once per replacement.
-  *
-  * Returns the resulting string, or NULL on failure: the subpattern-name table
-  * or the match data could not be created, PCRE2 reported an end offset smaller
-  * than the start offset, or a match call failed with something other than
-  * PCRE2_ERROR_NOMATCH.
-  */
- static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags)
- {
- uint32_t options; /* Execution options */
- int count; /* Count of matched subpatterns */
- PCRE2_SIZE *offsets; /* Array of subpattern offsets */
- zend_string **subpat_names; /* Array for named subpatterns */
- uint32_t num_subpats; /* Number of captured subpatterns */
- size_t new_len; /* Length of needed storage */
- size_t alloc_len; /* Actual allocated length */
- PCRE2_SIZE start_offset; /* Where the new search starts */
- size_t last_end_offset; /* Where the last search ended */
- const char *match, /* The current match */
- *piece; /* The current piece of subject */
- size_t result_len; /* Length of result */
- zend_string *result; /* Result of replacement */
- zend_string *eval_result; /* Result of custom function */
- pcre2_match_data *match_data;
- bool old_mdata_used;
- /* The pattern's capture count plus one. Nothing is allocated at this point. */
- num_subpats = pce->capture_count + 1;
- /*
- * Build a mapping from subpattern numbers to their names. We will
- * allocate the table only if there are any named subpatterns.
- */
- subpat_names = NULL;
- if (UNEXPECTED(pce->name_count > 0)) {
- subpat_names = make_subpats_table(num_subpats, pce);
- if (!subpat_names) {
- return NULL;
- }
- }
- alloc_len = 0;
- result = NULL;
- /* Initialize */
- match = NULL;
- start_offset = 0;
- last_end_offset = 0;
- result_len = 0;
- PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
- old_mdata_used = mdata_used; /* remembered so the flag can be restored on every exit path */
- if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
- mdata_used = 1; /* flag the shared mdata as in use while this call owns it */
- match_data = mdata;
- } else {
- match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
- if (!match_data) {
- PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
- if (subpat_names) {
- free_subpats_table(subpat_names, num_subpats);
- }
- mdata_used = old_mdata_used;
- return NULL;
- }
- }
- options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK; /* no option bits for PCRE2_UTF patterns, PCRE2_NO_UTF_CHECK otherwise */
- /* Execute the regular expression. The JIT entry point is only taken for the
-  * first match when `options` is non-zero, i.e. for non-UTF patterns. */
- #ifdef HAVE_PCRE_JIT_SUPPORT
- if ((pce->preg_options & PREG_JIT) && options) {
- count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
- PCRE2_NO_UTF_CHECK, match_data, mctx);
- } else
- #endif
- count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
- options, match_data, mctx);
- while (1) {
- piece = subject + last_end_offset;
- if (count >= 0 && limit) {
- /* Check for too many substrings condition. */
- if (UNEXPECTED(count == 0)) {
- php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
- count = num_subpats;
- }
- matched:
- offsets = pcre2_get_ovector_pointer(match_data);
- if (UNEXPECTED(offsets[1] < offsets[0])) {
- PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
- if (result) {
- zend_string_release_ex(result, 0);
- result = NULL;
- }
- break;
- }
- if (replace_count) {
- ++*replace_count;
- }
- /* Set the match location in subject */
- match = subject + offsets[0];
- new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
- /* Use custom function to get replacement string and its length. */
- eval_result = preg_do_repl_func(
- fci, fcc, subject, offsets, subpat_names, num_subpats, count,
- pcre2_get_mark(match_data), flags);
- ZEND_ASSERT(eval_result);
- new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result) + ZSTR_MAX_OVERHEAD, new_len) -ZSTR_MAX_OVERHEAD; /* adds the replacement length; NOTE(review): presumably overflow-checked by the helper, which is defined elsewhere */
- if (new_len >= alloc_len) {
- alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD; /* grow to twice the needed length */
- if (result == NULL) {
- result = zend_string_alloc(alloc_len, 0);
- } else {
- result = zend_string_extend(result, alloc_len, 0);
- }
- }
- if (match-piece > 0) {
- /* copy the part of the string before the match */
- memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
- result_len += (match-piece);
- }
- /* Append the callback's string to the buffer, then drop our reference to it. */
- memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
- result_len += ZSTR_LEN(eval_result);
- zend_string_release_ex(eval_result, 0);
- limit--;
- /* Advance to the next piece. */
- start_offset = last_end_offset = offsets[1];
- /* If we have matched an empty string, mimic what Perl's /g options does.
- This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
- the match again at the same point. If this fails (picked up above) we
- advance to the next character. */
- if (start_offset == offsets[0]) {
- count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
- PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
- piece = subject + start_offset;
- if (count >= 0 && limit) {
- goto matched;
- } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
- /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
- this is not necessarily the end. We need to advance
- the start offset, and continue. Fudge the offset values
- to achieve this, unless we're already at the end of the string. */
- if (start_offset < subject_len) {
- size_t unit_len = calculate_unit_length(pce, piece);
- start_offset += unit_len;
- } else {
- goto not_matched;
- }
- } else {
- goto error;
- }
- }
- } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
- not_matched:
- if (!result && subject_str) { /* nothing was replaced: hand back the caller's string */
- result = zend_string_copy(subject_str);
- break;
- }
- /* now we know exactly how long it is */
- alloc_len = result_len + subject_len - last_end_offset;
- if (NULL != result) {
- result = zend_string_realloc(result, alloc_len, 0);
- } else {
- result = zend_string_alloc(alloc_len, 0);
- }
- /* stick that last bit of string on our output */
- memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
- result_len += subject_len - last_end_offset;
- ZSTR_VAL(result)[result_len] = '\0';
- ZSTR_LEN(result) = result_len;
- break;
- } else {
- error:
- pcre_handle_exec_error(count);
- if (result) {
- zend_string_release_ex(result, 0);
- result = NULL;
- }
- break;
- }
- /* Look for the next match. These follow-up calls always pass PCRE2_NO_UTF_CHECK. */
- #ifdef HAVE_PCRE_JIT_SUPPORT
- if ((pce->preg_options & PREG_JIT)) {
- count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
- PCRE2_NO_UTF_CHECK, match_data, mctx);
- } else
- #endif
- count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
- PCRE2_NO_UTF_CHECK, match_data, mctx);
- }
- if (match_data != mdata) { /* only match data created by this call is freed */
- pcre2_match_data_free(match_data);
- }
- mdata_used = old_mdata_used;
- if (UNEXPECTED(subpat_names)) {
- free_subpats_table(subpat_names, num_subpats);
- }
- return result;
- }
- /* }}} */
- /* {{{ php_pcre_replace_func */
- static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
- zend_string *subject_str,
- zend_fcall_info *fci, zend_fcall_info_cache *fcc,
- size_t limit, size_t *replace_count, zend_long flags)
- {
- pcre_cache_entry *pce; /* Compiled regular expression */
- zend_string *result; /* Function result */
- /* Compile regex or get it from cache. */
- if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
- return NULL;
- }
- pce->refcount++;
- result = php_pcre_replace_func_impl(
- pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc,
- limit, replace_count, flags);
- pce->refcount--;
- return result;
- }
- /* }}} */
- /* {{{ php_pcre_replace_array */
- static zend_string *php_pcre_replace_array(HashTable *regex,
- zend_string *replace_str, HashTable *replace_ht,
- zend_string *subject_str, size_t limit, size_t *replace_count)
- {
- zval *regex_entry;
- zend_string *result;
- zend_string_addref(subject_str);
- if (replace_ht) {
- uint32_t replace_idx = 0;
- /* For each entry in the regex array, get the entry */
- ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
- /* Make sure we're dealing with strings. */
- zend_string *tmp_regex_str;
- zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
- zend_string *replace_entry_str, *tmp_replace_entry_str;
- zval *zv;
- /* Get current entry */
- while (1) {
- if (replace_idx == replace_ht->nNumUsed) {
- replace_entry_str = ZSTR_EMPTY_ALLOC();
- tmp_replace_entry_str = NULL;
- break;
- }
- zv = &replace_ht->arData[replace_idx].val;
- replace_idx++;
- if (Z_TYPE_P(zv) != IS_UNDEF) {
- replace_entry_str = zval_get_tmp_string(zv, &tmp_replace_entry_str);
- break;
- }
- }
- /* Do the actual replacement and put the result back into subject_str
- for further replacements. */
- result = php_pcre_replace(regex_str, subject_str, ZSTR_VAL(subject_str),
- ZSTR_LEN(subject_str), replace_entry_str, limit, replace_count);
- zend_tmp_string_release(tmp_replace_entry_str);
- zend_tmp_string_release(tmp_regex_str);
- zend_string_release_ex(subject_str, 0);
- subject_str = result;
- if (UNEXPECTED(result == NULL)) {
- break;
- }
- } ZEND_HASH_FOREACH_END();
- } else {
- ZEND_ASSERT(replace_str != NULL);
- /* For each entry in the regex array, get the entry */
- ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
- /* Make sure we're dealing with strings. */
- zend_string *tmp_regex_str;
- zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
- /* Do the actual replacement and put the result back into subject_str
- for further replacements. */
- result = php_pcre_replace(regex_str, subject_str, ZSTR_VAL(subject_str),
- ZSTR_LEN(subject_str), replace_str, limit, replace_count);
- zend_tmp_string_release(tmp_regex_str);
- zend_string_release_ex(subject_str, 0);
- subject_str = result;
- if (UNEXPECTED(result == NULL)) {
- break;
- }
- } ZEND_HASH_FOREACH_END();
- }
- return subject_str;
- }
- /* }}} */
- /* {{{ php_replace_in_subject */
- static zend_always_inline zend_string *php_replace_in_subject(
- zend_string *regex_str, HashTable *regex_ht,
- zend_string *replace_str, HashTable *replace_ht,
- zend_string *subject, size_t limit, size_t *replace_count)
- {
- zend_string *result;
- if (regex_str) {
- ZEND_ASSERT(replace_str != NULL);
- result = php_pcre_replace(regex_str, subject, ZSTR_VAL(subject), ZSTR_LEN(subject),
- replace_str, limit, replace_count);
- } else {
- ZEND_ASSERT(regex_ht != NULL);
- result = php_pcre_replace_array(regex_ht, replace_str, replace_ht, subject,
- limit, replace_count);
- }
- return result;
- }
- /* }}} */
- /* {{{ php_replace_in_subject_func */
- static zend_string *php_replace_in_subject_func(zend_string *regex_str, HashTable *regex_ht,
- zend_fcall_info *fci, zend_fcall_info_cache *fcc,
- zend_string *subject, size_t limit, size_t *replace_count, zend_long flags)
- {
- zend_string *result;
- if (regex_str) {
- result = php_pcre_replace_func(
- regex_str, subject, fci, fcc, limit, replace_count, flags);
- return result;
- } else {
- /* If regex is an array */
- zval *regex_entry;
- ZEND_ASSERT(regex_ht != NULL);
- zend_string_addref(subject);
- /* For each entry in the regex array, get the entry */
- ZEND_HASH_FOREACH_VAL(regex_ht, regex_entry) {
- /* Make sure we're dealing with strings. */
- zend_string *tmp_regex_entry_str;
- zend_string *regex_entry_str = zval_get_tmp_string(regex_entry, &tmp_regex_entry_str);
- /* Do the actual replacement and put the result back into subject
- for further replacements. */
- result = php_pcre_replace_func(
- regex_entry_str, subject, fci, fcc, limit, replace_count, flags);
- zend_tmp_string_release(tmp_regex_entry_str);
- zend_string_release(subject);
- subject = result;
- if (UNEXPECTED(result == NULL)) {
- break;
- }
- } ZEND_HASH_FOREACH_END();
- return subject;
- }
- }
- /* }}} */
- /* {{{ preg_replace_func_impl */
- static size_t preg_replace_func_impl(zval *return_value,
- zend_string *regex_str, HashTable *regex_ht,
- zend_fcall_info *fci, zend_fcall_info_cache *fcc,
- zend_string *subject_str, HashTable *subject_ht, zend_long limit_val, zend_long flags)
- {
- zend_string *result;
- size_t replace_count = 0;
- if (subject_str) {
- result = php_replace_in_subject_func(
- regex_str, regex_ht, fci, fcc, subject_str, limit_val, &replace_count, flags);
- if (result != NULL) {
- RETVAL_STR(result);
- } else {
- RETVAL_NULL();
- }
- } else {
- /* if subject is an array */
- zval *subject_entry, zv;
- zend_string *string_key;
- zend_ulong num_key;
- ZEND_ASSERT(subject_ht != NULL);
- array_init_size(return_value, zend_hash_num_elements(subject_ht));
- /* For each subject entry, convert it to string, then perform replacement
- and add the result to the return_value array. */
- ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
- zend_string *tmp_subject_entry_str;
- zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str);
- result = php_replace_in_subject_func(
- regex_str, regex_ht, fci, fcc, subject_entry_str, limit_val, &replace_count, flags);
- if (result != NULL) {
- /* Add to return array */
- ZVAL_STR(&zv, result);
- if (string_key) {
- zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
- } else {
- zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
- }
- }
- zend_tmp_string_release(tmp_subject_entry_str);
- } ZEND_HASH_FOREACH_END();
- }
- return replace_count;
- }
- /* }}} */
- /* {{{ preg_replace_common */
- static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter)
- {
- zval *zcount = NULL;
- zend_string *regex_str;
- HashTable *regex_ht;
- zend_string *replace_str;
- HashTable *replace_ht;
- zend_string *subject_str;
- HashTable *subject_ht;
- zend_long limit = -1;
- size_t replace_count = 0;
- zend_string *result;
- size_t old_replace_count;
- /* Get function parameters and do error-checking. */
- ZEND_PARSE_PARAMETERS_START(3, 5)
- Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
- Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
- Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
- Z_PARAM_OPTIONAL
- Z_PARAM_LONG(limit)
- Z_PARAM_ZVAL(zcount)
- ZEND_PARSE_PARAMETERS_END();
- /* If replace is an array then the regex argument needs to also be an array */
- if (replace_ht && !regex_ht) {
- zend_argument_type_error(1, "must be of type array when argument #2 ($replacement) is an array, string given");
- RETURN_THROWS();
- }
- if (subject_str) {
- old_replace_count = replace_count;
- result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
- subject_str, limit, &replace_count);
- if (result != NULL) {
- if (!is_filter || replace_count > old_replace_count) {
- RETVAL_STR(result);
- } else {
- zend_string_release_ex(result, 0);
- RETVAL_NULL();
- }
- } else {
- RETVAL_NULL();
- }
- } else {
- /* if subject is an array */
- zval *subject_entry, zv;
- zend_string *string_key;
- zend_ulong num_key;
- ZEND_ASSERT(subject_ht != NULL);
- array_init_size(return_value, zend_hash_num_elements(subject_ht));
- /* For each subject entry, convert it to string, then perform replacement
- and add the result to the return_value array. */
- ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
- old_replace_count = replace_count;
- zend_string *tmp_subject_entry_str;
- zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str);
- result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
- subject_entry_str, limit, &replace_count);
- if (result != NULL) {
- if (!is_filter || replace_count > old_replace_count) {
- /* Add to return array */
- ZVAL_STR(&zv, result);
- if (string_key) {
- zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
- } else {
- zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
- }
- } else {
- zend_string_release_ex(result, 0);
- }
- }
- zend_tmp_string_release(tmp_subject_entry_str);
- } ZEND_HASH_FOREACH_END();
- }
- if (zcount) {
- ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
- }
- }
- /* }}} */
- /* {{{ Perform Perl-style regular expression replacement. */
- PHP_FUNCTION(preg_replace)
- {
- preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
- }
- /* }}} */
- /* {{{ Perform Perl-style regular expression replacement using replacement callback. */
- PHP_FUNCTION(preg_replace_callback)
- {
- zval *zcount = NULL;
- zend_string *regex_str;
- HashTable *regex_ht;
- zend_string *subject_str;
- HashTable *subject_ht;
- zend_long limit = -1, flags = 0;
- size_t replace_count;
- zend_fcall_info fci;
- zend_fcall_info_cache fcc;
- /* Get function parameters and do error-checking. */
- ZEND_PARSE_PARAMETERS_START(3, 6)
- Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
- Z_PARAM_FUNC(fci, fcc)
- Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
- Z_PARAM_OPTIONAL
- Z_PARAM_LONG(limit)
- Z_PARAM_ZVAL(zcount)
- Z_PARAM_LONG(flags)
- ZEND_PARSE_PARAMETERS_END();
- replace_count = preg_replace_func_impl(return_value, regex_str, regex_ht,
- &fci, &fcc,
- subject_str, subject_ht, limit, flags);
- if (zcount) {
- ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
- }
- }
- /* }}} */
- /* {{{ Perform Perl-style regular expression replacement using an array that maps each pattern to its replacement callback. */
- PHP_FUNCTION(preg_replace_callback_array)
- {
- zval zv, *replace, *zcount = NULL;
- HashTable *pattern, *subject_ht;
- zend_string *subject_str, *str_idx_regex;
- zend_long limit = -1, flags = 0;
- size_t replace_count = 0;
- zend_fcall_info fci;
- zend_fcall_info_cache fcc;
- /* Get function parameters and do error-checking. */
- ZEND_PARSE_PARAMETERS_START(2, 5)
- Z_PARAM_ARRAY_HT(pattern)
- Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
- Z_PARAM_OPTIONAL
- Z_PARAM_LONG(limit)
- Z_PARAM_ZVAL(zcount)
- Z_PARAM_LONG(flags)
- ZEND_PARSE_PARAMETERS_END();
- fci.size = sizeof(fci);
- fci.object = NULL;
- fci.named_params = NULL;
- if (subject_ht) {
- GC_TRY_ADDREF(subject_ht);
- } else {
- GC_TRY_ADDREF(subject_str);
- }
- ZEND_HASH_FOREACH_STR_KEY_VAL(pattern, str_idx_regex, replace) {
- if (!str_idx_regex) {
- php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
- RETVAL_NULL();
- goto error;
- }
- if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
- zend_argument_type_error(1, "must contain only valid callbacks");
- goto error;
- }
- ZVAL_COPY_VALUE(&fci.function_name, replace);
- replace_count += preg_replace_func_impl(&zv, str_idx_regex, /* regex_ht */ NULL, &fci, &fcc,
- subject_str, subject_ht, limit, flags);
- switch (Z_TYPE(zv)) {
- case IS_ARRAY:
- ZEND_ASSERT(subject_ht);
- zend_array_release(subject_ht);
- subject_ht = Z_ARR(zv);
- break;
- case IS_STRING:
- ZEND_ASSERT(subject_str);
- zend_string_release(subject_str);
- subject_str = Z_STR(zv);
- break;
- case IS_NULL:
- RETVAL_NULL();
- goto error;
- EMPTY_SWITCH_DEFAULT_CASE()
- }
- if (EG(exception)) {
- goto error;
- }
- } ZEND_HASH_FOREACH_END();
- if (zcount) {
- ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
- }
- if (subject_ht) {
- RETURN_ARR(subject_ht);
- } else {
- RETURN_STR(subject_str);
- }
- error:
- if (subject_ht) {
- zend_array_release(subject_ht);
- } else {
- zend_string_release(subject_str);
- }
- }
- /* }}} */
- /* {{{ Perform Perl-style regular expression replacement and only return matches. */
- PHP_FUNCTION(preg_filter)
- {
- preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
- }
- /* }}} */
- /* {{{ Split string into an array using a perl-style regular expression as a delimiter */
- PHP_FUNCTION(preg_split)
- {
- zend_string *regex; /* Regular expression */
- zend_string *subject; /* String to match against */
- zend_long limit_val = -1;/* Integer value of limit */
- zend_long flags = 0; /* Match control flags */
- pcre_cache_entry *pce; /* Compiled regular expression */
- /* Get function parameters and do error checking */
- ZEND_PARSE_PARAMETERS_START(2, 4)
- Z_PARAM_STR(regex)
- Z_PARAM_STR(subject)
- Z_PARAM_OPTIONAL
- Z_PARAM_LONG(limit_val)
- Z_PARAM_LONG(flags)
- ZEND_PARSE_PARAMETERS_END();
- /* Compile regex or get it from cache. */
- if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
- RETURN_FALSE;
- }
- pce->refcount++;
- php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
- pce->refcount--;
- }
- /* }}} */
- /* {{{ php_pcre_split_impl
-  *
-  * Splits subject_str around the matches of pce->re and appends the pieces to
-  * return_value, which is initialised as an array here.
-  *
-  * limit_val  -1 and 0 both mean "no limit"; any other value <= 1 skips
-  *            matching entirely, so only the final-piece code at `last` runs
-  *            and it covers the whole subject.
-  * flags      PREG_SPLIT_NO_EMPTY drops empty pieces, PREG_SPLIT_DELIM_CAPTURE
-  *            also appends the captured groups of each delimiter match, and
-  *            PREG_SPLIT_OFFSET_CAPTURE stores pieces through add_offset_pair().
-  *
-  * If the match data cannot be created, or PCRE_G(error_code) is set once the
-  * loop ends, return_value is destroyed and RETURN_FALSE is used.
-  */
- PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
- zend_long limit_val, zend_long flags)
- {
- PCRE2_SIZE *offsets; /* Array of subpattern offsets */
- uint32_t options; /* Execution options */
- int count; /* Count of matched subpatterns */
- PCRE2_SIZE start_offset; /* Where the new search starts */
- PCRE2_SIZE last_match_offset; /* Location of last match */
- uint32_t no_empty; /* If NO_EMPTY flag is set */
- uint32_t delim_capture; /* If delimiters should be captured */
- uint32_t offset_capture; /* If offsets should be captured */
- uint32_t num_subpats; /* Number of captured subpatterns */
- zval tmp;
- pcre2_match_data *match_data;
- char *subject = ZSTR_VAL(subject_str);
- no_empty = flags & PREG_SPLIT_NO_EMPTY;
- delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
- offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
- /* Initialize return value */
- array_init(return_value);
- /* The pattern's capture count plus one. Nothing is allocated at this point. */
- num_subpats = pce->capture_count + 1;
- /* Start at the beginning of the string */
- start_offset = 0;
- last_match_offset = 0;
- PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
- if (limit_val == -1) {
- /* pass */
- } else if (limit_val == 0) {
- limit_val = -1; /* 0 is treated exactly like -1 from here on */
- } else if (limit_val <= 1) {
- goto last; /* no matching at all: only the final piece is added */
- }
- if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) { /* note: mdata_used is only read here, never set */
- match_data = mdata;
- } else {
- match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
- if (!match_data) {
- PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
- zval_ptr_dtor(return_value);
- RETURN_FALSE;
- }
- }
- options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK; /* no option bits for PCRE2_UTF patterns, PCRE2_NO_UTF_CHECK otherwise */
- #ifdef HAVE_PCRE_JIT_SUPPORT
- if ((pce->preg_options & PREG_JIT) && options) {
- count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
- PCRE2_NO_UTF_CHECK, match_data, mctx);
- } else
- #endif
- count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
- options, match_data, mctx);
- while (1) {
- /* If something matched */
- if (count >= 0) {
- /* Check for too many substrings condition. */
- if (UNEXPECTED(count == 0)) {
- php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
- count = num_subpats;
- }
- matched:
- offsets = pcre2_get_ovector_pointer(match_data);
- if (UNEXPECTED(offsets[1] < offsets[0])) {
- PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
- break;
- }
- if (!no_empty || offsets[0] != last_match_offset) { /* skip an empty piece only when NO_EMPTY is set */
- if (offset_capture) {
- /* Add (match, offset) pair to the return value */
- add_offset_pair(
- return_value, subject, last_match_offset, offsets[0],
- NULL, 0);
- } else {
- /* Add the piece to the return value */
- populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
- zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
- }
- /* One less left to do */
- if (limit_val != -1)
- limit_val--;
- }
- if (delim_capture) {
- size_t i;
- for (i = 1; i < count; i++) { /* ovector pairs 1 .. count-1; pair 0 is skipped */
- /* If we have matched a delimiter */
- if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
- if (offset_capture) {
- add_offset_pair(
- return_value, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
- } else {
- populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
- zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
- }
- }
- }
- }
- /* Advance to the position right after the last full match */
- start_offset = last_match_offset = offsets[1];
- /* If we have matched an empty string, mimic what Perl's /g options does.
- This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
- the match again at the same point. If this fails (picked up above) we
- advance to the next character. */
- if (start_offset == offsets[0]) {
- /* Get next piece if no limit or limit not yet reached and something matched */
- if (limit_val != -1 && limit_val <= 1) {
- break;
- }
- count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
- PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
- if (count >= 0) {
- goto matched;
- } else if (count == PCRE2_ERROR_NOMATCH) {
- /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
- this is not necessarily the end. We need to advance
- the start offset, and continue. Fudge the offset values
- to achieve this, unless we're already at the end of the string. */
- if (start_offset < ZSTR_LEN(subject_str)) {
- start_offset += calculate_unit_length(pce, subject + start_offset);
- } else {
- break;
- }
- } else {
- goto error;
- }
- }
- } else if (count == PCRE2_ERROR_NOMATCH) {
- break;
- } else {
- error:
- pcre_handle_exec_error(count);
- break;
- }
- /* Get next piece if no limit or limit not yet reached and something matched */
- if (limit_val != -1 && limit_val <= 1) {
- break;
- }
- /* Follow-up matches always pass PCRE2_NO_UTF_CHECK. */
- #ifdef HAVE_PCRE_JIT_SUPPORT
- if (pce->preg_options & PREG_JIT) {
- count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
- PCRE2_NO_UTF_CHECK, match_data, mctx);
- } else
- #endif
- count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
- PCRE2_NO_UTF_CHECK, match_data, mctx);
- }
- if (match_data != mdata) { /* only match data created by this call is freed */
- pcre2_match_data_free(match_data);
- }
- if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
- zval_ptr_dtor(return_value);
- RETURN_FALSE;
- }
- last:
- start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */
- if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
- if (offset_capture) {
- /* Add the last (match, offset) pair to the return value */
- add_offset_pair(return_value, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
- } else {
- /* Add the last piece to the return value */
- if (start_offset == 0) {
- ZVAL_STR_COPY(&tmp, subject_str); /* the piece is the whole subject, so store the subject string itself */
- } else {
- populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
- }
- zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
- }
- }
- }
- /* }}} */
- /* {{{ Quote regular expression characters plus an optional character */
- PHP_FUNCTION(preg_quote)
- {
- zend_string *str; /* Input string argument */
- zend_string *delim = NULL; /* Additional delimiter argument */
- char *in_str; /* Input string */
- char *in_str_end; /* End of the input string */
- zend_string *out_str; /* Output string with quoted characters */
- size_t extra_len; /* Number of additional characters */
- char *p, /* Iterator for input string */
- *q, /* Iterator for output string */
- delim_char = '\0', /* Delimiter character to be quoted */
- c; /* Current character */
- /* Get the arguments and check for errors */
- ZEND_PARSE_PARAMETERS_START(1, 2)
- Z_PARAM_STR(str)
- Z_PARAM_OPTIONAL
- Z_PARAM_STR_OR_NULL(delim)
- ZEND_PARSE_PARAMETERS_END();
- /* Nothing to do if we got an empty string */
- if (ZSTR_LEN(str) == 0) {
- RETURN_EMPTY_STRING();
- }
- in_str = ZSTR_VAL(str);
- in_str_end = in_str + ZSTR_LEN(str);
- if (delim) {
- delim_char = ZSTR_VAL(delim)[0];
- }
- /* Go through the string and quote necessary characters */
- extra_len = 0;
- p = in_str;
- do {
- c = *p;
- switch(c) {
- case '.':
- case '\\':
- case '+':
- case '*':
- case '?':
- case '[':
- case '^':
- case ']':
- case '$':
- case '(':
- case ')':
- case '{':
- case '}':
- case '=':
- case '!':
- case '>':
- case '<':
- case '|':
- case ':':
- case '-':
- case '#':
- extra_len++;
- break;
- case '\0':
- extra_len+=3;
- break;
- default:
- if (c == delim_char) {
- extra_len++;
- }
- break;
- }
- p++;
- } while (p != in_str_end);
- if (extra_len == 0) {
- RETURN_STR_COPY(str);
- }
- /* Allocate enough memory so that even if each character
- is quoted, we won't run out of room */
- out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
- q = ZSTR_VAL(out_str);
- p = in_str;
- do {
- c = *p;
- switch(c) {
- case '.':
- case '\\':
- case '+':
- case '*':
- case '?':
- case '[':
- case '^':
- case ']':
- case '$':
- case '(':
- case ')':
- case '{':
- case '}':
- case '=':
- case '!':
- case '>':
- case '<':
- case '|':
- case ':':
- case '-':
- case '#':
- *q++ = '\\';
- *q++ = c;
- break;
- case '\0':
- *q++ = '\\';
- *q++ = '0';
- *q++ = '0';
- *q++ = '0';
- break;
- default:
- if (c == delim_char) {
- *q++ = '\\';
- }
- *q++ = c;
- break;
- }
- p++;
- } while (p != in_str_end);
- *q = '\0';
- RETURN_NEW_STR(out_str);
- }
- /* }}} */
- /* {{{ Searches array and returns entries which match regex */
- PHP_FUNCTION(preg_grep)
- {
- zend_string *regex; /* Regular expression */
- zval *input; /* Input array */
- zend_long flags = 0; /* Match control flags */
- pcre_cache_entry *pce; /* Compiled regular expression */
- /* Get arguments and do error checking */
- ZEND_PARSE_PARAMETERS_START(2, 3)
- Z_PARAM_STR(regex)
- Z_PARAM_ARRAY(input)
- Z_PARAM_OPTIONAL
- Z_PARAM_LONG(flags)
- ZEND_PARSE_PARAMETERS_END();
- /* Compile regex or get it from cache. */
- if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
- RETURN_FALSE;
- }
- pce->refcount++;
- php_pcre_grep_impl(pce, input, return_value, flags);
- pce->refcount--;
- }
- /* }}} */
- PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
- {
- zval *entry; /* An entry in the input array */
- uint32_t num_subpats; /* Number of captured subpatterns */
- int count; /* Count of matched subpatterns */
- uint32_t options; /* Execution options */
- zend_string *string_key;
- zend_ulong num_key;
- bool invert; /* Whether to return non-matching
- entries */
- pcre2_match_data *match_data;
- invert = flags & PREG_GREP_INVERT ? 1 : 0;
- /* Calculate the size of the offsets array, and allocate memory for it. */
- num_subpats = pce->capture_count + 1;
- /* Initialize return array */
- array_init(return_value);
- PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
- if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
- match_data = mdata;
- } else {
- match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
- if (!match_data) {
- PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
- return;
- }
- }
- options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
- /* Go through the input array */
- ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
- zend_string *tmp_subject_str;
- zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
- /* Perform the match */
- #ifdef HAVE_PCRE_JIT_SUPPORT
- if ((pce->preg_options & PREG_JIT) && options) {
- count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
- PCRE2_NO_UTF_CHECK, match_data, mctx);
- } else
- #endif
- count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
- options, match_data, mctx);
- /* If the entry fits our requirements */
- if (count >= 0) {
- /* Check for too many substrings condition. */
- if (UNEXPECTED(count == 0)) {
- php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
- }
- if (!invert) {
- Z_TRY_ADDREF_P(entry);
- /* Add to return array */
- if (string_key) {
- zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
- } else {
- zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
- }
- }
- } else if (count == PCRE2_ERROR_NOMATCH) {
- if (invert) {
- Z_TRY_ADDREF_P(entry);
- /* Add to return array */
- if (string_key) {
- zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
- } else {
- zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
- }
- }
- } else {
- pcre_handle_exec_error(count);
- zend_tmp_string_release(tmp_subject_str);
- break;
- }
- zend_tmp_string_release(tmp_subject_str);
- } ZEND_HASH_FOREACH_END();
- if (match_data != mdata) {
- pcre2_match_data_free(match_data);
- }
- }
- /* }}} */
- /* {{{ Returns the error code of the last regexp execution. */
- PHP_FUNCTION(preg_last_error)
- {
- ZEND_PARSE_PARAMETERS_NONE();
- RETURN_LONG(PCRE_G(error_code));
- }
- /* }}} */
- /* {{{ preg_last_error_msg(): hands back the message text that
- php_pcre_get_error_msg() associates with the currently recorded
- regexp error code. */
- PHP_FUNCTION(preg_last_error_msg)
- {
- /* The function takes no arguments. */
- ZEND_PARSE_PARAMETERS_NONE();
- /* Translate the stored code into its message before returning it. */
- const char *message = php_pcre_get_error_msg(PCRE_G(error_code));
- RETURN_STRING(message);
- }
- /* }}} */
- /* {{{ module definition structures */
- zend_module_entry pcre_module_entry = { /* Module descriptor for the "pcre" extension; the initializer is positional, so entry order matters. */
- STANDARD_MODULE_HEADER,
- "pcre", /* extension name */
- ext_functions, /* function table, defined outside this chunk */
- PHP_MINIT(pcre), /* module init (MINIT) function */
- PHP_MSHUTDOWN(pcre), /* module shutdown (MSHUTDOWN) function */
- PHP_RINIT(pcre), /* request init (RINIT) function */
- PHP_RSHUTDOWN(pcre), /* request shutdown (RSHUTDOWN) function */
- PHP_MINFO(pcre), /* module info (MINFO) function */
- PHP_PCRE_VERSION, /* version reported for the extension */
- PHP_MODULE_GLOBALS(pcre), /* module globals descriptor */
- PHP_GINIT(pcre), /* globals init (GINIT) function */
- PHP_GSHUTDOWN(pcre), /* globals shutdown (GSHUTDOWN) function */
- NULL, /* NOTE(review): presumably the unused post-deactivate slot; confirm against zend_module_entry */
- STANDARD_MODULE_PROPERTIES_EX
- };
- #ifdef COMPILE_DL_PCRE /* NOTE(review): presumably set only for a dynamically loadable build; confirm in the build config */
- ZEND_GET_MODULE(pcre)
- #endif
- /* }}} */
- /* {{{ Accessor for this file's `mctx` match context, the same object the
- matching code here passes to pcre2_match() / pcre2_jit_match(). */
- PHPAPI pcre2_match_context *php_pcre_mctx(void)
- {
- return mctx;
- }
- /* }}} */
- /* {{{ Accessor for this file's `gctx` general context pointer. */
- PHPAPI pcre2_general_context *php_pcre_gctx(void)
- {
- return gctx;
- }
- /* }}} */
- /* {{{ Accessor for this file's `cctx` compile context pointer. */
- PHPAPI pcre2_compile_context *php_pcre_cctx(void)
- {
- return cctx;
- }
- /* }}} */
- /* {{{ Adds one to the reference counter of a cache entry.
- `pce` must not be NULL (asserted). */
- PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
- {
- assert(pce != NULL);
- pce->refcount += 1;
- }
- /* }}} */
- /* {{{ Subtracts one from the reference counter of a cache entry.
- `pce` must not be NULL and its counter must not already be zero (both
- asserted). Only the counter is adjusted here. */
- PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
- {
- assert(pce != NULL);
- assert(pce->refcount != 0);
- pce->refcount -= 1;
- }
- /* }}} */
- /* {{{ Returns the `re` member (a pcre2_code pointer) held by a cache entry.
- `pce` must not be NULL (asserted). */
- PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
- {
- assert(pce != NULL);
- pcre2_code *compiled = pce->re;
- return compiled;
- }
- /* }}} */
|