pcre2_jit_compile.c 429 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025140261402714028140291403014031140321403314034140351403614037140381403914040140411404214043140441404514046140471404814049140501405114052140531405414055140561405714058140591406014061140621406314064140651406614067140681406914070140711407214073140741407514076140771407814079140801408114082140831408414085140861408714088140891409014091140921409314094140951409614097140981409914100141011410214103141041410514106141071410814109141101411114112141131411414115141161411714118141191412014121141221412314124141251412614127141281412914130141311413214133141341413514136141371413814139141401414114142141431414414145141461414714148141491415014151141521415314154141551415614157141581415914160141611416214163141641416514166141671416814169141701417114172141731417414175141761417714178141791418014181141821418314184141851418614187141881418914190141911419214193141941419514196141971419814199142001420114202142031420414205142061420714208142091421014211142121421314214142151421614217142181421914220142211
422214223142241422514226142271422814229142301423114232142331423414235142361423714238142391424014241142421424314244142451424614247142481424914250142511425214253142541425514256142571425814259
  1. /*************************************************
  2. * Perl-Compatible Regular Expressions *
  3. *************************************************/
  4. /* PCRE is a library of functions to support regular expressions whose syntax
  5. and semantics are as close as possible to those of the Perl 5 language.
  6. Written by Philip Hazel
  7. This module by Zoltan Herczeg
  8. Original API code Copyright (c) 1997-2012 University of Cambridge
  9. New API code Copyright (c) 2016-2019 University of Cambridge
  10. -----------------------------------------------------------------------------
  11. Redistribution and use in source and binary forms, with or without
  12. modification, are permitted provided that the following conditions are met:
  13. * Redistributions of source code must retain the above copyright notice,
  14. this list of conditions and the following disclaimer.
  15. * Redistributions in binary form must reproduce the above copyright
  16. notice, this list of conditions and the following disclaimer in the
  17. documentation and/or other materials provided with the distribution.
  18. * Neither the name of the University of Cambridge nor the names of its
  19. contributors may be used to endorse or promote products derived from
  20. this software without specific prior written permission.
  21. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  22. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  25. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  26. CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  27. SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  28. INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  29. CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  30. ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  31. POSSIBILITY OF SUCH DAMAGE.
  32. -----------------------------------------------------------------------------
  33. */
  34. #ifdef HAVE_CONFIG_H
  35. #include "config.h"
  36. #endif
  37. #include "pcre2_internal.h"
  38. #ifdef SUPPORT_JIT
  39. /* All-in-one: Since we use the JIT compiler only from here,
  40. we just include it. This way we don't need to touch the build
  41. system files. */
  /* Configuration macros for sljit, defined ahead of the
     "sljit/sljitLir.c" include further down in this file.
     NOTE(review): the precise meaning of the SLJIT_CONFIG_* and
     SLJIT_VERBOSE values is defined by sljit itself, not here -- confirm
     against sljit's configuration header before changing them. */
  42. #define SLJIT_CONFIG_AUTO 1
  43. #define SLJIT_CONFIG_STATIC 1
  44. #define SLJIT_VERBOSE 0
  /* SLJIT_DEBUG follows PCRE2_DEBUG: 1 when PCRE2_DEBUG is defined,
     0 otherwise. */
  45. #ifdef PCRE2_DEBUG
  46. #define SLJIT_DEBUG 1
  47. #else
  48. #define SLJIT_DEBUG 0
  49. #endif
  /* Map sljit's allocation macros onto the pcre2_jit_malloc() and
     pcre2_jit_free() wrappers defined right after these macros. */
  50. #define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
  51. #define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
  52. static void * pcre2_jit_malloc(size_t size, void *allocator_data)
  53. {
  54. pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
  55. return allocator->malloc(size, allocator->memory_data);
  56. }
  57. static void pcre2_jit_free(void *ptr, void *allocator_data)
  58. {
  59. pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
  60. allocator->free(ptr, allocator->memory_data);
  61. }
  62. #include "sljit/sljitLir.c"
  63. #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
  64. #error Unsupported architecture
  65. #endif
  66. /* Defines for debugging purposes. */
  67. /* 1 - Use unoptimized capturing brackets.
  68. 2 - Enable capture_last_ptr (includes option 1). */
  69. /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
  70. /* 1 - Always have a control head. */
  71. /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
  72. /* Allocate memory for the regex stack on the real machine stack.
  73. Fast, but limited size. */
  74. #define MACHINE_STACK_SIZE 32768
/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
  77. #define STACK_GROWTH_RATE 8192
  78. /* Enable to check that the allocation could destroy temporaries. */
  79. #if defined SLJIT_DEBUG && SLJIT_DEBUG
  80. #define DESTROY_REGISTERS 1
  81. #endif
  82. /*
Short summary about the backtracking mechanism employed by the jit code generator:
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
  89. 'ab' - 'a' and 'b' regexps are concatenated
  90. 'a+' - 'a' is the sub-expression of the '+' operator
  91. The condition checkers are boolean (true/false) checkers. Machine code is generated
  92. for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
  96. Greedy star operator (*) :
  97. Matching path: match happens.
  98. Backtrack path: match failed.
  99. Non-greedy star operator (*?) :
  100. Matching path: no need to perform a match.
  101. Backtrack path: match is required.
The following example shows the code generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:
  105. A(B|C)D
  106. The generated code will be the following:
  107. A matching path
  108. '(' matching path (pushing arguments to the stack)
  109. B matching path
  110. ')' matching path (pushing arguments to the stack)
  111. D matching path
  112. return with successful match
  113. D backtrack path
  114. ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
  115. B backtrack path
  116. C expected path
  117. jump to D matching path
  118. C backtrack path
  119. A backtrack path
Notice that the order of the backtrack code paths is the opposite of the fast
code paths. In this way the topmost value on the stack always belongs
to the current backtrack code path. The backtrack path must check
whether there is a next alternative. If so, it needs to jump back to
the matching path eventually. Otherwise it needs to clear out its own stack
frame and continue the execution on the backtrack code paths.
  126. */
  127. /*
  128. Saved stack frames:
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
  133. The stack frames are stored in a chain list, and have the following format:
  134. ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
  135. Thus we can restore the private data to a particular point in the stack.
  136. */
/* Argument block whose address is the single parameter of a jit_function
(see the jit_function typedef). Pointer-sized members are grouped first,
followed by the integer members. */
typedef struct jit_arguments {
  /* Pointers first. */
  struct sljit_stack *stack;
  PCRE2_SPTR str;
  PCRE2_SPTR begin;
  PCRE2_SPTR end;
  pcre2_match_data *match_data;
  PCRE2_SPTR startchar_ptr;
  PCRE2_UCHAR *mark_ptr;
  /* Callout function pointer and the opaque data passed as its second argument. */
  int (*callout)(pcre2_callout_block *, void *);
  void *callout_data;
  /* Everything else after. */
  sljit_uw offset_limit;
  sljit_u32 limit_match;
  sljit_u32 oveccount;
  sljit_u32 options;
} jit_arguments;
/* Number of slots in each per-mode array of executable_functions. */
#define JIT_NUMBER_OF_COMPILE_MODES 3
/* Per-compile-mode bookkeeping: the three arrays are parallel and hold one
entry for each of the JIT_NUMBER_OF_COMPILE_MODES modes. */
typedef struct executable_functions {
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_u32 top_bracket;
  sljit_u32 limit_match;
} executable_functions;
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
/* Node of a singly linked list of stubs; each node pairs a jump (start)
with a label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
/* Negative sentinel values. NOTE(review): presumably stored in the
'framesize' members of the backtrack structures, whose comments say
"less than 0 if a frame is not needed" - confirm against the users. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
/* Tags for control entries. NOTE(review): presumably used by the control
verb chain (see control_head_ptr in compiler_common) - confirm. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
/* Kinds of early fail handling. NOTE(review): presumably related to the
early_fail_start_ptr / early_fail_end_ptr locals of compiler_common - confirm. */
enum early_fail_types {
  type_skip = 0,
  type_fail = 1,
  type_fail_range = 2
};
/* Pointer to a function using the SLJIT_FUNC calling convention that takes
a jit_arguments block and returns an int. */
typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
/* The following structure is the key data type for the recursive
code generator. It is allocated by compile_matchingpath, and contains
the arguments for compile_backtrackingpath. Must be the first member
of its descendants (every *_backtrack structure below starts with a
backtrack_common member named 'common'). */
typedef struct backtrack_common {
  /* Concatenation stack. */
  struct backtrack_common *prev;
  jump_list *nextbacktracks;
  /* Internal stack (for component operators). */
  struct backtrack_common *top;
  jump_list *topbacktracks;
  /* Opcode pointer. */
  PCRE2_SPTR cc;
} backtrack_common;
/* Backtrack record for assertions (extends backtrack_common). */
typedef struct assert_backtrack {
  backtrack_common common;
  jump_list *condfailed;
  /* Less than 0 if a frame is not needed. */
  int framesize;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* For iterators. */
  struct sljit_label *matchingpath;
} assert_backtrack;
/* Backtrack record for brackets (extends backtrack_common). The members of
the union 'u' share storage; which one is meaningful depends on the kind of
bracket, as noted on each member. */
typedef struct bracket_backtrack {
  backtrack_common common;
  /* Where to continue if an alternative is successfully matched. */
  struct sljit_label *alternative_matchingpath;
  /* For rmin and rmax iterators. */
  struct sljit_label *recursive_matchingpath;
  /* For greedy ? operator. */
  struct sljit_label *zero_matchingpath;
  /* Contains the branches of a failed condition. */
  union {
    /* Both for OP_COND, OP_SCOND. */
    jump_list *condfailed;
    assert_backtrack *assert;
    /* For OP_ONCE. Less than 0 if not needed. */
    int framesize;
    /* For brackets with >3 alternatives. */
    struct sljit_put_label *matching_put_label;
  } u;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
} bracket_backtrack;
/* Backtrack record for possessive brackets, judging by the name
(extends backtrack_common). */
typedef struct bracketpos_backtrack {
  backtrack_common common;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* Reverting stack is needed. */
  int framesize;
  /* Allocated stack size. */
  int stacksize;
} bracketpos_backtrack;
/* Backtrack record carrying a single matching path label
(extends backtrack_common). */
typedef struct braminzero_backtrack {
  backtrack_common common;
  struct sljit_label *matchingpath;
} braminzero_backtrack;
/* Backtrack record for character iterators (extends backtrack_common).
The union 'u' holds either a jump list or the 'charpos' description; the
two share storage. */
typedef struct char_iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
  union {
    jump_list *backtracks;
    struct {
      unsigned int othercasebit;
      PCRE2_UCHAR chr;
      BOOL enabled;
    } charpos;
  } u;
} char_iterator_backtrack;
/* Backtrack record for reference iterators, judging by the name
(extends backtrack_common). */
typedef struct ref_iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
} ref_iterator_backtrack;
/* Node of a singly linked list describing one recursively called function:
its labels and the calls that are still waiting for those labels. */
typedef struct recurse_entry {
  struct recurse_entry *next;
  /* Contains the function entry label. */
  struct sljit_label *entry_label;
  /* Contains the backtrack entry label of the function (the target of
  the calls collected in backtrack_calls, judging by the names). */
  struct sljit_label *backtrack_label;
  /* Collects the entry calls until the function is created. */
  jump_list *entry_calls;
  /* Collects the backtrack calls until the function is created. */
  jump_list *backtrack_calls;
  /* Points to the starting opcode. */
  sljit_sw start;
} recurse_entry;
/* Backtrack record for recursion (extends backtrack_common). */
typedef struct recurse_backtrack {
  backtrack_common common;
  /* Return to the matching path. */
  struct sljit_label *matchingpath;
  /* Recursive pattern. */
  recurse_entry *entry;
  /* Pattern is inlined. */
  BOOL inlined_pattern;
} recurse_backtrack;
/* Extra opcode value defined as OP_TABLE_LENGTH. NOTE(review): presumably
one past the last regular opcode - confirm against the opcode table. */
#define OP_THEN_TRAP OP_TABLE_LENGTH
/* Backtrack record for a then trap (extends backtrack_common). */
typedef struct then_trap_backtrack {
  backtrack_common common;
  /* If then_trap is not NULL, this structure contains the real
  then_trap for the backtracking path. */
  struct then_trap_backtrack *then_trap;
  /* Points to the starting opcode. */
  sljit_sw start;
  /* Exit point for the then opcodes of this alternative. */
  jump_list *quit;
  /* Frame size of the current alternative. */
  int framesize;
} then_trap_backtrack;
/* Limits for the fast forward character data; MAX_DIFF_CHARS is the
capacity of the 'chars' array below. */
#define MAX_N_CHARS 12
#define MAX_DIFF_CHARS 5
/* Set of characters that may occur at one position. */
typedef struct fast_forward_char_data {
  /* Number of characters in the chars array, 255 for any character. */
  sljit_u8 count;
  /* Number of last UTF-8 characters in the chars array. */
  sljit_u8 last_count;
  /* Available characters in the current position. */
  PCRE2_UCHAR chars[MAX_DIFF_CHARS];
} fast_forward_char_data;
#define MAX_CLASS_RANGE_SIZE 4
#define MAX_CLASS_CHARS_SIZE 3
/* Shared state of one JIT compilation. The helper macros of this file
(DEFINE_COMPILER, OVECTOR, PRIVATE_DATA, ...) read it through a variable
named 'common'. */
typedef struct compiler_common {
  /* The sljit generic compiler. */
  struct sljit_compiler *compiler;
  /* Compiled regular expression. */
  pcre2_real_code *re;
  /* First byte code. */
  PCRE2_SPTR start;
  /* Maps private data offset to each opcode. */
  sljit_s32 *private_data_ptrs;
  /* Chain list of read-only data ptrs. */
  void *read_only_data_head;
  /* Tells whether the capturing bracket is optimized. */
  sljit_u8 *optimized_cbracket;
  /* Tells whether the starting offset is a target of then. */
  sljit_u8 *then_offsets;
  /* Current position where a THEN must jump. */
  then_trap_backtrack *then_trap;
  /* Starting offset of private data for capturing brackets. */
  sljit_s32 cbra_ptr;
  /* Output vector starting point. Must be divisible by 2. */
  sljit_s32 ovector_start;
  /* Points to the starting character of the current match. */
  sljit_s32 start_ptr;
  /* Last known position of the requested byte. */
  sljit_s32 req_char_ptr;
  /* Head of the last recursion. */
  sljit_s32 recursive_head_ptr;
  /* First inspected character for partial matching.
  (Needed for avoiding zero length partial matches.) */
  sljit_s32 start_used_ptr;
  /* Starting pointer for partial soft matches. */
  sljit_s32 hit_start;
  /* Pointer of the match end position. */
  sljit_s32 match_end_ptr;
  /* Points to the marked string. */
  sljit_s32 mark_ptr;
  /* Recursive control verb management chain. */
  sljit_s32 control_head_ptr;
  /* Points to the last matched capture block index. */
  sljit_s32 capture_last_ptr;
  /* Fast forward skipping byte code pointer. */
  PCRE2_SPTR fast_forward_bc_ptr;
  /* Locals used by fast fail optimization. */
  sljit_s32 early_fail_start_ptr;
  sljit_s32 early_fail_end_ptr;
  /* Flipped and lower case tables. */
  const sljit_u8 *fcc;
  sljit_sw lcc;
  /* Mode can be PCRE2_JIT_COMPLETE and others. */
  int mode;
  /* TRUE, when empty match is accepted for partial matching. */
  BOOL allow_empty_partial;
  /* TRUE, when minlength is greater than 0.
  NOTE(review): the member name suggests the opposite condition (the
  pattern can match an empty string); confirm against the code that
  sets this flag. */
  BOOL might_be_empty;
  /* \K is found in the pattern. */
  BOOL has_set_som;
  /* (*SKIP:arg) is found in the pattern. */
  BOOL has_skip_arg;
  /* (*THEN) is found in the pattern. */
  BOOL has_then;
  /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
  BOOL has_skip_in_assert_back;
  /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
  BOOL local_quit_available;
  /* Currently in a positive assertion. */
  BOOL in_positive_assertion;
  /* Newline control. */
  int nltype;
  sljit_u32 nlmax;
  sljit_u32 nlmin;
  int newline;
  int bsr_nltype;
  sljit_u32 bsr_nlmax;
  sljit_u32 bsr_nlmin;
  /* Dollar endonly. */
  int endonly;
  /* Tables. */
  sljit_sw ctypes;
  /* Named capturing brackets. */
  PCRE2_SPTR name_table;
  sljit_sw name_count;
  sljit_sw name_entry_size;
  /* Labels and jump lists. */
  struct sljit_label *partialmatchlabel;
  struct sljit_label *quit_label;
  struct sljit_label *abort_label;
  struct sljit_label *accept_label;
  struct sljit_label *ff_newline_shortcut;
  stub_list *stubs;
  recurse_entry *entries;
  recurse_entry *currententry;
  jump_list *partialmatch;
  jump_list *quit;
  jump_list *positive_assertion_quit;
  jump_list *abort;
  jump_list *failed_match;
  jump_list *accept;
  jump_list *calllimit;
  jump_list *stackalloc;
  jump_list *revertframes;
  jump_list *wordboundary;
  jump_list *anynewline;
  jump_list *hspace;
  jump_list *vspace;
  jump_list *casefulcmp;
  jump_list *caselesscmp;
  jump_list *reset_match;
  BOOL unset_backref;
  BOOL alt_circumflex;
#ifdef SUPPORT_UNICODE
  /* Members that exist only in Unicode-enabled builds. */
  BOOL utf;
  BOOL invalid_utf;
  BOOL ucp;
  /* Points to saving area for iref. */
  sljit_s32 iref_ptr;
  jump_list *getucd;
  jump_list *getucdtype;
#if PCRE2_CODE_UNIT_WIDTH == 8
  /* Jump lists present only for 8-bit code units. */
  jump_list *utfreadchar;
  jump_list *utfreadtype8;
  jump_list *utfpeakcharback;
#endif
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
  /* Jump lists present only for 8-bit and 16-bit code units. */
  jump_list *utfreadchar_invalid;
  jump_list *utfreadnewline_invalid;
  jump_list *utfmoveback_invalid;
  jump_list *utfpeakcharback_invalid;
#endif
#endif /* SUPPORT_UNICODE */
} compiler_common;
/* For byte_sequence_compare. When SLJIT_UNALIGNED is enabled the context
also carries two unions ('c' and 'oc') that give int-, short-, byte- and
code-unit-sized views of the same storage.
NOTE(review): presumably 'c' holds the characters and 'oc' their
other-case variants - confirm against byte_sequence_compare. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
/* Undefine sljit macros. (CMP is redefined further below as a shortcut
for sljit_emit_cmp.) */
#undef CMP
/* Used for accessing the elements of the stack: converts an element index
into a byte offset (index times sizeof(sljit_sw), as an int). */
#define STACK(i) ((i) * (int)sizeof(sljit_sw))
/* Register assignment. When sljit reports a preferred shift register it
must be either SLJIT_R2 or SLJIT_R3; any other value is a build error. */
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
/* Nothing. */
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
#define SHIFT_REG_IS_R3
#else
#error "Unsupported shift register"
#endif
#endif
#define TMP1 SLJIT_R0
/* TMP2 and TMP3 are swapped when the preferred shift register is R3, so
that TMP2 is always the preferred shift register when one is reported. */
#ifdef SHIFT_REG_IS_R3
#define TMP2 SLJIT_R3
#define TMP3 SLJIT_R2
#else
#define TMP2 SLJIT_R2
#define TMP3 SLJIT_R3
#endif
#define STR_PTR SLJIT_R1
#define STR_END SLJIT_S0
#define STACK_TOP SLJIT_S1
#define STACK_LIMIT SLJIT_S2
#define COUNT_MATCH SLJIT_S3
#define ARGUMENTS SLJIT_S4
#define RETURN_ADDR SLJIT_R4
/* HAS_VIRTUAL_REGISTERS is 1 only for the 32-bit x86 sljit configuration. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#define HAS_VIRTUAL_REGISTERS 1
#else
#define HAS_VIRTUAL_REGISTERS 0
#endif
/* Local space layout. Each offset below is a multiple of sizeof(sljit_sw). */
/* These two locals can be used by the current opcode. */
#define LOCALS0 (0 * sizeof(sljit_sw))
#define LOCALS1 (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet been reached.
The macros below require a variable named 'common' (a compiler_common
pointer) to be in scope. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Looks up the private data entry of the opcode at 'cc', indexed by its
distance from common->start. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
/* Code unit width dependent helpers: MOV_UCHAR is the sljit move opcode for
one code unit and IN_UCHARS(x) multiplies x by the code unit size in bytes.
UCHAR_SHIFT is the matching shift amount; note that it is not defined for
the 8-bit width. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define MOV_UCHAR SLJIT_MOV_U8
#define IN_UCHARS(x) (x)
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define MOV_UCHAR SLJIT_MOV_U16
#define UCHAR_SHIFT (1)
#define IN_UCHARS(x) ((x) * 2)
#elif PCRE2_CODE_UNIT_WIDTH == 32
#define MOV_UCHAR SLJIT_MOV_U32
#define UCHAR_SHIFT (2)
#define IN_UCHARS(x) ((x) * 4)
#else
#error Unsupported compiling mode
#endif
/* Shortcuts. Every macro below except DEFINE_COMPILER expands to an sljit
call on a local variable named 'compiler'; DEFINE_COMPILER declares that
variable from common->compiler, so it needs 'common' in scope. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define OP_SRC(op, src, srcw) \
  sljit_emit_op_src(compiler, (op), (src), (srcw))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds an existing jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define CMOV(type, dst_reg, src, srcw) \
  sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
  563. #define READ_CHAR_MAX 0x7fffffff
  564. #define INVALID_UTF_CHAR -1
  565. #define UNASSIGNED_UTF_CHAR 888
  566. #if defined SUPPORT_UNICODE
  567. #if PCRE2_CODE_UNIT_WIDTH == 8
  568. #define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  569. { \
  570. if (ptr[0] <= 0x7f) \
  571. c = *ptr++; \
  572. else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
  573. { \
  574. c = ptr[1] - 0x80; \
  575. \
  576. if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
  577. { \
  578. c |= (ptr[0] - 0xc0) << 6; \
  579. ptr += 2; \
  580. } \
  581. else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
  582. { \
  583. c = c << 6 | (ptr[2] - 0x80); \
  584. \
  585. if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
  586. { \
  587. c |= (ptr[0] - 0xe0) << 12; \
  588. ptr += 3; \
  589. \
  590. if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
  591. { \
  592. invalid_action; \
  593. } \
  594. } \
  595. else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
  596. { \
  597. c = c << 6 | (ptr[3] - 0x80); \
  598. \
  599. if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
  600. { \
  601. c |= (ptr[0] - 0xf0) << 18; \
  602. ptr += 4; \
  603. \
  604. if (c >= 0x110000 || c < 0x10000) \
  605. { \
  606. invalid_action; \
  607. } \
  608. } \
  609. else \
  610. { \
  611. invalid_action; \
  612. } \
  613. } \
  614. else \
  615. { \
  616. invalid_action; \
  617. } \
  618. } \
  619. else \
  620. { \
  621. invalid_action; \
  622. } \
  623. } \
  624. else \
  625. { \
  626. invalid_action; \
  627. } \
  628. }
  629. #define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  630. { \
  631. c = ptr[-1]; \
  632. if (c <= 0x7f) \
  633. ptr--; \
  634. else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
  635. { \
  636. c -= 0x80; \
  637. \
  638. if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
  639. { \
  640. c |= (ptr[-2] - 0xc0) << 6; \
  641. ptr -= 2; \
  642. } \
  643. else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
  644. { \
  645. c = c << 6 | (ptr[-2] - 0x80); \
  646. \
  647. if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
  648. { \
  649. c |= (ptr[-3] - 0xe0) << 12; \
  650. ptr -= 3; \
  651. \
  652. if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
  653. { \
  654. invalid_action; \
  655. } \
  656. } \
  657. else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
  658. { \
  659. c = c << 6 | (ptr[-3] - 0x80); \
  660. \
  661. if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
  662. { \
  663. c |= (ptr[-4] - 0xf0) << 18; \
  664. ptr -= 4; \
  665. \
  666. if (c >= 0x110000 || c < 0x10000) \
  667. { \
  668. invalid_action; \
  669. } \
  670. } \
  671. else \
  672. { \
  673. invalid_action; \
  674. } \
  675. } \
  676. else \
  677. { \
  678. invalid_action; \
  679. } \
  680. } \
  681. else \
  682. { \
  683. invalid_action; \
  684. } \
  685. } \
  686. else \
  687. { \
  688. invalid_action; \
  689. } \
  690. }
  691. #elif PCRE2_CODE_UNIT_WIDTH == 16
  692. #define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  693. { \
  694. if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \
  695. c = *ptr++; \
  696. else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
  697. { \
  698. c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \
  699. ptr += 2; \
  700. } \
  701. else \
  702. { \
  703. invalid_action; \
  704. } \
  705. }
  706. #define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  707. { \
  708. c = ptr[-1]; \
  709. if (c < 0xd800 || c >= 0xe000) \
  710. ptr--; \
  711. else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
  712. { \
  713. c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \
  714. ptr -= 2; \
  715. } \
  716. else \
  717. { \
  718. invalid_action; \
  719. } \
  720. }
  721. #elif PCRE2_CODE_UNIT_WIDTH == 32
  722. #define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  723. { \
  724. if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \
  725. c = *ptr++; \
  726. else \
  727. { \
  728. invalid_action; \
  729. } \
  730. }
  731. #define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  732. { \
  733. c = ptr[-1]; \
  734. if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \
  735. ptr--; \
  736. else \
  737. { \
  738. invalid_action; \
  739. } \
  740. }
  741. #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  742. #endif /* SUPPORT_UNICODE */
  743. static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
  744. {
  745. SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
  746. do cc += GET(cc, 1); while (*cc == OP_ALT);
  747. SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
  748. cc += 1 + LINK_SIZE;
  749. return cc;
  750. }
  751. static int no_alternatives(PCRE2_SPTR cc)
  752. {
  753. int count = 0;
  754. SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
  755. do
  756. {
  757. cc += GET(cc, 1);
  758. count++;
  759. }
  760. while (*cc == OP_ALT);
  761. SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
  762. return count;
  763. }
/* Functions which might need modification for every newly supported opcode:
  765. next_opcode
  766. check_opcode_types
  767. set_private_data_ptrs
  768. get_framesize
  769. init_frame
  770. get_recurse_data_length
  771. copy_recurse_data
  772. compile_matchingpath
  773. compile_backtrackingpath
  774. */
  775. static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
  776. {
  777. SLJIT_UNUSED_ARG(common);
  778. switch(*cc)
  779. {
  780. case OP_SOD:
  781. case OP_SOM:
  782. case OP_SET_SOM:
  783. case OP_NOT_WORD_BOUNDARY:
  784. case OP_WORD_BOUNDARY:
  785. case OP_NOT_DIGIT:
  786. case OP_DIGIT:
  787. case OP_NOT_WHITESPACE:
  788. case OP_WHITESPACE:
  789. case OP_NOT_WORDCHAR:
  790. case OP_WORDCHAR:
  791. case OP_ANY:
  792. case OP_ALLANY:
  793. case OP_NOTPROP:
  794. case OP_PROP:
  795. case OP_ANYNL:
  796. case OP_NOT_HSPACE:
  797. case OP_HSPACE:
  798. case OP_NOT_VSPACE:
  799. case OP_VSPACE:
  800. case OP_EXTUNI:
  801. case OP_EODN:
  802. case OP_EOD:
  803. case OP_CIRC:
  804. case OP_CIRCM:
  805. case OP_DOLL:
  806. case OP_DOLLM:
  807. case OP_CRSTAR:
  808. case OP_CRMINSTAR:
  809. case OP_CRPLUS:
  810. case OP_CRMINPLUS:
  811. case OP_CRQUERY:
  812. case OP_CRMINQUERY:
  813. case OP_CRRANGE:
  814. case OP_CRMINRANGE:
  815. case OP_CRPOSSTAR:
  816. case OP_CRPOSPLUS:
  817. case OP_CRPOSQUERY:
  818. case OP_CRPOSRANGE:
  819. case OP_CLASS:
  820. case OP_NCLASS:
  821. case OP_REF:
  822. case OP_REFI:
  823. case OP_DNREF:
  824. case OP_DNREFI:
  825. case OP_RECURSE:
  826. case OP_CALLOUT:
  827. case OP_ALT:
  828. case OP_KET:
  829. case OP_KETRMAX:
  830. case OP_KETRMIN:
  831. case OP_KETRPOS:
  832. case OP_REVERSE:
  833. case OP_ASSERT:
  834. case OP_ASSERT_NOT:
  835. case OP_ASSERTBACK:
  836. case OP_ASSERTBACK_NOT:
  837. case OP_ASSERT_NA:
  838. case OP_ASSERTBACK_NA:
  839. case OP_ONCE:
  840. case OP_SCRIPT_RUN:
  841. case OP_BRA:
  842. case OP_BRAPOS:
  843. case OP_CBRA:
  844. case OP_CBRAPOS:
  845. case OP_COND:
  846. case OP_SBRA:
  847. case OP_SBRAPOS:
  848. case OP_SCBRA:
  849. case OP_SCBRAPOS:
  850. case OP_SCOND:
  851. case OP_CREF:
  852. case OP_DNCREF:
  853. case OP_RREF:
  854. case OP_DNRREF:
  855. case OP_FALSE:
  856. case OP_TRUE:
  857. case OP_BRAZERO:
  858. case OP_BRAMINZERO:
  859. case OP_BRAPOSZERO:
  860. case OP_PRUNE:
  861. case OP_SKIP:
  862. case OP_THEN:
  863. case OP_COMMIT:
  864. case OP_FAIL:
  865. case OP_ACCEPT:
  866. case OP_ASSERT_ACCEPT:
  867. case OP_CLOSE:
  868. case OP_SKIPZERO:
  869. return cc + PRIV(OP_lengths)[*cc];
  870. case OP_CHAR:
  871. case OP_CHARI:
  872. case OP_NOT:
  873. case OP_NOTI:
  874. case OP_STAR:
  875. case OP_MINSTAR:
  876. case OP_PLUS:
  877. case OP_MINPLUS:
  878. case OP_QUERY:
  879. case OP_MINQUERY:
  880. case OP_UPTO:
  881. case OP_MINUPTO:
  882. case OP_EXACT:
  883. case OP_POSSTAR:
  884. case OP_POSPLUS:
  885. case OP_POSQUERY:
  886. case OP_POSUPTO:
  887. case OP_STARI:
  888. case OP_MINSTARI:
  889. case OP_PLUSI:
  890. case OP_MINPLUSI:
  891. case OP_QUERYI:
  892. case OP_MINQUERYI:
  893. case OP_UPTOI:
  894. case OP_MINUPTOI:
  895. case OP_EXACTI:
  896. case OP_POSSTARI:
  897. case OP_POSPLUSI:
  898. case OP_POSQUERYI:
  899. case OP_POSUPTOI:
  900. case OP_NOTSTAR:
  901. case OP_NOTMINSTAR:
  902. case OP_NOTPLUS:
  903. case OP_NOTMINPLUS:
  904. case OP_NOTQUERY:
  905. case OP_NOTMINQUERY:
  906. case OP_NOTUPTO:
  907. case OP_NOTMINUPTO:
  908. case OP_NOTEXACT:
  909. case OP_NOTPOSSTAR:
  910. case OP_NOTPOSPLUS:
  911. case OP_NOTPOSQUERY:
  912. case OP_NOTPOSUPTO:
  913. case OP_NOTSTARI:
  914. case OP_NOTMINSTARI:
  915. case OP_NOTPLUSI:
  916. case OP_NOTMINPLUSI:
  917. case OP_NOTQUERYI:
  918. case OP_NOTMINQUERYI:
  919. case OP_NOTUPTOI:
  920. case OP_NOTMINUPTOI:
  921. case OP_NOTEXACTI:
  922. case OP_NOTPOSSTARI:
  923. case OP_NOTPOSPLUSI:
  924. case OP_NOTPOSQUERYI:
  925. case OP_NOTPOSUPTOI:
  926. cc += PRIV(OP_lengths)[*cc];
  927. #ifdef SUPPORT_UNICODE
  928. if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
  929. #endif
  930. return cc;
  931. /* Special cases. */
  932. case OP_TYPESTAR:
  933. case OP_TYPEMINSTAR:
  934. case OP_TYPEPLUS:
  935. case OP_TYPEMINPLUS:
  936. case OP_TYPEQUERY:
  937. case OP_TYPEMINQUERY:
  938. case OP_TYPEUPTO:
  939. case OP_TYPEMINUPTO:
  940. case OP_TYPEEXACT:
  941. case OP_TYPEPOSSTAR:
  942. case OP_TYPEPOSPLUS:
  943. case OP_TYPEPOSQUERY:
  944. case OP_TYPEPOSUPTO:
  945. return cc + PRIV(OP_lengths)[*cc] - 1;
  946. case OP_ANYBYTE:
  947. #ifdef SUPPORT_UNICODE
  948. if (common->utf) return NULL;
  949. #endif
  950. return cc + 1;
  951. case OP_CALLOUT_STR:
  952. return cc + GET(cc, 1 + 2*LINK_SIZE);
  953. #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  954. case OP_XCLASS:
  955. return cc + GET(cc, 1);
  956. #endif
  957. case OP_MARK:
  958. case OP_COMMIT_ARG:
  959. case OP_PRUNE_ARG:
  960. case OP_SKIP_ARG:
  961. case OP_THEN_ARG:
  962. return cc + 1 + 2 + cc[1];
  963. default:
  964. SLJIT_UNREACHABLE();
  965. return NULL;
  966. }
  967. }
  968. static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
  969. {
  970. int count;
  971. PCRE2_SPTR slot;
  972. PCRE2_SPTR assert_back_end = cc - 1;
  973. PCRE2_SPTR assert_na_end = cc - 1;
  974. /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
  975. while (cc < ccend)
  976. {
  977. switch(*cc)
  978. {
  979. case OP_SET_SOM:
  980. common->has_set_som = TRUE;
  981. common->might_be_empty = TRUE;
  982. cc += 1;
  983. break;
  984. case OP_REFI:
  985. #ifdef SUPPORT_UNICODE
  986. if (common->iref_ptr == 0)
  987. {
  988. common->iref_ptr = common->ovector_start;
  989. common->ovector_start += 3 * sizeof(sljit_sw);
  990. }
  991. #endif /* SUPPORT_UNICODE */
  992. /* Fall through. */
  993. case OP_REF:
  994. common->optimized_cbracket[GET2(cc, 1)] = 0;
  995. cc += 1 + IMM2_SIZE;
  996. break;
  997. case OP_ASSERT_NA:
  998. case OP_ASSERTBACK_NA:
  999. slot = bracketend(cc);
  1000. if (slot > assert_na_end)
  1001. assert_na_end = slot;
  1002. cc += 1 + LINK_SIZE;
  1003. break;
  1004. case OP_CBRAPOS:
  1005. case OP_SCBRAPOS:
  1006. common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
  1007. cc += 1 + LINK_SIZE + IMM2_SIZE;
  1008. break;
  1009. case OP_COND:
  1010. case OP_SCOND:
  1011. /* Only AUTO_CALLOUT can insert this opcode. We do
  1012. not intend to support this case. */
  1013. if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
  1014. return FALSE;
  1015. cc += 1 + LINK_SIZE;
  1016. break;
  1017. case OP_CREF:
  1018. common->optimized_cbracket[GET2(cc, 1)] = 0;
  1019. cc += 1 + IMM2_SIZE;
  1020. break;
  1021. case OP_DNREF:
  1022. case OP_DNREFI:
  1023. case OP_DNCREF:
  1024. count = GET2(cc, 1 + IMM2_SIZE);
  1025. slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
  1026. while (count-- > 0)
  1027. {
  1028. common->optimized_cbracket[GET2(slot, 0)] = 0;
  1029. slot += common->name_entry_size;
  1030. }
  1031. cc += 1 + 2 * IMM2_SIZE;
  1032. break;
  1033. case OP_RECURSE:
  1034. /* Set its value only once. */
  1035. if (common->recursive_head_ptr == 0)
  1036. {
  1037. common->recursive_head_ptr = common->ovector_start;
  1038. common->ovector_start += sizeof(sljit_sw);
  1039. }
  1040. cc += 1 + LINK_SIZE;
  1041. break;
  1042. case OP_CALLOUT:
  1043. case OP_CALLOUT_STR:
  1044. if (common->capture_last_ptr == 0)
  1045. {
  1046. common->capture_last_ptr = common->ovector_start;
  1047. common->ovector_start += sizeof(sljit_sw);
  1048. }
  1049. cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
  1050. break;
  1051. case OP_ASSERTBACK:
  1052. slot = bracketend(cc);
  1053. if (slot > assert_back_end)
  1054. assert_back_end = slot;
  1055. cc += 1 + LINK_SIZE;
  1056. break;
  1057. case OP_THEN_ARG:
  1058. common->has_then = TRUE;
  1059. common->control_head_ptr = 1;
  1060. /* Fall through. */
  1061. case OP_COMMIT_ARG:
  1062. case OP_PRUNE_ARG:
  1063. if (cc < assert_na_end)
  1064. return FALSE;
  1065. /* Fall through */
  1066. case OP_MARK:
  1067. if (common->mark_ptr == 0)
  1068. {
  1069. common->mark_ptr = common->ovector_start;
  1070. common->ovector_start += sizeof(sljit_sw);
  1071. }
  1072. cc += 1 + 2 + cc[1];
  1073. break;
  1074. case OP_THEN:
  1075. common->has_then = TRUE;
  1076. common->control_head_ptr = 1;
  1077. cc += 1;
  1078. break;
  1079. case OP_SKIP:
  1080. if (cc < assert_back_end)
  1081. common->has_skip_in_assert_back = TRUE;
  1082. if (cc < assert_na_end)
  1083. return FALSE;
  1084. cc += 1;
  1085. break;
  1086. case OP_SKIP_ARG:
  1087. common->control_head_ptr = 1;
  1088. common->has_skip_arg = TRUE;
  1089. if (cc < assert_back_end)
  1090. common->has_skip_in_assert_back = TRUE;
  1091. if (cc < assert_na_end)
  1092. return FALSE;
  1093. cc += 1 + 2 + cc[1];
  1094. break;
  1095. case OP_PRUNE:
  1096. case OP_COMMIT:
  1097. case OP_ASSERT_ACCEPT:
  1098. if (cc < assert_na_end)
  1099. return FALSE;
  1100. cc++;
  1101. break;
  1102. default:
  1103. cc = next_opcode(common, cc);
  1104. if (cc == NULL)
  1105. return FALSE;
  1106. break;
  1107. }
  1108. }
  1109. return TRUE;
  1110. }
  1111. #define EARLY_FAIL_ENHANCE_MAX (1 + 3)
  1112. /*
  1113. start:
  1114. 0 - skip / early fail allowed
  1115. 1 - only early fail with range allowed
  1116. >1 - (start - 1) early fail is processed
  1117. return: current number of iterators enhanced with fast fail
  1118. */
  1119. static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start,
  1120. sljit_s32 depth, int start, BOOL fast_forward_allowed)
  1121. {
  1122. PCRE2_SPTR begin = cc;
  1123. PCRE2_SPTR next_alt;
  1124. PCRE2_SPTR end;
  1125. PCRE2_SPTR accelerated_start;
  1126. BOOL prev_fast_forward_allowed;
  1127. int result = 0;
  1128. int count;
  1129. SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
  1130. SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
  1131. SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);
  1132. next_alt = cc + GET(cc, 1);
  1133. if (*next_alt == OP_ALT)
  1134. fast_forward_allowed = FALSE;
  1135. do
  1136. {
  1137. count = start;
  1138. cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
  1139. while (TRUE)
  1140. {
  1141. accelerated_start = NULL;
  1142. switch(*cc)
  1143. {
  1144. case OP_SOD:
  1145. case OP_SOM:
  1146. case OP_SET_SOM:
  1147. case OP_NOT_WORD_BOUNDARY:
  1148. case OP_WORD_BOUNDARY:
  1149. case OP_EODN:
  1150. case OP_EOD:
  1151. case OP_CIRC:
  1152. case OP_CIRCM:
  1153. case OP_DOLL:
  1154. case OP_DOLLM:
  1155. /* Zero width assertions. */
  1156. cc++;
  1157. continue;
  1158. case OP_NOT_DIGIT:
  1159. case OP_DIGIT:
  1160. case OP_NOT_WHITESPACE:
  1161. case OP_WHITESPACE:
  1162. case OP_NOT_WORDCHAR:
  1163. case OP_WORDCHAR:
  1164. case OP_ANY:
  1165. case OP_ALLANY:
  1166. case OP_ANYBYTE:
  1167. case OP_NOT_HSPACE:
  1168. case OP_HSPACE:
  1169. case OP_NOT_VSPACE:
  1170. case OP_VSPACE:
  1171. fast_forward_allowed = FALSE;
  1172. cc++;
  1173. continue;
  1174. case OP_ANYNL:
  1175. case OP_EXTUNI:
  1176. fast_forward_allowed = FALSE;
  1177. if (count == 0)
  1178. count = 1;
  1179. cc++;
  1180. continue;
  1181. case OP_NOTPROP:
  1182. case OP_PROP:
  1183. fast_forward_allowed = FALSE;
  1184. cc += 1 + 2;
  1185. continue;
  1186. case OP_CHAR:
  1187. case OP_CHARI:
  1188. case OP_NOT:
  1189. case OP_NOTI:
  1190. fast_forward_allowed = FALSE;
  1191. cc += 2;
  1192. #ifdef SUPPORT_UNICODE
  1193. if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
  1194. #endif
  1195. continue;
  1196. case OP_TYPESTAR:
  1197. case OP_TYPEMINSTAR:
  1198. case OP_TYPEPLUS:
  1199. case OP_TYPEMINPLUS:
  1200. case OP_TYPEPOSSTAR:
  1201. case OP_TYPEPOSPLUS:
  1202. /* The type or prop opcode is skipped in the next iteration. */
  1203. cc += 1;
  1204. if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
  1205. {
  1206. accelerated_start = cc - 1;
  1207. break;
  1208. }
  1209. if (count == 0)
  1210. count = 1;
  1211. fast_forward_allowed = FALSE;
  1212. continue;
  1213. case OP_TYPEUPTO:
  1214. case OP_TYPEMINUPTO:
  1215. case OP_TYPEEXACT:
  1216. case OP_TYPEPOSUPTO:
  1217. cc += IMM2_SIZE;
  1218. /* Fall through */
  1219. case OP_TYPEQUERY:
  1220. case OP_TYPEMINQUERY:
  1221. case OP_TYPEPOSQUERY:
  1222. /* The type or prop opcode is skipped in the next iteration. */
  1223. fast_forward_allowed = FALSE;
  1224. if (count == 0)
  1225. count = 1;
  1226. cc += 1;
  1227. continue;
  1228. case OP_STAR:
  1229. case OP_MINSTAR:
  1230. case OP_PLUS:
  1231. case OP_MINPLUS:
  1232. case OP_POSSTAR:
  1233. case OP_POSPLUS:
  1234. case OP_STARI:
  1235. case OP_MINSTARI:
  1236. case OP_PLUSI:
  1237. case OP_MINPLUSI:
  1238. case OP_POSSTARI:
  1239. case OP_POSPLUSI:
  1240. case OP_NOTSTAR:
  1241. case OP_NOTMINSTAR:
  1242. case OP_NOTPLUS:
  1243. case OP_NOTMINPLUS:
  1244. case OP_NOTPOSSTAR:
  1245. case OP_NOTPOSPLUS:
  1246. case OP_NOTSTARI:
  1247. case OP_NOTMINSTARI:
  1248. case OP_NOTPLUSI:
  1249. case OP_NOTMINPLUSI:
  1250. case OP_NOTPOSSTARI:
  1251. case OP_NOTPOSPLUSI:
  1252. accelerated_start = cc;
  1253. cc += 2;
  1254. #ifdef SUPPORT_UNICODE
  1255. if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
  1256. #endif
  1257. break;
  1258. case OP_UPTO:
  1259. case OP_MINUPTO:
  1260. case OP_EXACT:
  1261. case OP_POSUPTO:
  1262. case OP_UPTOI:
  1263. case OP_MINUPTOI:
  1264. case OP_EXACTI:
  1265. case OP_POSUPTOI:
  1266. case OP_NOTUPTO:
  1267. case OP_NOTMINUPTO:
  1268. case OP_NOTEXACT:
  1269. case OP_NOTPOSUPTO:
  1270. case OP_NOTUPTOI:
  1271. case OP_NOTMINUPTOI:
  1272. case OP_NOTEXACTI:
  1273. case OP_NOTPOSUPTOI:
  1274. cc += IMM2_SIZE;
  1275. /* Fall through */
  1276. case OP_QUERY:
  1277. case OP_MINQUERY:
  1278. case OP_POSQUERY:
  1279. case OP_QUERYI:
  1280. case OP_MINQUERYI:
  1281. case OP_POSQUERYI:
  1282. case OP_NOTQUERY:
  1283. case OP_NOTMINQUERY:
  1284. case OP_NOTPOSQUERY:
  1285. case OP_NOTQUERYI:
  1286. case OP_NOTMINQUERYI:
  1287. case OP_NOTPOSQUERYI:
  1288. fast_forward_allowed = FALSE;
  1289. if (count == 0)
  1290. count = 1;
  1291. cc += 2;
  1292. #ifdef SUPPORT_UNICODE
  1293. if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
  1294. #endif
  1295. continue;
  1296. case OP_CLASS:
  1297. case OP_NCLASS:
  1298. #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  1299. case OP_XCLASS:
  1300. accelerated_start = cc;
  1301. cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
  1302. #else
  1303. accelerated_start = cc;
  1304. cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
  1305. #endif
  1306. switch (*cc)
  1307. {
  1308. case OP_CRSTAR:
  1309. case OP_CRMINSTAR:
  1310. case OP_CRPLUS:
  1311. case OP_CRMINPLUS:
  1312. case OP_CRPOSSTAR:
  1313. case OP_CRPOSPLUS:
  1314. cc++;
  1315. break;
  1316. case OP_CRRANGE:
  1317. case OP_CRMINRANGE:
  1318. case OP_CRPOSRANGE:
  1319. cc += 2 * IMM2_SIZE;
  1320. /* Fall through */
  1321. case OP_CRQUERY:
  1322. case OP_CRMINQUERY:
  1323. case OP_CRPOSQUERY:
  1324. cc++;
  1325. if (count == 0)
  1326. count = 1;
  1327. /* Fall through */
  1328. default:
  1329. accelerated_start = NULL;
  1330. fast_forward_allowed = FALSE;
  1331. continue;
  1332. }
  1333. break;
  1334. case OP_ONCE:
  1335. case OP_BRA:
  1336. case OP_CBRA:
  1337. end = cc + GET(cc, 1);
  1338. prev_fast_forward_allowed = fast_forward_allowed;
  1339. fast_forward_allowed = FALSE;
  1340. if (depth >= 4)
  1341. break;
  1342. end = bracketend(cc) - (1 + LINK_SIZE);
  1343. if (*end != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
  1344. break;
  1345. count = detect_early_fail(common, cc, private_data_start, depth + 1, count, prev_fast_forward_allowed);
  1346. if (PRIVATE_DATA(cc) != 0)
  1347. common->private_data_ptrs[begin - common->start] = 1;
  1348. if (count < EARLY_FAIL_ENHANCE_MAX)
  1349. {
  1350. cc = end + (1 + LINK_SIZE);
  1351. continue;
  1352. }
  1353. break;
  1354. case OP_KET:
  1355. SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
  1356. if (cc >= next_alt)
  1357. break;
  1358. cc += 1 + LINK_SIZE;
  1359. continue;
  1360. }
  1361. if (accelerated_start != NULL)
  1362. {
  1363. if (count == 0)
  1364. {
  1365. count++;
  1366. if (fast_forward_allowed)
  1367. {
  1368. common->fast_forward_bc_ptr = accelerated_start;
  1369. common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
  1370. *private_data_start += sizeof(sljit_sw);
  1371. }
  1372. else
  1373. {
  1374. common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;
  1375. if (common->early_fail_start_ptr == 0)
  1376. common->early_fail_start_ptr = *private_data_start;
  1377. *private_data_start += sizeof(sljit_sw);
  1378. common->early_fail_end_ptr = *private_data_start;
  1379. if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
  1380. return EARLY_FAIL_ENHANCE_MAX;
  1381. }
  1382. }
  1383. else
  1384. {
  1385. common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;
  1386. if (common->early_fail_start_ptr == 0)
  1387. common->early_fail_start_ptr = *private_data_start;
  1388. *private_data_start += 2 * sizeof(sljit_sw);
  1389. common->early_fail_end_ptr = *private_data_start;
  1390. if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
  1391. return EARLY_FAIL_ENHANCE_MAX;
  1392. }
  1393. /* Cannot be part of a repeat. */
  1394. common->private_data_ptrs[begin - common->start] = 1;
  1395. count++;
  1396. if (count < EARLY_FAIL_ENHANCE_MAX)
  1397. continue;
  1398. }
  1399. break;
  1400. }
  1401. if (*cc != OP_ALT && *cc != OP_KET)
  1402. result = EARLY_FAIL_ENHANCE_MAX;
  1403. else if (result < count)
  1404. result = count;
  1405. cc = next_alt;
  1406. next_alt = cc + GET(cc, 1);
  1407. }
  1408. while (*cc == OP_ALT);
  1409. return result;
  1410. }
  1411. static int get_class_iterator_size(PCRE2_SPTR cc)
  1412. {
  1413. sljit_u32 min;
  1414. sljit_u32 max;
  1415. switch(*cc)
  1416. {
  1417. case OP_CRSTAR:
  1418. case OP_CRPLUS:
  1419. return 2;
  1420. case OP_CRMINSTAR:
  1421. case OP_CRMINPLUS:
  1422. case OP_CRQUERY:
  1423. case OP_CRMINQUERY:
  1424. return 1;
  1425. case OP_CRRANGE:
  1426. case OP_CRMINRANGE:
  1427. min = GET2(cc, 1);
  1428. max = GET2(cc, 1 + IMM2_SIZE);
  1429. if (max == 0)
  1430. return (*cc == OP_CRRANGE) ? 2 : 1;
  1431. max -= min;
  1432. if (max > 2)
  1433. max = 2;
  1434. return max;
  1435. default:
  1436. return 0;
  1437. }
  1438. }
  1439. static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
  1440. {
  1441. PCRE2_SPTR end = bracketend(begin);
  1442. PCRE2_SPTR next;
  1443. PCRE2_SPTR next_end;
  1444. PCRE2_SPTR max_end;
  1445. PCRE2_UCHAR type;
  1446. sljit_sw length = end - begin;
  1447. sljit_s32 min, max, i;
  1448. /* Detect fixed iterations first. */
  1449. if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
  1450. return FALSE;
  1451. /* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/
  1452. * Skip the check of the second part. */
  1453. if (PRIVATE_DATA(end - LINK_SIZE) != 0)
  1454. return TRUE;
  1455. next = end;
  1456. min = 1;
  1457. while (1)
  1458. {
  1459. if (*next != *begin)
  1460. break;
  1461. next_end = bracketend(next);
  1462. if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
  1463. break;
  1464. next = next_end;
  1465. min++;
  1466. }
  1467. if (min == 2)
  1468. return FALSE;
  1469. max = 0;
  1470. max_end = next;
  1471. if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
  1472. {
  1473. type = *next;
  1474. while (1)
  1475. {
  1476. if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
  1477. break;
  1478. next_end = bracketend(next + 2 + LINK_SIZE);
  1479. if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
  1480. break;
  1481. next = next_end;
  1482. max++;
  1483. }
  1484. if (next[0] == type && next[1] == *begin && max >= 1)
  1485. {
  1486. next_end = bracketend(next + 1);
  1487. if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
  1488. {
  1489. for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
  1490. if (*next_end != OP_KET)
  1491. break;
  1492. if (i == max)
  1493. {
  1494. common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
  1495. common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
  1496. /* +2 the original and the last. */
  1497. common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
  1498. if (min == 1)
  1499. return TRUE;
  1500. min--;
  1501. max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
  1502. }
  1503. }
  1504. }
  1505. }
  1506. if (min >= 3)
  1507. {
  1508. common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
  1509. common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
  1510. common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
  1511. return TRUE;
  1512. }
  1513. return FALSE;
  1514. }
  1515. #define CASE_ITERATOR_PRIVATE_DATA_1 \
  1516. case OP_MINSTAR: \
  1517. case OP_MINPLUS: \
  1518. case OP_QUERY: \
  1519. case OP_MINQUERY: \
  1520. case OP_MINSTARI: \
  1521. case OP_MINPLUSI: \
  1522. case OP_QUERYI: \
  1523. case OP_MINQUERYI: \
  1524. case OP_NOTMINSTAR: \
  1525. case OP_NOTMINPLUS: \
  1526. case OP_NOTQUERY: \
  1527. case OP_NOTMINQUERY: \
  1528. case OP_NOTMINSTARI: \
  1529. case OP_NOTMINPLUSI: \
  1530. case OP_NOTQUERYI: \
  1531. case OP_NOTMINQUERYI:
  1532. #define CASE_ITERATOR_PRIVATE_DATA_2A \
  1533. case OP_STAR: \
  1534. case OP_PLUS: \
  1535. case OP_STARI: \
  1536. case OP_PLUSI: \
  1537. case OP_NOTSTAR: \
  1538. case OP_NOTPLUS: \
  1539. case OP_NOTSTARI: \
  1540. case OP_NOTPLUSI:
  1541. #define CASE_ITERATOR_PRIVATE_DATA_2B \
  1542. case OP_UPTO: \
  1543. case OP_MINUPTO: \
  1544. case OP_UPTOI: \
  1545. case OP_MINUPTOI: \
  1546. case OP_NOTUPTO: \
  1547. case OP_NOTMINUPTO: \
  1548. case OP_NOTUPTOI: \
  1549. case OP_NOTMINUPTOI:
  1550. #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
  1551. case OP_TYPEMINSTAR: \
  1552. case OP_TYPEMINPLUS: \
  1553. case OP_TYPEQUERY: \
  1554. case OP_TYPEMINQUERY:
  1555. #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
  1556. case OP_TYPESTAR: \
  1557. case OP_TYPEPLUS:
  1558. #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
  1559. case OP_TYPEUPTO: \
  1560. case OP_TYPEMINUPTO:
  1561. static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
  1562. {
  1563. PCRE2_SPTR cc = common->start;
  1564. PCRE2_SPTR alternative;
  1565. PCRE2_SPTR end = NULL;
  1566. int private_data_ptr = *private_data_start;
  1567. int space, size, bracketlen;
  1568. BOOL repeat_check = TRUE;
  1569. while (cc < ccend)
  1570. {
  1571. space = 0;
  1572. size = 0;
  1573. bracketlen = 0;
  1574. if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
  1575. break;
  1576. /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */
  1577. if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
  1578. {
  1579. if (detect_repeat(common, cc))
  1580. {
  1581. /* These brackets are converted to repeats, so no global
  1582. based single character repeat is allowed. */
  1583. if (cc >= end)
  1584. end = bracketend(cc);
  1585. }
  1586. }
  1587. repeat_check = TRUE;
  1588. switch(*cc)
  1589. {
  1590. case OP_KET:
  1591. if (common->private_data_ptrs[cc + 1 - common->start] != 0)
  1592. {
  1593. common->private_data_ptrs[cc - common->start] = private_data_ptr;
  1594. private_data_ptr += sizeof(sljit_sw);
  1595. cc += common->private_data_ptrs[cc + 1 - common->start];
  1596. }
  1597. cc += 1 + LINK_SIZE;
  1598. break;
  1599. case OP_ASSERT:
  1600. case OP_ASSERT_NOT:
  1601. case OP_ASSERTBACK:
  1602. case OP_ASSERTBACK_NOT:
  1603. case OP_ASSERT_NA:
  1604. case OP_ASSERTBACK_NA:
  1605. case OP_ONCE:
  1606. case OP_SCRIPT_RUN:
  1607. case OP_BRAPOS:
  1608. case OP_SBRA:
  1609. case OP_SBRAPOS:
  1610. case OP_SCOND:
  1611. common->private_data_ptrs[cc - common->start] = private_data_ptr;
  1612. private_data_ptr += sizeof(sljit_sw);
  1613. bracketlen = 1 + LINK_SIZE;
  1614. break;
  1615. case OP_CBRAPOS:
  1616. case OP_SCBRAPOS:
  1617. common->private_data_ptrs[cc - common->start] = private_data_ptr;
  1618. private_data_ptr += sizeof(sljit_sw);
  1619. bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
  1620. break;
  1621. case OP_COND:
  1622. /* Might be a hidden SCOND. */
  1623. common->private_data_ptrs[cc - common->start] = 0;
  1624. alternative = cc + GET(cc, 1);
  1625. if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
  1626. {
  1627. common->private_data_ptrs[cc - common->start] = private_data_ptr;
  1628. private_data_ptr += sizeof(sljit_sw);
  1629. }
  1630. bracketlen = 1 + LINK_SIZE;
  1631. break;
  1632. case OP_BRA:
  1633. bracketlen = 1 + LINK_SIZE;
  1634. break;
  1635. case OP_CBRA:
  1636. case OP_SCBRA:
  1637. bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
  1638. break;
  1639. case OP_BRAZERO:
  1640. case OP_BRAMINZERO:
  1641. case OP_BRAPOSZERO:
  1642. size = 1;
  1643. repeat_check = FALSE;
  1644. break;
  1645. CASE_ITERATOR_PRIVATE_DATA_1
  1646. size = -2;
  1647. space = 1;
  1648. break;
  1649. CASE_ITERATOR_PRIVATE_DATA_2A
  1650. size = -2;
  1651. space = 2;
  1652. break;
  1653. CASE_ITERATOR_PRIVATE_DATA_2B
  1654. size = -(2 + IMM2_SIZE);
  1655. space = 2;
  1656. break;
  1657. CASE_ITERATOR_TYPE_PRIVATE_DATA_1
  1658. size = 1;
  1659. space = 1;
  1660. break;
  1661. CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
  1662. size = 1;
  1663. if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
  1664. space = 2;
  1665. break;
  1666. case OP_TYPEUPTO:
  1667. size = 1 + IMM2_SIZE;
  1668. if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
  1669. space = 2;
  1670. break;
  1671. case OP_TYPEMINUPTO:
  1672. size = 1 + IMM2_SIZE;
  1673. space = 2;
  1674. break;
  1675. case OP_CLASS:
  1676. case OP_NCLASS:
  1677. size = 1 + 32 / sizeof(PCRE2_UCHAR);
  1678. space = get_class_iterator_size(cc + size);
  1679. break;
  1680. #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  1681. case OP_XCLASS:
  1682. size = GET(cc, 1);
  1683. space = get_class_iterator_size(cc + size);
  1684. break;
  1685. #endif
  1686. default:
  1687. cc = next_opcode(common, cc);
  1688. SLJIT_ASSERT(cc != NULL);
  1689. break;
  1690. }
  1691. /* Character iterators, which are not inside a repeated bracket,
  1692. gets a private slot instead of allocating it on the stack. */
  1693. if (space > 0 && cc >= end)
  1694. {
  1695. common->private_data_ptrs[cc - common->start] = private_data_ptr;
  1696. private_data_ptr += sizeof(sljit_sw) * space;
  1697. }
  1698. if (size != 0)
  1699. {
  1700. if (size < 0)
  1701. {
  1702. cc += -size;
  1703. #ifdef SUPPORT_UNICODE
  1704. if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
  1705. #endif
  1706. }
  1707. else
  1708. cc += size;
  1709. }
  1710. if (bracketlen > 0)
  1711. {
  1712. if (cc >= end)
  1713. {
  1714. end = bracketend(cc);
  1715. if (end[-1 - LINK_SIZE] == OP_KET)
  1716. end = NULL;
  1717. }
  1718. cc += bracketlen;
  1719. }
  1720. }
  1721. *private_data_start = private_data_ptr;
  1722. }
  1723. /* Returns with a frame_types (always < 0) if no need for frame. */
  1724. static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
  1725. {
  1726. int length = 0;
  1727. int possessive = 0;
  1728. BOOL stack_restore = FALSE;
  1729. BOOL setsom_found = recursive;
  1730. BOOL setmark_found = recursive;
  1731. /* The last capture is a local variable even for recursions. */
  1732. BOOL capture_last_found = FALSE;
  1733. #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
  1734. SLJIT_ASSERT(common->control_head_ptr != 0);
  1735. *needs_control_head = TRUE;
  1736. #else
  1737. *needs_control_head = FALSE;
  1738. #endif
  1739. if (ccend == NULL)
  1740. {
  1741. ccend = bracketend(cc) - (1 + LINK_SIZE);
  1742. if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
  1743. {
  1744. possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
  1745. /* This is correct regardless of common->capture_last_ptr. */
  1746. capture_last_found = TRUE;
  1747. }
  1748. cc = next_opcode(common, cc);
  1749. }
  1750. SLJIT_ASSERT(cc != NULL);
  1751. while (cc < ccend)
  1752. switch(*cc)
  1753. {
  1754. case OP_SET_SOM:
  1755. SLJIT_ASSERT(common->has_set_som);
  1756. stack_restore = TRUE;
  1757. if (!setsom_found)
  1758. {
  1759. length += 2;
  1760. setsom_found = TRUE;
  1761. }
  1762. cc += 1;
  1763. break;
  1764. case OP_MARK:
  1765. case OP_COMMIT_ARG:
  1766. case OP_PRUNE_ARG:
  1767. case OP_THEN_ARG:
  1768. SLJIT_ASSERT(common->mark_ptr != 0);
  1769. stack_restore = TRUE;
  1770. if (!setmark_found)
  1771. {
  1772. length += 2;
  1773. setmark_found = TRUE;
  1774. }
  1775. if (common->control_head_ptr != 0)
  1776. *needs_control_head = TRUE;
  1777. cc += 1 + 2 + cc[1];
  1778. break;
  1779. case OP_RECURSE:
  1780. stack_restore = TRUE;
  1781. if (common->has_set_som && !setsom_found)
  1782. {
  1783. length += 2;
  1784. setsom_found = TRUE;
  1785. }
  1786. if (common->mark_ptr != 0 && !setmark_found)
  1787. {
  1788. length += 2;
  1789. setmark_found = TRUE;
  1790. }
  1791. if (common->capture_last_ptr != 0 && !capture_last_found)
  1792. {
  1793. length += 2;
  1794. capture_last_found = TRUE;
  1795. }
  1796. cc += 1 + LINK_SIZE;
  1797. break;
  1798. case OP_CBRA:
  1799. case OP_CBRAPOS:
  1800. case OP_SCBRA:
  1801. case OP_SCBRAPOS:
  1802. stack_restore = TRUE;
  1803. if (common->capture_last_ptr != 0 && !capture_last_found)
  1804. {
  1805. length += 2;
  1806. capture_last_found = TRUE;
  1807. }
  1808. length += 3;
  1809. cc += 1 + LINK_SIZE + IMM2_SIZE;
  1810. break;
  1811. case OP_THEN:
  1812. stack_restore = TRUE;
  1813. if (common->control_head_ptr != 0)
  1814. *needs_control_head = TRUE;
  1815. cc ++;
  1816. break;
  1817. default:
  1818. stack_restore = TRUE;
  1819. /* Fall through. */
  1820. case OP_NOT_WORD_BOUNDARY:
  1821. case OP_WORD_BOUNDARY:
  1822. case OP_NOT_DIGIT:
  1823. case OP_DIGIT:
  1824. case OP_NOT_WHITESPACE:
  1825. case OP_WHITESPACE:
  1826. case OP_NOT_WORDCHAR:
  1827. case OP_WORDCHAR:
  1828. case OP_ANY:
  1829. case OP_ALLANY:
  1830. case OP_ANYBYTE:
  1831. case OP_NOTPROP:
  1832. case OP_PROP:
  1833. case OP_ANYNL:
  1834. case OP_NOT_HSPACE:
  1835. case OP_HSPACE:
  1836. case OP_NOT_VSPACE:
  1837. case OP_VSPACE:
  1838. case OP_EXTUNI:
  1839. case OP_EODN:
  1840. case OP_EOD:
  1841. case OP_CIRC:
  1842. case OP_CIRCM:
  1843. case OP_DOLL:
  1844. case OP_DOLLM:
  1845. case OP_CHAR:
  1846. case OP_CHARI:
  1847. case OP_NOT:
  1848. case OP_NOTI:
  1849. case OP_EXACT:
  1850. case OP_POSSTAR:
  1851. case OP_POSPLUS:
  1852. case OP_POSQUERY:
  1853. case OP_POSUPTO:
  1854. case OP_EXACTI:
  1855. case OP_POSSTARI:
  1856. case OP_POSPLUSI:
  1857. case OP_POSQUERYI:
  1858. case OP_POSUPTOI:
  1859. case OP_NOTEXACT:
  1860. case OP_NOTPOSSTAR:
  1861. case OP_NOTPOSPLUS:
  1862. case OP_NOTPOSQUERY:
  1863. case OP_NOTPOSUPTO:
  1864. case OP_NOTEXACTI:
  1865. case OP_NOTPOSSTARI:
  1866. case OP_NOTPOSPLUSI:
  1867. case OP_NOTPOSQUERYI:
  1868. case OP_NOTPOSUPTOI:
  1869. case OP_TYPEEXACT:
  1870. case OP_TYPEPOSSTAR:
  1871. case OP_TYPEPOSPLUS:
  1872. case OP_TYPEPOSQUERY:
  1873. case OP_TYPEPOSUPTO:
  1874. case OP_CLASS:
  1875. case OP_NCLASS:
  1876. case OP_XCLASS:
  1877. case OP_CALLOUT:
  1878. case OP_CALLOUT_STR:
  1879. cc = next_opcode(common, cc);
  1880. SLJIT_ASSERT(cc != NULL);
  1881. break;
  1882. }
  1883. /* Possessive quantifiers can use a special case. */
  1884. if (SLJIT_UNLIKELY(possessive == length))
  1885. return stack_restore ? no_frame : no_stack;
  1886. if (length > 0)
  1887. return length + 1;
  1888. return stack_restore ? no_frame : no_stack;
  1889. }
  1890. static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
  1891. {
  1892. DEFINE_COMPILER;
  1893. BOOL setsom_found = FALSE;
  1894. BOOL setmark_found = FALSE;
  1895. /* The last capture is a local variable even for recursions. */
  1896. BOOL capture_last_found = FALSE;
  1897. int offset;
  1898. /* >= 1 + shortest item size (2) */
  1899. SLJIT_UNUSED_ARG(stacktop);
  1900. SLJIT_ASSERT(stackpos >= stacktop + 2);
  1901. stackpos = STACK(stackpos);
  1902. if (ccend == NULL)
  1903. {
  1904. ccend = bracketend(cc) - (1 + LINK_SIZE);
  1905. if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
  1906. cc = next_opcode(common, cc);
  1907. }
  1908. SLJIT_ASSERT(cc != NULL);
  1909. while (cc < ccend)
  1910. switch(*cc)
  1911. {
  1912. case OP_SET_SOM:
  1913. SLJIT_ASSERT(common->has_set_som);
  1914. if (!setsom_found)
  1915. {
  1916. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
  1917. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
  1918. stackpos -= (int)sizeof(sljit_sw);
  1919. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
  1920. stackpos -= (int)sizeof(sljit_sw);
  1921. setsom_found = TRUE;
  1922. }
  1923. cc += 1;
  1924. break;
  1925. case OP_MARK:
  1926. case OP_COMMIT_ARG:
  1927. case OP_PRUNE_ARG:
  1928. case OP_THEN_ARG:
  1929. SLJIT_ASSERT(common->mark_ptr != 0);
  1930. if (!setmark_found)
  1931. {
  1932. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  1933. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
  1934. stackpos -= (int)sizeof(sljit_sw);
  1935. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
  1936. stackpos -= (int)sizeof(sljit_sw);
  1937. setmark_found = TRUE;
  1938. }
  1939. cc += 1 + 2 + cc[1];
  1940. break;
  1941. case OP_RECURSE:
  1942. if (common->has_set_som && !setsom_found)
  1943. {
  1944. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
  1945. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
  1946. stackpos -= (int)sizeof(sljit_sw);
  1947. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
  1948. stackpos -= (int)sizeof(sljit_sw);
  1949. setsom_found = TRUE;
  1950. }
  1951. if (common->mark_ptr != 0 && !setmark_found)
  1952. {
  1953. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  1954. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
  1955. stackpos -= (int)sizeof(sljit_sw);
  1956. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
  1957. stackpos -= (int)sizeof(sljit_sw);
  1958. setmark_found = TRUE;
  1959. }
  1960. if (common->capture_last_ptr != 0 && !capture_last_found)
  1961. {
  1962. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
  1963. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
  1964. stackpos -= (int)sizeof(sljit_sw);
  1965. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
  1966. stackpos -= (int)sizeof(sljit_sw);
  1967. capture_last_found = TRUE;
  1968. }
  1969. cc += 1 + LINK_SIZE;
  1970. break;
  1971. case OP_CBRA:
  1972. case OP_CBRAPOS:
  1973. case OP_SCBRA:
  1974. case OP_SCBRAPOS:
  1975. if (common->capture_last_ptr != 0 && !capture_last_found)
  1976. {
  1977. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
  1978. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
  1979. stackpos -= (int)sizeof(sljit_sw);
  1980. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
  1981. stackpos -= (int)sizeof(sljit_sw);
  1982. capture_last_found = TRUE;
  1983. }
  1984. offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
  1985. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
  1986. stackpos -= (int)sizeof(sljit_sw);
  1987. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  1988. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  1989. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
  1990. stackpos -= (int)sizeof(sljit_sw);
  1991. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
  1992. stackpos -= (int)sizeof(sljit_sw);
  1993. cc += 1 + LINK_SIZE + IMM2_SIZE;
  1994. break;
  1995. default:
  1996. cc = next_opcode(common, cc);
  1997. SLJIT_ASSERT(cc != NULL);
  1998. break;
  1999. }
  2000. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
  2001. SLJIT_ASSERT(stackpos == STACK(stacktop));
  2002. }
  2003. #define RECURSE_TMP_REG_COUNT 3
  2004. typedef struct delayed_mem_copy_status {
  2005. struct sljit_compiler *compiler;
  2006. int store_bases[RECURSE_TMP_REG_COUNT];
  2007. int store_offsets[RECURSE_TMP_REG_COUNT];
  2008. int tmp_regs[RECURSE_TMP_REG_COUNT];
  2009. int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
  2010. int next_tmp_reg;
  2011. } delayed_mem_copy_status;
  2012. static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
  2013. {
  2014. int i;
  2015. for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
  2016. {
  2017. SLJIT_ASSERT(status->tmp_regs[i] >= 0);
  2018. SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[i]) < 0 || status->tmp_regs[i] == status->saved_tmp_regs[i]);
  2019. status->store_bases[i] = -1;
  2020. }
  2021. status->next_tmp_reg = 0;
  2022. status->compiler = common->compiler;
  2023. }
  2024. static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  2025. int store_base, sljit_sw store_offset)
  2026. {
  2027. struct sljit_compiler *compiler = status->compiler;
  2028. int next_tmp_reg = status->next_tmp_reg;
  2029. int tmp_reg = status->tmp_regs[next_tmp_reg];
  2030. SLJIT_ASSERT(load_base > 0 && store_base > 0);
  2031. if (status->store_bases[next_tmp_reg] == -1)
  2032. {
  2033. /* Preserve virtual registers. */
  2034. if (sljit_get_register_index(status->saved_tmp_regs[next_tmp_reg]) < 0)
  2035. OP1(SLJIT_MOV, status->saved_tmp_regs[next_tmp_reg], 0, tmp_reg, 0);
  2036. }
  2037. else
  2038. OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);
  2039. OP1(SLJIT_MOV, tmp_reg, 0, SLJIT_MEM1(load_base), load_offset);
  2040. status->store_bases[next_tmp_reg] = store_base;
  2041. status->store_offsets[next_tmp_reg] = store_offset;
  2042. status->next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT;
  2043. }
  2044. static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
  2045. {
  2046. struct sljit_compiler *compiler = status->compiler;
  2047. int next_tmp_reg = status->next_tmp_reg;
  2048. int tmp_reg, saved_tmp_reg, i;
  2049. for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
  2050. {
  2051. if (status->store_bases[next_tmp_reg] != -1)
  2052. {
  2053. tmp_reg = status->tmp_regs[next_tmp_reg];
  2054. saved_tmp_reg = status->saved_tmp_regs[next_tmp_reg];
  2055. OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);
  2056. /* Restore virtual registers. */
  2057. if (sljit_get_register_index(saved_tmp_reg) < 0)
  2058. OP1(SLJIT_MOV, tmp_reg, 0, saved_tmp_reg, 0);
  2059. }
  2060. next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT;
  2061. }
  2062. }
  2063. #undef RECURSE_TMP_REG_COUNT
  2064. static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
  2065. BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept)
  2066. {
  2067. int length = 1;
  2068. int size;
  2069. PCRE2_SPTR alternative;
  2070. BOOL quit_found = FALSE;
  2071. BOOL accept_found = FALSE;
  2072. BOOL setsom_found = FALSE;
  2073. BOOL setmark_found = FALSE;
  2074. BOOL capture_last_found = FALSE;
  2075. BOOL control_head_found = FALSE;
  2076. #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
  2077. SLJIT_ASSERT(common->control_head_ptr != 0);
  2078. control_head_found = TRUE;
  2079. #endif
  2080. /* Calculate the sum of the private machine words. */
  2081. while (cc < ccend)
  2082. {
  2083. size = 0;
  2084. switch(*cc)
  2085. {
  2086. case OP_SET_SOM:
  2087. SLJIT_ASSERT(common->has_set_som);
  2088. setsom_found = TRUE;
  2089. cc += 1;
  2090. break;
  2091. case OP_RECURSE:
  2092. if (common->has_set_som)
  2093. setsom_found = TRUE;
  2094. if (common->mark_ptr != 0)
  2095. setmark_found = TRUE;
  2096. if (common->capture_last_ptr != 0)
  2097. capture_last_found = TRUE;
  2098. cc += 1 + LINK_SIZE;
  2099. break;
  2100. case OP_KET:
  2101. if (PRIVATE_DATA(cc) != 0)
  2102. {
  2103. length++;
  2104. SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
  2105. cc += PRIVATE_DATA(cc + 1);
  2106. }
  2107. cc += 1 + LINK_SIZE;
  2108. break;
  2109. case OP_ASSERT:
  2110. case OP_ASSERT_NOT:
  2111. case OP_ASSERTBACK:
  2112. case OP_ASSERTBACK_NOT:
  2113. case OP_ASSERT_NA:
  2114. case OP_ASSERTBACK_NA:
  2115. case OP_ONCE:
  2116. case OP_SCRIPT_RUN:
  2117. case OP_BRAPOS:
  2118. case OP_SBRA:
  2119. case OP_SBRAPOS:
  2120. case OP_SCOND:
  2121. length++;
  2122. SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
  2123. cc += 1 + LINK_SIZE;
  2124. break;
  2125. case OP_CBRA:
  2126. case OP_SCBRA:
  2127. length += 2;
  2128. if (common->capture_last_ptr != 0)
  2129. capture_last_found = TRUE;
  2130. if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
  2131. length++;
  2132. cc += 1 + LINK_SIZE + IMM2_SIZE;
  2133. break;
  2134. case OP_CBRAPOS:
  2135. case OP_SCBRAPOS:
  2136. length += 2 + 2;
  2137. if (common->capture_last_ptr != 0)
  2138. capture_last_found = TRUE;
  2139. cc += 1 + LINK_SIZE + IMM2_SIZE;
  2140. break;
  2141. case OP_COND:
  2142. /* Might be a hidden SCOND. */
  2143. alternative = cc + GET(cc, 1);
  2144. if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
  2145. length++;
  2146. cc += 1 + LINK_SIZE;
  2147. break;
  2148. CASE_ITERATOR_PRIVATE_DATA_1
  2149. if (PRIVATE_DATA(cc) != 0)
  2150. length++;
  2151. cc += 2;
  2152. #ifdef SUPPORT_UNICODE
  2153. if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
  2154. #endif
  2155. break;
  2156. CASE_ITERATOR_PRIVATE_DATA_2A
  2157. if (PRIVATE_DATA(cc) != 0)
  2158. length += 2;
  2159. cc += 2;
  2160. #ifdef SUPPORT_UNICODE
  2161. if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
  2162. #endif
  2163. break;
  2164. CASE_ITERATOR_PRIVATE_DATA_2B
  2165. if (PRIVATE_DATA(cc) != 0)
  2166. length += 2;
  2167. cc += 2 + IMM2_SIZE;
  2168. #ifdef SUPPORT_UNICODE
  2169. if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
  2170. #endif
  2171. break;
  2172. CASE_ITERATOR_TYPE_PRIVATE_DATA_1
  2173. if (PRIVATE_DATA(cc) != 0)
  2174. length++;
  2175. cc += 1;
  2176. break;
  2177. CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
  2178. if (PRIVATE_DATA(cc) != 0)
  2179. length += 2;
  2180. cc += 1;
  2181. break;
  2182. CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
  2183. if (PRIVATE_DATA(cc) != 0)
  2184. length += 2;
  2185. cc += 1 + IMM2_SIZE;
  2186. break;
  2187. case OP_CLASS:
  2188. case OP_NCLASS:
  2189. #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  2190. case OP_XCLASS:
  2191. size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
  2192. #else
  2193. size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
  2194. #endif
  2195. if (PRIVATE_DATA(cc) != 0)
  2196. length += get_class_iterator_size(cc + size);
  2197. cc += size;
  2198. break;
  2199. case OP_MARK:
  2200. case OP_COMMIT_ARG:
  2201. case OP_PRUNE_ARG:
  2202. case OP_THEN_ARG:
  2203. SLJIT_ASSERT(common->mark_ptr != 0);
  2204. if (!setmark_found)
  2205. setmark_found = TRUE;
  2206. if (common->control_head_ptr != 0)
  2207. control_head_found = TRUE;
  2208. if (*cc != OP_MARK)
  2209. quit_found = TRUE;
  2210. cc += 1 + 2 + cc[1];
  2211. break;
  2212. case OP_PRUNE:
  2213. case OP_SKIP:
  2214. case OP_COMMIT:
  2215. quit_found = TRUE;
  2216. cc++;
  2217. break;
  2218. case OP_SKIP_ARG:
  2219. quit_found = TRUE;
  2220. cc += 1 + 2 + cc[1];
  2221. break;
  2222. case OP_THEN:
  2223. SLJIT_ASSERT(common->control_head_ptr != 0);
  2224. quit_found = TRUE;
  2225. if (!control_head_found)
  2226. control_head_found = TRUE;
  2227. cc++;
  2228. break;
  2229. case OP_ACCEPT:
  2230. case OP_ASSERT_ACCEPT:
  2231. accept_found = TRUE;
  2232. cc++;
  2233. break;
  2234. default:
  2235. cc = next_opcode(common, cc);
  2236. SLJIT_ASSERT(cc != NULL);
  2237. break;
  2238. }
  2239. }
  2240. SLJIT_ASSERT(cc == ccend);
  2241. if (control_head_found)
  2242. length++;
  2243. if (capture_last_found)
  2244. length++;
  2245. if (quit_found)
  2246. {
  2247. if (setsom_found)
  2248. length++;
  2249. if (setmark_found)
  2250. length++;
  2251. }
  2252. *needs_control_head = control_head_found;
  2253. *has_quit = quit_found;
  2254. *has_accept = accept_found;
  2255. return length;
  2256. }
  2257. enum copy_recurse_data_types {
  2258. recurse_copy_from_global,
  2259. recurse_copy_private_to_global,
  2260. recurse_copy_shared_to_global,
  2261. recurse_copy_kept_shared_to_global,
  2262. recurse_swap_global
  2263. };
  2264. static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
  2265. int type, int stackptr, int stacktop, BOOL has_quit)
  2266. {
  2267. delayed_mem_copy_status status;
  2268. PCRE2_SPTR alternative;
  2269. sljit_sw private_srcw[2];
  2270. sljit_sw shared_srcw[3];
  2271. sljit_sw kept_shared_srcw[2];
  2272. int private_count, shared_count, kept_shared_count;
  2273. int from_sp, base_reg, offset, i;
  2274. BOOL setsom_found = FALSE;
  2275. BOOL setmark_found = FALSE;
  2276. BOOL capture_last_found = FALSE;
  2277. BOOL control_head_found = FALSE;
  2278. #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
  2279. SLJIT_ASSERT(common->control_head_ptr != 0);
  2280. control_head_found = TRUE;
  2281. #endif
  2282. switch (type)
  2283. {
  2284. case recurse_copy_from_global:
  2285. from_sp = TRUE;
  2286. base_reg = STACK_TOP;
  2287. break;
  2288. case recurse_copy_private_to_global:
  2289. case recurse_copy_shared_to_global:
  2290. case recurse_copy_kept_shared_to_global:
  2291. from_sp = FALSE;
  2292. base_reg = STACK_TOP;
  2293. break;
  2294. default:
  2295. SLJIT_ASSERT(type == recurse_swap_global);
  2296. from_sp = FALSE;
  2297. base_reg = TMP2;
  2298. break;
  2299. }
  2300. stackptr = STACK(stackptr);
  2301. stacktop = STACK(stacktop);
  2302. status.tmp_regs[0] = TMP1;
  2303. status.saved_tmp_regs[0] = TMP1;
  2304. if (base_reg != TMP2)
  2305. {
  2306. status.tmp_regs[1] = TMP2;
  2307. status.saved_tmp_regs[1] = TMP2;
  2308. }
  2309. else
  2310. {
  2311. status.saved_tmp_regs[1] = RETURN_ADDR;
  2312. if (HAS_VIRTUAL_REGISTERS)
  2313. status.tmp_regs[1] = STR_PTR;
  2314. else
  2315. status.tmp_regs[1] = RETURN_ADDR;
  2316. }
  2317. status.saved_tmp_regs[2] = TMP3;
  2318. if (HAS_VIRTUAL_REGISTERS)
  2319. status.tmp_regs[2] = STR_END;
  2320. else
  2321. status.tmp_regs[2] = TMP3;
  2322. delayed_mem_copy_init(&status, common);
  2323. if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
  2324. {
  2325. SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
  2326. if (!from_sp)
  2327. delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
  2328. if (from_sp || type == recurse_swap_global)
  2329. delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
  2330. }
  2331. stackptr += sizeof(sljit_sw);
  2332. #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
  2333. if (type != recurse_copy_shared_to_global)
  2334. {
  2335. if (!from_sp)
  2336. delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
  2337. if (from_sp || type == recurse_swap_global)
  2338. delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
  2339. }
  2340. stackptr += sizeof(sljit_sw);
  2341. #endif
  2342. while (cc < ccend)
  2343. {
  2344. private_count = 0;
  2345. shared_count = 0;
  2346. kept_shared_count = 0;
  2347. switch(*cc)
  2348. {
  2349. case OP_SET_SOM:
  2350. SLJIT_ASSERT(common->has_set_som);
  2351. if (has_quit && !setsom_found)
  2352. {
  2353. kept_shared_srcw[0] = OVECTOR(0);
  2354. kept_shared_count = 1;
  2355. setsom_found = TRUE;
  2356. }
  2357. cc += 1;
  2358. break;
  2359. case OP_RECURSE:
  2360. if (has_quit)
  2361. {
  2362. if (common->has_set_som && !setsom_found)
  2363. {
  2364. kept_shared_srcw[0] = OVECTOR(0);
  2365. kept_shared_count = 1;
  2366. setsom_found = TRUE;
  2367. }
  2368. if (common->mark_ptr != 0 && !setmark_found)
  2369. {
  2370. kept_shared_srcw[kept_shared_count] = common->mark_ptr;
  2371. kept_shared_count++;
  2372. setmark_found = TRUE;
  2373. }
  2374. }
  2375. if (common->capture_last_ptr != 0 && !capture_last_found)
  2376. {
  2377. shared_srcw[0] = common->capture_last_ptr;
  2378. shared_count = 1;
  2379. capture_last_found = TRUE;
  2380. }
  2381. cc += 1 + LINK_SIZE;
  2382. break;
  2383. case OP_KET:
  2384. if (PRIVATE_DATA(cc) != 0)
  2385. {
  2386. private_count = 1;
  2387. private_srcw[0] = PRIVATE_DATA(cc);
  2388. SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
  2389. cc += PRIVATE_DATA(cc + 1);
  2390. }
  2391. cc += 1 + LINK_SIZE;
  2392. break;
  2393. case OP_ASSERT:
  2394. case OP_ASSERT_NOT:
  2395. case OP_ASSERTBACK:
  2396. case OP_ASSERTBACK_NOT:
  2397. case OP_ASSERT_NA:
  2398. case OP_ASSERTBACK_NA:
  2399. case OP_ONCE:
  2400. case OP_SCRIPT_RUN:
  2401. case OP_BRAPOS:
  2402. case OP_SBRA:
  2403. case OP_SBRAPOS:
  2404. case OP_SCOND:
  2405. private_count = 1;
  2406. private_srcw[0] = PRIVATE_DATA(cc);
  2407. cc += 1 + LINK_SIZE;
  2408. break;
  2409. case OP_CBRA:
  2410. case OP_SCBRA:
  2411. offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
  2412. shared_srcw[0] = OVECTOR(offset);
  2413. shared_srcw[1] = OVECTOR(offset + 1);
  2414. shared_count = 2;
  2415. if (common->capture_last_ptr != 0 && !capture_last_found)
  2416. {
  2417. shared_srcw[2] = common->capture_last_ptr;
  2418. shared_count = 3;
  2419. capture_last_found = TRUE;
  2420. }
  2421. if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
  2422. {
  2423. private_count = 1;
  2424. private_srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
  2425. }
  2426. cc += 1 + LINK_SIZE + IMM2_SIZE;
  2427. break;
  2428. case OP_CBRAPOS:
  2429. case OP_SCBRAPOS:
  2430. offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
  2431. shared_srcw[0] = OVECTOR(offset);
  2432. shared_srcw[1] = OVECTOR(offset + 1);
  2433. shared_count = 2;
  2434. if (common->capture_last_ptr != 0 && !capture_last_found)
  2435. {
  2436. shared_srcw[2] = common->capture_last_ptr;
  2437. shared_count = 3;
  2438. capture_last_found = TRUE;
  2439. }
  2440. private_count = 2;
  2441. private_srcw[0] = PRIVATE_DATA(cc);
  2442. private_srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
  2443. cc += 1 + LINK_SIZE + IMM2_SIZE;
  2444. break;
  2445. case OP_COND:
  2446. /* Might be a hidden SCOND. */
  2447. alternative = cc + GET(cc, 1);
  2448. if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
  2449. {
  2450. private_count = 1;
  2451. private_srcw[0] = PRIVATE_DATA(cc);
  2452. }
  2453. cc += 1 + LINK_SIZE;
  2454. break;
  2455. CASE_ITERATOR_PRIVATE_DATA_1
  2456. if (PRIVATE_DATA(cc))
  2457. {
  2458. private_count = 1;
  2459. private_srcw[0] = PRIVATE_DATA(cc);
  2460. }
  2461. cc += 2;
  2462. #ifdef SUPPORT_UNICODE
  2463. if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
  2464. #endif
  2465. break;
  2466. CASE_ITERATOR_PRIVATE_DATA_2A
  2467. if (PRIVATE_DATA(cc))
  2468. {
  2469. private_count = 2;
  2470. private_srcw[0] = PRIVATE_DATA(cc);
  2471. private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
  2472. }
  2473. cc += 2;
  2474. #ifdef SUPPORT_UNICODE
  2475. if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
  2476. #endif
  2477. break;
  2478. CASE_ITERATOR_PRIVATE_DATA_2B
  2479. if (PRIVATE_DATA(cc))
  2480. {
  2481. private_count = 2;
  2482. private_srcw[0] = PRIVATE_DATA(cc);
  2483. private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
  2484. }
  2485. cc += 2 + IMM2_SIZE;
  2486. #ifdef SUPPORT_UNICODE
  2487. if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
  2488. #endif
  2489. break;
  2490. CASE_ITERATOR_TYPE_PRIVATE_DATA_1
  2491. if (PRIVATE_DATA(cc))
  2492. {
  2493. private_count = 1;
  2494. private_srcw[0] = PRIVATE_DATA(cc);
  2495. }
  2496. cc += 1;
  2497. break;
  2498. CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
  2499. if (PRIVATE_DATA(cc))
  2500. {
  2501. private_count = 2;
  2502. private_srcw[0] = PRIVATE_DATA(cc);
  2503. private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
  2504. }
  2505. cc += 1;
  2506. break;
  2507. CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
  2508. if (PRIVATE_DATA(cc))
  2509. {
  2510. private_count = 2;
  2511. private_srcw[0] = PRIVATE_DATA(cc);
  2512. private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
  2513. }
  2514. cc += 1 + IMM2_SIZE;
  2515. break;
  2516. case OP_CLASS:
  2517. case OP_NCLASS:
  2518. #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  2519. case OP_XCLASS:
  2520. i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
  2521. #else
  2522. i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
  2523. #endif
  2524. if (PRIVATE_DATA(cc) != 0)
  2525. switch(get_class_iterator_size(cc + i))
  2526. {
  2527. case 1:
  2528. private_count = 1;
  2529. private_srcw[0] = PRIVATE_DATA(cc);
  2530. break;
  2531. case 2:
  2532. private_count = 2;
  2533. private_srcw[0] = PRIVATE_DATA(cc);
  2534. private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
  2535. break;
  2536. default:
  2537. SLJIT_UNREACHABLE();
  2538. break;
  2539. }
  2540. cc += i;
  2541. break;
  2542. case OP_MARK:
  2543. case OP_COMMIT_ARG:
  2544. case OP_PRUNE_ARG:
  2545. case OP_THEN_ARG:
  2546. SLJIT_ASSERT(common->mark_ptr != 0);
  2547. if (has_quit && !setmark_found)
  2548. {
  2549. kept_shared_srcw[0] = common->mark_ptr;
  2550. kept_shared_count = 1;
  2551. setmark_found = TRUE;
  2552. }
  2553. if (common->control_head_ptr != 0 && !control_head_found)
  2554. {
  2555. private_srcw[0] = common->control_head_ptr;
  2556. private_count = 1;
  2557. control_head_found = TRUE;
  2558. }
  2559. cc += 1 + 2 + cc[1];
  2560. break;
  2561. case OP_THEN:
  2562. SLJIT_ASSERT(common->control_head_ptr != 0);
  2563. if (!control_head_found)
  2564. {
  2565. private_srcw[0] = common->control_head_ptr;
  2566. private_count = 1;
  2567. control_head_found = TRUE;
  2568. }
  2569. cc++;
  2570. break;
  2571. default:
  2572. cc = next_opcode(common, cc);
  2573. SLJIT_ASSERT(cc != NULL);
  2574. break;
  2575. }
  2576. if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
  2577. {
  2578. SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
  2579. for (i = 0; i < private_count; i++)
  2580. {
  2581. SLJIT_ASSERT(private_srcw[i] != 0);
  2582. if (!from_sp)
  2583. delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
  2584. if (from_sp || type == recurse_swap_global)
  2585. delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
  2586. stackptr += sizeof(sljit_sw);
  2587. }
  2588. }
  2589. else
  2590. stackptr += sizeof(sljit_sw) * private_count;
  2591. if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
  2592. {
  2593. SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
  2594. for (i = 0; i < shared_count; i++)
  2595. {
  2596. SLJIT_ASSERT(shared_srcw[i] != 0);
  2597. if (!from_sp)
  2598. delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
  2599. if (from_sp || type == recurse_swap_global)
  2600. delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
  2601. stackptr += sizeof(sljit_sw);
  2602. }
  2603. }
  2604. else
  2605. stackptr += sizeof(sljit_sw) * shared_count;
  2606. if (type != recurse_copy_private_to_global && type != recurse_swap_global)
  2607. {
  2608. SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
  2609. for (i = 0; i < kept_shared_count; i++)
  2610. {
  2611. SLJIT_ASSERT(kept_shared_srcw[i] != 0);
  2612. if (!from_sp)
  2613. delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
  2614. if (from_sp || type == recurse_swap_global)
  2615. delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
  2616. stackptr += sizeof(sljit_sw);
  2617. }
  2618. }
  2619. else
  2620. stackptr += sizeof(sljit_sw) * kept_shared_count;
  2621. }
  2622. SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
  2623. delayed_mem_copy_finish(&status);
  2624. }
  2625. static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
  2626. {
  2627. PCRE2_SPTR end = bracketend(cc);
  2628. BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
  2629. /* Assert captures then. */
  2630. if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
  2631. current_offset = NULL;
  2632. /* Conditional block does not. */
  2633. if (*cc == OP_COND || *cc == OP_SCOND)
  2634. has_alternatives = FALSE;
  2635. cc = next_opcode(common, cc);
  2636. if (has_alternatives)
  2637. current_offset = common->then_offsets + (cc - common->start);
  2638. while (cc < end)
  2639. {
  2640. if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
  2641. cc = set_then_offsets(common, cc, current_offset);
  2642. else
  2643. {
  2644. if (*cc == OP_ALT && has_alternatives)
  2645. current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
  2646. if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
  2647. *current_offset = 1;
  2648. cc = next_opcode(common, cc);
  2649. }
  2650. }
  2651. return end;
  2652. }
  2653. #undef CASE_ITERATOR_PRIVATE_DATA_1
  2654. #undef CASE_ITERATOR_PRIVATE_DATA_2A
  2655. #undef CASE_ITERATOR_PRIVATE_DATA_2B
  2656. #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
  2657. #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
  2658. #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
  2659. static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
  2660. {
  2661. return (value & (value - 1)) == 0;
  2662. }
  2663. static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
  2664. {
  2665. while (list)
  2666. {
  2667. /* sljit_set_label is clever enough to do nothing
  2668. if either the jump or the label is NULL. */
  2669. SET_LABEL(list->jump, label);
  2670. list = list->next;
  2671. }
  2672. }
  2673. static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
  2674. {
  2675. jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
  2676. if (list_item)
  2677. {
  2678. list_item->next = *list;
  2679. list_item->jump = jump;
  2680. *list = list_item;
  2681. }
  2682. }
  2683. static void add_stub(compiler_common *common, struct sljit_jump *start)
  2684. {
  2685. DEFINE_COMPILER;
  2686. stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
  2687. if (list_item)
  2688. {
  2689. list_item->start = start;
  2690. list_item->quit = LABEL();
  2691. list_item->next = common->stubs;
  2692. common->stubs = list_item;
  2693. }
  2694. }
  2695. static void flush_stubs(compiler_common *common)
  2696. {
  2697. DEFINE_COMPILER;
  2698. stub_list *list_item = common->stubs;
  2699. while (list_item)
  2700. {
  2701. JUMPHERE(list_item->start);
  2702. add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  2703. JUMPTO(SLJIT_JUMP, list_item->quit);
  2704. list_item = list_item->next;
  2705. }
  2706. common->stubs = NULL;
  2707. }
  2708. static SLJIT_INLINE void count_match(compiler_common *common)
  2709. {
  2710. DEFINE_COMPILER;
  2711. OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
  2712. add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
  2713. }
  2714. static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
  2715. {
  2716. /* May destroy all locals and registers except TMP2. */
  2717. DEFINE_COMPILER;
  2718. SLJIT_ASSERT(size > 0);
  2719. OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
  2720. #ifdef DESTROY_REGISTERS
  2721. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
  2722. OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
  2723. OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
  2724. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
  2725. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
  2726. #endif
  2727. add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
  2728. }
  2729. static SLJIT_INLINE void free_stack(compiler_common *common, int size)
  2730. {
  2731. DEFINE_COMPILER;
  2732. SLJIT_ASSERT(size > 0);
  2733. OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
  2734. }
  2735. static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
  2736. {
  2737. DEFINE_COMPILER;
  2738. sljit_uw *result;
  2739. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  2740. return NULL;
  2741. result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
  2742. if (SLJIT_UNLIKELY(result == NULL))
  2743. {
  2744. sljit_set_compiler_memory_error(compiler);
  2745. return NULL;
  2746. }
  2747. *(void**)result = common->read_only_data_head;
  2748. common->read_only_data_head = (void *)result;
  2749. return result + 1;
  2750. }
  2751. static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
  2752. {
  2753. DEFINE_COMPILER;
  2754. struct sljit_label *loop;
  2755. sljit_s32 i;
  2756. /* At this point we can freely use all temporary registers. */
  2757. SLJIT_ASSERT(length > 1);
  2758. /* TMP1 returns with begin - 1. */
  2759. OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
  2760. if (length < 8)
  2761. {
  2762. for (i = 1; i < length; i++)
  2763. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
  2764. }
  2765. else
  2766. {
  2767. if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
  2768. {
  2769. GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
  2770. OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
  2771. loop = LABEL();
  2772. sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
  2773. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
  2774. JUMPTO(SLJIT_NOT_ZERO, loop);
  2775. }
  2776. else
  2777. {
  2778. GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
  2779. OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
  2780. loop = LABEL();
  2781. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
  2782. OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
  2783. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
  2784. JUMPTO(SLJIT_NOT_ZERO, loop);
  2785. }
  2786. }
  2787. }
  2788. static SLJIT_INLINE void reset_early_fail(compiler_common *common)
  2789. {
  2790. DEFINE_COMPILER;
  2791. sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
  2792. sljit_u32 uncleared_size;
  2793. sljit_s32 src = SLJIT_IMM;
  2794. sljit_s32 i;
  2795. struct sljit_label *loop;
  2796. SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);
  2797. if (size == sizeof(sljit_sw))
  2798. {
  2799. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
  2800. return;
  2801. }
  2802. if (sljit_get_register_index(TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
  2803. {
  2804. OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
  2805. src = TMP3;
  2806. }
  2807. if (size <= 6 * sizeof(sljit_sw))
  2808. {
  2809. for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
  2810. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
  2811. return;
  2812. }
  2813. GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);
  2814. uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);
  2815. OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
  2816. loop = LABEL();
  2817. OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
  2818. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  2819. OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * (sljit_sw)sizeof(sljit_sw), src, 0);
  2820. OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * (sljit_sw)sizeof(sljit_sw), src, 0);
  2821. CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
  2822. if (uncleared_size >= sizeof(sljit_sw))
  2823. OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
  2824. if (uncleared_size >= 2 * sizeof(sljit_sw))
  2825. OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
  2826. }
  2827. static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
  2828. {
  2829. DEFINE_COMPILER;
  2830. struct sljit_label *loop;
  2831. int i;
  2832. SLJIT_ASSERT(length > 1);
  2833. /* OVECTOR(1) contains the "string begin - 1" constant. */
  2834. if (length > 2)
  2835. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
  2836. if (length < 8)
  2837. {
  2838. for (i = 2; i < length; i++)
  2839. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
  2840. }
  2841. else
  2842. {
  2843. if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
  2844. {
  2845. GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
  2846. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
  2847. loop = LABEL();
  2848. sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
  2849. OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
  2850. JUMPTO(SLJIT_NOT_ZERO, loop);
  2851. }
  2852. else
  2853. {
  2854. GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
  2855. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
  2856. loop = LABEL();
  2857. OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  2858. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
  2859. OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
  2860. JUMPTO(SLJIT_NOT_ZERO, loop);
  2861. }
  2862. }
  2863. if (!HAS_VIRTUAL_REGISTERS)
  2864. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));
  2865. else
  2866. OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
  2867. if (common->mark_ptr != 0)
  2868. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
  2869. if (common->control_head_ptr != 0)
  2870. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
  2871. if (HAS_VIRTUAL_REGISTERS)
  2872. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
  2873. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  2874. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
  2875. }
  2876. static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
  2877. {
  2878. while (current != NULL)
  2879. {
  2880. switch (current[1])
  2881. {
  2882. case type_then_trap:
  2883. break;
  2884. case type_mark:
  2885. if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
  2886. return current[3];
  2887. break;
  2888. default:
  2889. SLJIT_UNREACHABLE();
  2890. break;
  2891. }
  2892. SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
  2893. current = (sljit_sw*)current[0];
  2894. }
  2895. return 0;
  2896. }
  2897. static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
  2898. {
  2899. DEFINE_COMPILER;
  2900. struct sljit_label *loop;
  2901. BOOL has_pre;
  2902. /* At this point we can freely use all registers. */
  2903. OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
  2904. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
  2905. if (HAS_VIRTUAL_REGISTERS)
  2906. {
  2907. OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
  2908. OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  2909. if (common->mark_ptr != 0)
  2910. OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  2911. OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
  2912. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  2913. if (common->mark_ptr != 0)
  2914. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
  2915. OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
  2916. SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  2917. }
  2918. else
  2919. {
  2920. OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  2921. OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
  2922. if (common->mark_ptr != 0)
  2923. OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  2924. OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
  2925. OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  2926. if (common->mark_ptr != 0)
  2927. OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
  2928. OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  2929. }
  2930. has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
  2931. GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
  2932. OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
  2933. loop = LABEL();
  2934. if (has_pre)
  2935. sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
  2936. else
  2937. {
  2938. OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
  2939. OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
  2940. }
  2941. OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
  2942. OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
  2943. /* Copy the integer value to the output buffer */
  2944. #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  2945. OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
  2946. #endif
  2947. SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
  2948. OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
  2949. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
  2950. JUMPTO(SLJIT_NOT_ZERO, loop);
  2951. /* Calculate the return value, which is the maximum ovector value. */
  2952. if (topbracket > 1)
  2953. {
  2954. if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
  2955. {
  2956. GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
  2957. OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
  2958. /* OVECTOR(0) is never equal to SLJIT_S2. */
  2959. loop = LABEL();
  2960. sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
  2961. OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
  2962. CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
  2963. OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
  2964. }
  2965. else
  2966. {
  2967. GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
  2968. OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
  2969. /* OVECTOR(0) is never equal to SLJIT_S2. */
  2970. loop = LABEL();
  2971. OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
  2972. OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
  2973. OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
  2974. CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
  2975. OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
  2976. }
  2977. }
  2978. else
  2979. OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
  2980. }
  2981. static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
  2982. {
  2983. DEFINE_COMPILER;
  2984. sljit_s32 mov_opcode;
  2985. sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;
  2986. SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
  2987. SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
  2988. && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));
  2989. if (arguments_reg != ARGUMENTS)
  2990. OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
  2991. OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
  2992. common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
  2993. OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);
  2994. /* Store match begin and end. */
  2995. OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
  2996. OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
  2997. OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));
  2998. mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
  2999. OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
  3000. #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  3001. OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
  3002. #endif
  3003. OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);
  3004. OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
  3005. #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  3006. OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
  3007. #endif
  3008. OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);
  3009. JUMPTO(SLJIT_JUMP, quit);
  3010. }
  3011. static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
  3012. {
  3013. /* May destroy TMP1. */
  3014. DEFINE_COMPILER;
  3015. struct sljit_jump *jump;
  3016. if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  3017. {
  3018. /* The value of -1 must be kept for start_used_ptr! */
  3019. OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
  3020. /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
  3021. is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
  3022. jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
  3023. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  3024. JUMPHERE(jump);
  3025. }
  3026. else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
  3027. {
  3028. jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  3029. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  3030. JUMPHERE(jump);
  3031. }
  3032. }
  3033. static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
  3034. {
  3035. /* Detects if the character has an othercase. */
  3036. unsigned int c;
  3037. #ifdef SUPPORT_UNICODE
  3038. if (common->utf || common->ucp)
  3039. {
  3040. if (common->utf)
  3041. {
  3042. GETCHAR(c, cc);
  3043. }
  3044. else
  3045. c = *cc;
  3046. if (c > 127)
  3047. return c != UCD_OTHERCASE(c);
  3048. return common->fcc[c] != c;
  3049. }
  3050. else
  3051. #endif
  3052. c = *cc;
  3053. return MAX_255(c) ? common->fcc[c] != c : FALSE;
  3054. }
  3055. static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
  3056. {
  3057. /* Returns with the othercase. */
  3058. #ifdef SUPPORT_UNICODE
  3059. if ((common->utf || common->ucp) && c > 127)
  3060. return UCD_OTHERCASE(c);
  3061. #endif
  3062. return TABLE_GET(c, common->fcc, c);
  3063. }
  3064. static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
  3065. {
  3066. /* Detects if the character and its othercase has only 1 bit difference. */
  3067. unsigned int c, oc, bit;
  3068. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  3069. int n;
  3070. #endif
  3071. #ifdef SUPPORT_UNICODE
  3072. if (common->utf || common->ucp)
  3073. {
  3074. if (common->utf)
  3075. {
  3076. GETCHAR(c, cc);
  3077. }
  3078. else
  3079. c = *cc;
  3080. if (c <= 127)
  3081. oc = common->fcc[c];
  3082. else
  3083. oc = UCD_OTHERCASE(c);
  3084. }
  3085. else
  3086. {
  3087. c = *cc;
  3088. oc = TABLE_GET(c, common->fcc, c);
  3089. }
  3090. #else
  3091. c = *cc;
  3092. oc = TABLE_GET(c, common->fcc, c);
  3093. #endif
  3094. SLJIT_ASSERT(c != oc);
  3095. bit = c ^ oc;
  3096. /* Optimized for English alphabet. */
  3097. if (c <= 127 && bit == 0x20)
  3098. return (0 << 8) | 0x20;
  3099. /* Since c != oc, they must have at least 1 bit difference. */
  3100. if (!is_powerof2(bit))
  3101. return 0;
  3102. #if PCRE2_CODE_UNIT_WIDTH == 8
  3103. #ifdef SUPPORT_UNICODE
  3104. if (common->utf && c > 127)
  3105. {
  3106. n = GET_EXTRALEN(*cc);
  3107. while ((bit & 0x3f) == 0)
  3108. {
  3109. n--;
  3110. bit >>= 6;
  3111. }
  3112. return (n << 8) | bit;
  3113. }
  3114. #endif /* SUPPORT_UNICODE */
  3115. return (0 << 8) | bit;
  3116. #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  3117. #ifdef SUPPORT_UNICODE
  3118. if (common->utf && c > 65535)
  3119. {
  3120. if (bit >= (1u << 10))
  3121. bit >>= 10;
  3122. else
  3123. return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
  3124. }
  3125. #endif /* SUPPORT_UNICODE */
  3126. return (bit < 256) ? ((0u << 8) | bit) : ((1u << 8) | (bit >> 8));
  3127. #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  3128. }
  3129. static void check_partial(compiler_common *common, BOOL force)
  3130. {
  3131. /* Checks whether a partial matching is occurred. Does not modify registers. */
  3132. DEFINE_COMPILER;
  3133. struct sljit_jump *jump = NULL;
  3134. SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);
  3135. if (common->mode == PCRE2_JIT_COMPLETE)
  3136. return;
  3137. if (!force && !common->allow_empty_partial)
  3138. jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  3139. else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  3140. jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  3141. if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  3142. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  3143. else
  3144. {
  3145. if (common->partialmatchlabel != NULL)
  3146. JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  3147. else
  3148. add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  3149. }
  3150. if (jump != NULL)
  3151. JUMPHERE(jump);
  3152. }
  3153. static void check_str_end(compiler_common *common, jump_list **end_reached)
  3154. {
  3155. /* Does not affect registers. Usually used in a tight spot. */
  3156. DEFINE_COMPILER;
  3157. struct sljit_jump *jump;
  3158. if (common->mode == PCRE2_JIT_COMPLETE)
  3159. {
  3160. add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  3161. return;
  3162. }
  3163. jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  3164. if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  3165. {
  3166. add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
  3167. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  3168. add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  3169. }
  3170. else
  3171. {
  3172. add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
  3173. if (common->partialmatchlabel != NULL)
  3174. JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  3175. else
  3176. add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  3177. }
  3178. JUMPHERE(jump);
  3179. }
  3180. static void detect_partial_match(compiler_common *common, jump_list **backtracks)
  3181. {
  3182. DEFINE_COMPILER;
  3183. struct sljit_jump *jump;
  3184. if (common->mode == PCRE2_JIT_COMPLETE)
  3185. {
  3186. add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  3187. return;
  3188. }
  3189. /* Partial matching mode. */
  3190. jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  3191. if (!common->allow_empty_partial)
  3192. add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
  3193. else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  3194. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));
  3195. if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  3196. {
  3197. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  3198. add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  3199. }
  3200. else
  3201. {
  3202. if (common->partialmatchlabel != NULL)
  3203. JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  3204. else
  3205. add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  3206. }
  3207. JUMPHERE(jump);
  3208. }
  3209. static void process_partial_match(compiler_common *common)
  3210. {
  3211. DEFINE_COMPILER;
  3212. struct sljit_jump *jump;
  3213. /* Partial matching mode. */
  3214. if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  3215. {
  3216. jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  3217. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  3218. JUMPHERE(jump);
  3219. }
  3220. else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
  3221. {
  3222. if (common->partialmatchlabel != NULL)
  3223. CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
  3224. else
  3225. add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
  3226. }
  3227. }
  3228. static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
  3229. {
  3230. DEFINE_COMPILER;
  3231. CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);
  3232. process_partial_match(common);
  3233. }
  3234. static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
  3235. {
  3236. /* Reads the character into TMP1, keeps STR_PTR.
  3237. Does not check STR_END. TMP2, dst, RETURN_ADDR Destroyed. */
  3238. DEFINE_COMPILER;
  3239. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  3240. struct sljit_jump *jump;
  3241. #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
  3242. SLJIT_UNUSED_ARG(max);
  3243. SLJIT_UNUSED_ARG(dst);
  3244. SLJIT_UNUSED_ARG(dstw);
  3245. SLJIT_UNUSED_ARG(backtracks);
  3246. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  3247. #ifdef SUPPORT_UNICODE
  3248. #if PCRE2_CODE_UNIT_WIDTH == 8
  3249. if (common->utf)
  3250. {
  3251. if (max < 128) return;
  3252. jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  3253. OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  3254. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3255. add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  3256. OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  3257. if (backtracks && common->invalid_utf)
  3258. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  3259. JUMPHERE(jump);
  3260. }
  3261. #elif PCRE2_CODE_UNIT_WIDTH == 16
  3262. if (common->utf)
  3263. {
  3264. if (max < 0xd800) return;
  3265. OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  3266. if (common->invalid_utf)
  3267. {
  3268. jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  3269. OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  3270. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3271. add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  3272. OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  3273. if (backtracks && common->invalid_utf)
  3274. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  3275. }
  3276. else
  3277. {
  3278. /* TMP2 contains the high surrogate. */
  3279. jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
  3280. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  3281. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  3282. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
  3283. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  3284. }
  3285. JUMPHERE(jump);
  3286. }
  3287. #elif PCRE2_CODE_UNIT_WIDTH == 32
  3288. if (common->invalid_utf)
  3289. {
  3290. if (max < 0xd800) return;
  3291. if (backtracks != NULL)
  3292. {
  3293. OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  3294. add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  3295. add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  3296. }
  3297. else
  3298. {
  3299. OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  3300. OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
  3301. CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  3302. OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  3303. CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  3304. }
  3305. }
  3306. #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  3307. #endif /* SUPPORT_UNICODE */
  3308. }
  3309. static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
  3310. {
  3311. /* Reads one character back without moving STR_PTR. TMP2 must
  3312. contain the start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR. */
  3313. DEFINE_COMPILER;
  3314. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  3315. struct sljit_jump *jump;
  3316. #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
  3317. SLJIT_UNUSED_ARG(max);
  3318. SLJIT_UNUSED_ARG(backtracks);
  3319. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  3320. #ifdef SUPPORT_UNICODE
  3321. #if PCRE2_CODE_UNIT_WIDTH == 8
  3322. if (common->utf)
  3323. {
  3324. if (max < 128) return;
  3325. jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  3326. if (common->invalid_utf)
  3327. {
  3328. add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
  3329. if (backtracks != NULL)
  3330. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  3331. }
  3332. else
  3333. add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
  3334. JUMPHERE(jump);
  3335. }
  3336. #elif PCRE2_CODE_UNIT_WIDTH == 16
  3337. if (common->utf)
  3338. {
  3339. if (max < 0xd800) return;
  3340. if (common->invalid_utf)
  3341. {
  3342. jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  3343. add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
  3344. if (backtracks != NULL)
  3345. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  3346. }
  3347. else
  3348. {
  3349. OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  3350. jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
  3351. /* TMP2 contains the low surrogate. */
  3352. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  3353. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
  3354. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  3355. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
  3356. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  3357. }
  3358. JUMPHERE(jump);
  3359. }
  3360. #elif PCRE2_CODE_UNIT_WIDTH == 32
  3361. if (common->invalid_utf)
  3362. {
  3363. OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  3364. add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  3365. add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  3366. }
  3367. #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  3368. #endif /* SUPPORT_UNICODE */
  3369. }
  /* Option bits for the "options" argument of read_char(). */

  /* STR_PTR must end up after the whole character even when its value is
     outside the requested range and therefore not fully decoded. */
  #define READ_CHAR_UPDATE_STR_PTR 0x1
  /* With invalid UTF checking, call the utfreadnewline_invalid helper instead
     of utfreadchar_invalid. */
  #define READ_CHAR_UTF8_NEWLINE 0x2
  /* Both of the above. */
  #define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
  /* Skip the invalid UTF checking path even if common->invalid_utf is set. */
  #define READ_CHAR_VALID_UTF 0x4
  3374. static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
  3375. jump_list **backtracks, sljit_u32 options)
  3376. {
  3377. /* Reads the precise value of a character into TMP1, if the character is
  3378. between min and max (c >= min && c <= max). Otherwise it returns with a value
  3379. outside the range. Does not check STR_END. */
  3380. DEFINE_COMPILER;
  3381. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  3382. struct sljit_jump *jump;
  3383. #endif
  3384. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  3385. struct sljit_jump *jump2;
  3386. #endif
  3387. SLJIT_UNUSED_ARG(min);
  3388. SLJIT_UNUSED_ARG(max);
  3389. SLJIT_UNUSED_ARG(backtracks);
  3390. SLJIT_UNUSED_ARG(options);
  3391. SLJIT_ASSERT(min <= max);
  3392. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  3393. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3394. #ifdef SUPPORT_UNICODE
  3395. #if PCRE2_CODE_UNIT_WIDTH == 8
  3396. if (common->utf)
  3397. {
  3398. if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
  3399. if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
  3400. {
  3401. jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  3402. if (options & READ_CHAR_UTF8_NEWLINE)
  3403. add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
  3404. else
  3405. add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  3406. if (backtracks != NULL)
  3407. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  3408. JUMPHERE(jump);
  3409. return;
  3410. }
  3411. jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  3412. if (min >= 0x10000)
  3413. {
  3414. OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
  3415. if (options & READ_CHAR_UPDATE_STR_PTR)
  3416. OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  3417. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  3418. jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
  3419. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
  3420. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
  3421. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  3422. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  3423. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  3424. OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  3425. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  3426. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
  3427. if (!(options & READ_CHAR_UPDATE_STR_PTR))
  3428. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
  3429. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  3430. OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  3431. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  3432. JUMPHERE(jump2);
  3433. if (options & READ_CHAR_UPDATE_STR_PTR)
  3434. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
  3435. }
  3436. else if (min >= 0x800 && max <= 0xffff)
  3437. {
  3438. OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
  3439. if (options & READ_CHAR_UPDATE_STR_PTR)
  3440. OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  3441. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  3442. jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
  3443. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
  3444. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
  3445. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  3446. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  3447. if (!(options & READ_CHAR_UPDATE_STR_PTR))
  3448. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  3449. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  3450. OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  3451. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  3452. JUMPHERE(jump2);
  3453. if (options & READ_CHAR_UPDATE_STR_PTR)
  3454. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
  3455. }
  3456. else if (max >= 0x800)
  3457. {
  3458. add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  3459. }
  3460. else if (max < 128)
  3461. {
  3462. OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  3463. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  3464. }
  3465. else
  3466. {
  3467. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  3468. if (!(options & READ_CHAR_UPDATE_STR_PTR))
  3469. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3470. else
  3471. OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  3472. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
  3473. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  3474. OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  3475. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  3476. if (options & READ_CHAR_UPDATE_STR_PTR)
  3477. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
  3478. }
  3479. JUMPHERE(jump);
  3480. }
  3481. #elif PCRE2_CODE_UNIT_WIDTH == 16
  3482. if (common->utf)
  3483. {
  3484. if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
  3485. if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
  3486. {
  3487. OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  3488. jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  3489. if (options & READ_CHAR_UTF8_NEWLINE)
  3490. add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
  3491. else
  3492. add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  3493. if (backtracks != NULL)
  3494. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  3495. JUMPHERE(jump);
  3496. return;
  3497. }
  3498. if (max >= 0x10000)
  3499. {
  3500. OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  3501. jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
  3502. /* TMP2 contains the high surrogate. */
  3503. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  3504. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  3505. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3506. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
  3507. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  3508. JUMPHERE(jump);
  3509. return;
  3510. }
  3511. /* Skip low surrogate if necessary. */
  3512. OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  3513. if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
  3514. {
  3515. if (options & READ_CHAR_UPDATE_STR_PTR)
  3516. OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3517. OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
  3518. if (options & READ_CHAR_UPDATE_STR_PTR)
  3519. CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
  3520. if (max >= 0xd800)
  3521. CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000);
  3522. }
  3523. else
  3524. {
  3525. jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
  3526. if (options & READ_CHAR_UPDATE_STR_PTR)
  3527. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3528. if (max >= 0xd800)
  3529. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
  3530. JUMPHERE(jump);
  3531. }
  3532. }
  3533. #elif PCRE2_CODE_UNIT_WIDTH == 32
  3534. if (common->invalid_utf)
  3535. {
  3536. if (backtracks != NULL)
  3537. {
  3538. OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  3539. add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  3540. add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  3541. }
  3542. else
  3543. {
  3544. OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  3545. OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
  3546. CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  3547. OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  3548. CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  3549. }
  3550. }
  3551. #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  3552. #endif /* SUPPORT_UNICODE */
  3553. }
  3554. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  3555. static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
  3556. {
  3557. /* Tells whether the character codes below 128 are enough
  3558. to determine a match. */
  3559. const sljit_u8 value = nclass ? 0xff : 0;
  3560. const sljit_u8 *end = bitset + 32;
  3561. bitset += 16;
  3562. do
  3563. {
  3564. if (*bitset++ != value)
  3565. return FALSE;
  3566. }
  3567. while (bitset < end);
  3568. return TRUE;
  3569. }
  3570. static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
  3571. {
  3572. /* Reads the precise character type of a character into TMP1, if the character
  3573. is less than 128. Otherwise it returns with zero. Does not check STR_END. The
  3574. full_read argument tells whether characters above max are accepted or not. */
  3575. DEFINE_COMPILER;
  3576. struct sljit_jump *jump;
  3577. SLJIT_ASSERT(common->utf);
  3578. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
  3579. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3580. /* All values > 127 are zero in ctypes. */
  3581. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  3582. if (negated)
  3583. {
  3584. jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);
  3585. if (common->invalid_utf)
  3586. {
  3587. add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  3588. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  3589. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  3590. }
  3591. else
  3592. {
  3593. OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  3594. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  3595. }
  3596. JUMPHERE(jump);
  3597. }
  3598. }
  3599. #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
  3600. static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
  3601. {
  3602. /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
  3603. DEFINE_COMPILER;
  3604. #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  3605. struct sljit_jump *jump;
  3606. #endif
  3607. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  3608. struct sljit_jump *jump2;
  3609. #endif
  3610. SLJIT_UNUSED_ARG(backtracks);
  3611. SLJIT_UNUSED_ARG(negated);
  3612. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
  3613. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3614. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  3615. if (common->utf)
  3616. {
  3617. /* The result of this read may be unused, but saves an "else" part. */
  3618. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  3619. jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);
  3620. if (!negated)
  3621. {
  3622. if (common->invalid_utf)
  3623. add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  3624. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  3625. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3626. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
  3627. if (common->invalid_utf)
  3628. add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));
  3629. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
  3630. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  3631. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
  3632. if (common->invalid_utf)
  3633. add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40));
  3634. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  3635. jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
  3636. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  3637. JUMPHERE(jump2);
  3638. }
  3639. else if (common->invalid_utf)
  3640. {
  3641. add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  3642. OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
  3643. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  3644. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  3645. jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
  3646. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  3647. JUMPHERE(jump2);
  3648. }
  3649. else
  3650. add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
  3651. JUMPHERE(jump);
  3652. return;
  3653. }
  3654. #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
  3655. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
  3656. if (common->invalid_utf && negated)
  3657. add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
  3658. #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */
  3659. #if PCRE2_CODE_UNIT_WIDTH != 8
  3660. /* The ctypes array contains only 256 values. */
  3661. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  3662. jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
  3663. #endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
  3664. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  3665. #if PCRE2_CODE_UNIT_WIDTH != 8
  3666. JUMPHERE(jump);
  3667. #endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
  3668. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
  3669. if (common->utf && negated)
  3670. {
  3671. /* Skip low surrogate if necessary. */
  3672. if (!common->invalid_utf)
  3673. {
  3674. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  3675. if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
  3676. {
  3677. OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3678. OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
  3679. CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
  3680. }
  3681. else
  3682. {
  3683. jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
  3684. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3685. JUMPHERE(jump);
  3686. }
  3687. return;
  3688. }
  3689. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  3690. jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  3691. add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  3692. add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  3693. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  3694. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3695. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  3696. add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  3697. JUMPHERE(jump);
  3698. return;
  3699. }
  3700. #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
  3701. }
  3702. static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
  3703. {
  3704. /* Goes one character back. Affects STR_PTR and TMP1. If must_be_valid is TRUE,
  3705. TMP2 is not used. Otherwise TMP2 must contain the start of the subject buffer,
  3706. and it is destroyed. Does not modify STR_PTR for invalid character sequences. */
  3707. DEFINE_COMPILER;
  3708. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  3709. struct sljit_jump *jump;
  3710. #endif
  3711. #ifdef SUPPORT_UNICODE
  3712. #if PCRE2_CODE_UNIT_WIDTH == 8
  3713. struct sljit_label *label;
  3714. if (common->utf)
  3715. {
  3716. if (!must_be_valid && common->invalid_utf)
  3717. {
  3718. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  3719. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3720. jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  3721. add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
  3722. if (backtracks != NULL)
  3723. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
  3724. JUMPHERE(jump);
  3725. return;
  3726. }
  3727. label = LABEL();
  3728. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  3729. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3730. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  3731. CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
  3732. return;
  3733. }
  3734. #elif PCRE2_CODE_UNIT_WIDTH == 16
  3735. if (common->utf)
  3736. {
  3737. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  3738. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3739. if (!must_be_valid && common->invalid_utf)
  3740. {
  3741. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  3742. jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
  3743. add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
  3744. if (backtracks != NULL)
  3745. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
  3746. JUMPHERE(jump);
  3747. return;
  3748. }
  3749. /* Skip low surrogate if necessary. */
  3750. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  3751. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  3752. OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  3753. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  3754. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  3755. return;
  3756. }
  3757. #elif PCRE2_CODE_UNIT_WIDTH == 32
  3758. if (common->invalid_utf && !must_be_valid)
  3759. {
  3760. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  3761. if (backtracks != NULL)
  3762. {
  3763. add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  3764. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3765. return;
  3766. }
  3767. OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
  3768. OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
  3769. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  3770. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  3771. return;
  3772. }
  3773. #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  3774. #endif /* SUPPORT_UNICODE */
  3775. SLJIT_UNUSED_ARG(backtracks);
  3776. SLJIT_UNUSED_ARG(must_be_valid);
  3777. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3778. }
  3779. static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
  3780. {
  3781. /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
  3782. DEFINE_COMPILER;
  3783. struct sljit_jump *jump;
  3784. if (nltype == NLTYPE_ANY)
  3785. {
  3786. add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  3787. sljit_set_current_flags(compiler, SLJIT_SET_Z);
  3788. add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  3789. }
  3790. else if (nltype == NLTYPE_ANYCRLF)
  3791. {
  3792. if (jumpifmatch)
  3793. {
  3794. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
  3795. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  3796. }
  3797. else
  3798. {
  3799. jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  3800. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  3801. JUMPHERE(jump);
  3802. }
  3803. }
  3804. else
  3805. {
  3806. SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  3807. add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  3808. }
  3809. }
  3810. #ifdef SUPPORT_UNICODE
  3811. #if PCRE2_CODE_UNIT_WIDTH == 8
  3812. static void do_utfreadchar(compiler_common *common)
  3813. {
  3814. /* Fast decoding a UTF-8 character. TMP1 contains the first byte
  3815. of the character (>= 0xc0). Return char value in TMP1. */
  3816. DEFINE_COMPILER;
  3817. struct sljit_jump *jump;
  3818. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  3819. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  3820. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  3821. OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  3822. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  3823. /* Searching for the first zero. */
  3824. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  3825. jump = JUMP(SLJIT_NOT_ZERO);
  3826. /* Two byte sequence. */
  3827. OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
  3828. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3829. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  3830. JUMPHERE(jump);
  3831. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  3832. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  3833. OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  3834. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  3835. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
  3836. jump = JUMP(SLJIT_NOT_ZERO);
  3837. /* Three byte sequence. */
  3838. OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
  3839. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  3840. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  3841. /* Four byte sequence. */
  3842. JUMPHERE(jump);
  3843. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
  3844. OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
  3845. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
  3846. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  3847. OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  3848. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  3849. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  3850. }
  3851. static void do_utfreadtype8(compiler_common *common)
  3852. {
  3853. /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
  3854. of the character (>= 0xc0). Return value in TMP1. */
  3855. DEFINE_COMPILER;
  3856. struct sljit_jump *jump;
  3857. struct sljit_jump *compare;
  3858. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  3859. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
  3860. jump = JUMP(SLJIT_NOT_ZERO);
  3861. /* Two byte sequence. */
  3862. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  3863. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3864. OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
  3865. /* The upper 5 bits are known at this point. */
  3866. compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
  3867. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
  3868. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
  3869. OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
  3870. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  3871. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  3872. JUMPHERE(compare);
  3873. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  3874. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  3875. /* We only have types for characters less than 256. */
  3876. JUMPHERE(jump);
  3877. OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  3878. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  3879. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  3880. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  3881. }
  3882. static void do_utfreadchar_invalid(compiler_common *common)
  3883. {
  3884. /* Slow decoding a UTF-8 character. TMP1 contains the first byte
  3885. of the character (>= 0xc0). Return char value in TMP1. STR_PTR is
  3886. undefined for invalid characters. */
  3887. DEFINE_COMPILER;
  3888. sljit_s32 i;
  3889. sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
  3890. struct sljit_jump *jump;
  3891. struct sljit_jump *buffer_end_close;
  3892. struct sljit_label *three_byte_entry;
  3893. struct sljit_label *exit_invalid_label;
  3894. struct sljit_jump *exit_invalid[11];
  3895. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  3896. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);
  3897. /* Usually more than 3 characters remained in the subject buffer. */
  3898. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
  3899. /* Not a valid start of a multi-byte sequence, no more bytes read. */
  3900. exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);
  3901. buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  3902. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
  3903. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  3904. /* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
  3905. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  3906. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
  3907. exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  3908. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  3909. jump = JUMP(SLJIT_NOT_ZERO);
  3910. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  3911. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  3912. JUMPHERE(jump);
  3913. /* Three-byte sequence. */
  3914. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  3915. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  3916. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
  3917. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  3918. if (has_cmov)
  3919. {
  3920. OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  3921. CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000);
  3922. exit_invalid[2] = NULL;
  3923. }
  3924. else
  3925. exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  3926. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
  3927. jump = JUMP(SLJIT_NOT_ZERO);
  3928. three_byte_entry = LABEL();
  3929. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
  3930. if (has_cmov)
  3931. {
  3932. OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  3933. CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800);
  3934. exit_invalid[3] = NULL;
  3935. }
  3936. else
  3937. exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  3938. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  3939. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3940. if (has_cmov)
  3941. {
  3942. OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  3943. CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  3944. exit_invalid[4] = NULL;
  3945. }
  3946. else
  3947. exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  3948. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  3949. JUMPHERE(jump);
  3950. /* Four-byte sequence. */
  3951. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  3952. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  3953. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
  3954. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  3955. if (has_cmov)
  3956. {
  3957. OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  3958. CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0);
  3959. exit_invalid[5] = NULL;
  3960. }
  3961. else
  3962. exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  3963. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
  3964. if (has_cmov)
  3965. {
  3966. OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
  3967. CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  3968. exit_invalid[6] = NULL;
  3969. }
  3970. else
  3971. exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  3972. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
  3973. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  3974. JUMPHERE(buffer_end_close);
  3975. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  3976. exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  3977. /* Two-byte sequence. */
  3978. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  3979. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  3980. /* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
  3981. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  3982. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
  3983. exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  3984. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  3985. jump = JUMP(SLJIT_NOT_ZERO);
  3986. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  3987. /* Three-byte sequence. */
  3988. JUMPHERE(jump);
  3989. exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  3990. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  3991. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  3992. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
  3993. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  3994. if (has_cmov)
  3995. {
  3996. OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  3997. CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  3998. exit_invalid[10] = NULL;
  3999. }
  4000. else
  4001. exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  4002. /* One will be substracted from STR_PTR later. */
  4003. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  4004. /* Four byte sequences are not possible. */
  4005. CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry);
  4006. exit_invalid_label = LABEL();
  4007. for (i = 0; i < 11; i++)
  4008. sljit_set_label(exit_invalid[i], exit_invalid_label);
  4009. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  4010. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4011. }
  4012. static void do_utfreadnewline_invalid(compiler_common *common)
  4013. {
  4014. /* Slow decoding a UTF-8 character, specialized for newlines.
  4015. TMP1 contains the first byte of the character (>= 0xc0). Return
  4016. char value in TMP1. */
  4017. DEFINE_COMPILER;
  4018. struct sljit_label *loop;
  4019. struct sljit_label *skip_start;
  4020. struct sljit_label *three_byte_exit;
  4021. struct sljit_jump *jump[5];
  4022. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  4023. if (common->nltype != NLTYPE_ANY)
  4024. {
  4025. SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);
  4026. /* All newlines are ascii, just skip intermediate octets. */
  4027. jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  4028. loop = LABEL();
  4029. if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
  4030. sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  4031. else
  4032. {
  4033. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  4034. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4035. }
  4036. OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  4037. CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
  4038. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4039. JUMPHERE(jump[0]);
  4040. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  4041. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4042. return;
  4043. }
  4044. jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  4045. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  4046. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4047. jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
  4048. jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);
  4049. skip_start = LABEL();
  4050. OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  4051. jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);
  4052. /* Skip intermediate octets. */
  4053. loop = LABEL();
  4054. jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  4055. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  4056. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4057. OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  4058. CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
  4059. JUMPHERE(jump[3]);
  4060. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4061. three_byte_exit = LABEL();
  4062. JUMPHERE(jump[0]);
  4063. JUMPHERE(jump[4]);
  4064. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  4065. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4066. /* Two byte long newline: 0x85. */
  4067. JUMPHERE(jump[1]);
  4068. CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);
  4069. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
  4070. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4071. /* Three byte long newlines: 0x2028 and 0x2029. */
  4072. JUMPHERE(jump[2]);
  4073. CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
  4074. CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);
  4075. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  4076. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4077. OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
  4078. CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);
  4079. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
  4080. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  4081. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4082. }
  4083. static void do_utfmoveback_invalid(compiler_common *common)
  4084. {
  4085. /* Goes one character back. */
  4086. DEFINE_COMPILER;
  4087. sljit_s32 i;
  4088. struct sljit_jump *jump;
  4089. struct sljit_jump *buffer_start_close;
  4090. struct sljit_label *exit_ok_label;
  4091. struct sljit_label *exit_invalid_label;
  4092. struct sljit_jump *exit_invalid[7];
  4093. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  4094. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
  4095. exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
  4096. /* Two-byte sequence. */
  4097. buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
  4098. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
  4099. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  4100. jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);
  4101. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
  4102. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  4103. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4104. /* Three-byte sequence. */
  4105. JUMPHERE(jump);
  4106. exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
  4107. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  4108. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
  4109. jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);
  4110. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
  4111. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4112. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4113. /* Four-byte sequence. */
  4114. JUMPHERE(jump);
  4115. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
  4116. exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);
  4117. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  4118. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
  4119. exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);
  4120. exit_ok_label = LABEL();
  4121. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
  4122. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4123. /* Two-byte sequence. */
  4124. JUMPHERE(buffer_start_close);
  4125. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  4126. exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
  4127. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  4128. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  4129. CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label);
  4130. /* Three-byte sequence. */
  4131. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4132. exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
  4133. exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
  4134. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  4135. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
  4136. CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label);
  4137. /* Four-byte sequences are not possible. */
  4138. exit_invalid_label = LABEL();
  4139. sljit_set_label(exit_invalid[5], exit_invalid_label);
  4140. sljit_set_label(exit_invalid[6], exit_invalid_label);
  4141. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  4142. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
  4143. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4144. JUMPHERE(exit_invalid[4]);
  4145. /* -2 + 4 = 2 */
  4146. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  4147. exit_invalid_label = LABEL();
  4148. for (i = 0; i < 4; i++)
  4149. sljit_set_label(exit_invalid[i], exit_invalid_label);
  4150. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  4151. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
  4152. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4153. }
  4154. static void do_utfpeakcharback(compiler_common *common)
  4155. {
  4156. /* Peak a character back. Does not modify STR_PTR. */
  4157. DEFINE_COMPILER;
  4158. struct sljit_jump *jump[2];
  4159. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  4160. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  4161. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  4162. jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);
  4163. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
  4164. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
  4165. jump[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);
  4166. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
  4167. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
  4168. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
  4169. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
  4170. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  4171. JUMPHERE(jump[1]);
  4172. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  4173. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  4174. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
  4175. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  4176. JUMPHERE(jump[0]);
  4177. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  4178. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  4179. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
  4180. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  4181. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4182. }
  4183. static void do_utfpeakcharback_invalid(compiler_common *common)
  4184. {
  4185. /* Peak a character back. Does not modify STR_PTR. */
  4186. DEFINE_COMPILER;
  4187. sljit_s32 i;
  4188. sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
  4189. struct sljit_jump *jump[2];
  4190. struct sljit_label *two_byte_entry;
  4191. struct sljit_label *three_byte_entry;
  4192. struct sljit_label *exit_invalid_label;
  4193. struct sljit_jump *exit_invalid[8];
  4194. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  4195. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
  4196. exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
  4197. jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
  4198. /* Two-byte sequence. */
  4199. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  4200. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
  4201. jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);
  4202. two_byte_entry = LABEL();
  4203. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
  4204. /* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
  4205. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  4206. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4207. JUMPHERE(jump[1]);
  4208. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
  4209. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
  4210. exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  4211. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
  4212. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  4213. /* Three-byte sequence. */
  4214. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
  4215. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
  4216. jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);
  4217. three_byte_entry = LABEL();
  4218. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
  4219. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  4220. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  4221. if (has_cmov)
  4222. {
  4223. OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  4224. CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800);
  4225. exit_invalid[2] = NULL;
  4226. }
  4227. else
  4228. exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  4229. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  4230. if (has_cmov)
  4231. {
  4232. OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  4233. CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  4234. exit_invalid[3] = NULL;
  4235. }
  4236. else
  4237. exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  4238. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4239. JUMPHERE(jump[1]);
  4240. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
  4241. exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  4242. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
  4243. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  4244. /* Four-byte sequence. */
  4245. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
  4246. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
  4247. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
  4248. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
  4249. /* ADD is used instead of OR because of the SUB 0x10000 above. */
  4250. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  4251. if (has_cmov)
  4252. {
  4253. OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
  4254. CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  4255. exit_invalid[5] = NULL;
  4256. }
  4257. else
  4258. exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  4259. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
  4260. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4261. JUMPHERE(jump[0]);
  4262. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  4263. jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
  4264. /* Two-byte sequence. */
  4265. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  4266. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
  4267. CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);
  4268. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
  4269. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
  4270. exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  4271. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
  4272. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  4273. /* Three-byte sequence. */
  4274. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
  4275. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
  4276. CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);
  4277. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  4278. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4279. JUMPHERE(jump[0]);
  4280. exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);
  4281. /* Two-byte sequence. */
  4282. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  4283. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
  4284. CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);
  4285. exit_invalid_label = LABEL();
  4286. for (i = 0; i < 8; i++)
  4287. sljit_set_label(exit_invalid[i], exit_invalid_label);
  4288. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  4289. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4290. }
  4291. #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  4292. #if PCRE2_CODE_UNIT_WIDTH == 16
  4293. static void do_utfreadchar_invalid(compiler_common *common)
  4294. {
  4295. /* Slow decoding a UTF-16 character. TMP1 contains the first half
  4296. of the character (>= 0xd800). Return char value in TMP1. STR_PTR is
  4297. undefined for invalid characters. */
  4298. DEFINE_COMPILER;
  4299. struct sljit_jump *exit_invalid[3];
  4300. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  4301. /* TMP2 contains the high surrogate. */
  4302. exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
  4303. exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  4304. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  4305. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  4306. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4307. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  4308. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
  4309. exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
  4310. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  4311. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4312. JUMPHERE(exit_invalid[0]);
  4313. JUMPHERE(exit_invalid[1]);
  4314. JUMPHERE(exit_invalid[2]);
  4315. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  4316. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4317. }
  4318. static void do_utfreadnewline_invalid(compiler_common *common)
  4319. {
  4320. /* Slow decoding a UTF-16 character, specialized for newlines.
  4321. TMP1 contains the first half of the character (>= 0xd800). Return
  4322. char value in TMP1. */
  4323. DEFINE_COMPILER;
  4324. struct sljit_jump *exit_invalid[2];
  4325. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  4326. /* TMP2 contains the high surrogate. */
  4327. exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  4328. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  4329. exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
  4330. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  4331. OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
  4332. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
  4333. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
  4334. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
  4335. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  4336. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4337. JUMPHERE(exit_invalid[0]);
  4338. JUMPHERE(exit_invalid[1]);
  4339. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  4340. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4341. }
  4342. static void do_utfmoveback_invalid(compiler_common *common)
  4343. {
  4344. /* Goes one character back. */
  4345. DEFINE_COMPILER;
  4346. struct sljit_jump *exit_invalid[3];
  4347. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  4348. exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
  4349. exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
  4350. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  4351. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  4352. exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
  4353. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4354. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
  4355. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4356. JUMPHERE(exit_invalid[0]);
  4357. JUMPHERE(exit_invalid[1]);
  4358. JUMPHERE(exit_invalid[2]);
  4359. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4360. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  4361. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4362. }
  4363. static void do_utfpeakcharback_invalid(compiler_common *common)
  4364. {
  4365. /* Peak a character back. Does not modify STR_PTR. */
  4366. DEFINE_COMPILER;
  4367. struct sljit_jump *jump;
  4368. struct sljit_jump *exit_invalid[3];
  4369. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  4370. jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
  4371. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  4372. exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
  4373. exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
  4374. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  4375. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
  4376. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  4377. exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
  4378. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  4379. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  4380. JUMPHERE(jump);
  4381. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4382. JUMPHERE(exit_invalid[0]);
  4383. JUMPHERE(exit_invalid[1]);
  4384. JUMPHERE(exit_invalid[2]);
  4385. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  4386. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4387. }
  4388. #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
  4389. /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
  4390. #define UCD_BLOCK_MASK 127
  4391. #define UCD_BLOCK_SHIFT 7
  4392. static void do_getucd(compiler_common *common)
  4393. {
  4394. /* Search the UCD record for the character comes in TMP1.
  4395. Returns chartype in TMP1 and UCD offset in TMP2. */
  4396. DEFINE_COMPILER;
  4397. #if PCRE2_CODE_UNIT_WIDTH == 32
  4398. struct sljit_jump *jump;
  4399. #endif
  4400. #if defined SLJIT_DEBUG && SLJIT_DEBUG
  4401. /* dummy_ucd_record */
  4402. const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
  4403. SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
  4404. SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
  4405. #endif
  4406. SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);
  4407. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  4408. #if PCRE2_CODE_UNIT_WIDTH == 32
  4409. if (!common->utf)
  4410. {
  4411. jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  4412. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  4413. JUMPHERE(jump);
  4414. }
  4415. #endif
  4416. OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  4417. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
  4418. OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
  4419. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
  4420. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  4421. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  4422. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
  4423. OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
  4424. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4425. }
  4426. static void do_getucdtype(compiler_common *common)
  4427. {
  4428. /* Search the UCD record for the character comes in TMP1.
  4429. Returns chartype in TMP1 and UCD offset in TMP2. */
  4430. DEFINE_COMPILER;
  4431. #if PCRE2_CODE_UNIT_WIDTH == 32
  4432. struct sljit_jump *jump;
  4433. #endif
  4434. #if defined SLJIT_DEBUG && SLJIT_DEBUG
  4435. /* dummy_ucd_record */
  4436. const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
  4437. SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
  4438. SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
  4439. #endif
  4440. SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);
  4441. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  4442. #if PCRE2_CODE_UNIT_WIDTH == 32
  4443. if (!common->utf)
  4444. {
  4445. jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  4446. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  4447. JUMPHERE(jump);
  4448. }
  4449. #endif
  4450. OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  4451. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
  4452. OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
  4453. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
  4454. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  4455. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  4456. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
  4457. OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
  4458. /* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1). */
  4459. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
  4460. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
  4461. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  4462. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);
  4463. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  4464. }
  4465. #endif /* SUPPORT_UNICODE */
  4466. static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
  4467. {
  4468. DEFINE_COMPILER;
  4469. struct sljit_label *mainloop;
  4470. struct sljit_label *newlinelabel = NULL;
  4471. struct sljit_jump *start;
  4472. struct sljit_jump *end = NULL;
  4473. struct sljit_jump *end2 = NULL;
  4474. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  4475. struct sljit_label *loop;
  4476. struct sljit_jump *jump;
  4477. #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
  4478. jump_list *newline = NULL;
  4479. sljit_u32 overall_options = common->re->overall_options;
  4480. BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
  4481. BOOL newlinecheck = FALSE;
  4482. BOOL readuchar = FALSE;
  4483. if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
  4484. && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
  4485. newlinecheck = TRUE;
  4486. SLJIT_ASSERT(common->abort_label == NULL);
  4487. if ((overall_options & PCRE2_FIRSTLINE) != 0)
  4488. {
  4489. /* Search for the end of the first line. */
  4490. SLJIT_ASSERT(common->match_end_ptr != 0);
  4491. OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  4492. if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  4493. {
  4494. mainloop = LABEL();
  4495. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4496. end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  4497. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  4498. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  4499. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
  4500. CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
  4501. JUMPHERE(end);
  4502. OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4503. }
  4504. else
  4505. {
  4506. end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  4507. mainloop = LABEL();
  4508. /* Continual stores does not cause data dependency. */
  4509. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
  4510. read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
  4511. check_newlinechar(common, common->nltype, &newline, TRUE);
  4512. CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
  4513. JUMPHERE(end);
  4514. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
  4515. set_jumps(newline, LABEL());
  4516. }
  4517. OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  4518. }
  4519. else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
  4520. {
  4521. /* Check whether offset limit is set and valid. */
  4522. SLJIT_ASSERT(common->match_end_ptr != 0);
  4523. if (HAS_VIRTUAL_REGISTERS)
  4524. {
  4525. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  4526. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
  4527. }
  4528. else
  4529. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));
  4530. OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  4531. end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
  4532. if (HAS_VIRTUAL_REGISTERS)
  4533. OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  4534. else
  4535. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
  4536. #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  4537. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  4538. #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  4539. if (HAS_VIRTUAL_REGISTERS)
  4540. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
  4541. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  4542. end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
  4543. OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  4544. JUMPHERE(end2);
  4545. OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  4546. add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
  4547. JUMPHERE(end);
  4548. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
  4549. }
  4550. start = JUMP(SLJIT_JUMP);
  4551. if (newlinecheck)
  4552. {
  4553. newlinelabel = LABEL();
  4554. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4555. end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  4556. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  4557. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
  4558. OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  4559. #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  4560. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  4561. #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  4562. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  4563. end2 = JUMP(SLJIT_JUMP);
  4564. }
  4565. mainloop = LABEL();
  4566. /* Increasing the STR_PTR here requires one less jump in the most common case. */
  4567. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  4568. if (common->utf && !common->invalid_utf) readuchar = TRUE;
  4569. #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
  4570. if (newlinecheck) readuchar = TRUE;
  4571. if (readuchar)
  4572. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  4573. if (newlinecheck)
  4574. CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
  4575. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4576. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  4577. #if PCRE2_CODE_UNIT_WIDTH == 8
  4578. if (common->invalid_utf)
  4579. {
  4580. /* Skip continuation code units. */
  4581. loop = LABEL();
  4582. jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  4583. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  4584. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4585. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
  4586. CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
  4587. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4588. JUMPHERE(jump);
  4589. }
  4590. else if (common->utf)
  4591. {
  4592. jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  4593. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  4594. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  4595. JUMPHERE(jump);
  4596. }
  4597. #elif PCRE2_CODE_UNIT_WIDTH == 16
  4598. if (common->invalid_utf)
  4599. {
  4600. /* Skip continuation code units. */
  4601. loop = LABEL();
  4602. jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  4603. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  4604. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4605. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  4606. CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
  4607. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4608. JUMPHERE(jump);
  4609. }
  4610. else if (common->utf)
  4611. {
  4612. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  4613. if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  4614. {
  4615. OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4616. OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
  4617. CMOV(SLJIT_LESS, STR_PTR, TMP2, 0);
  4618. }
  4619. else
  4620. {
  4621. OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
  4622. OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
  4623. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  4624. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  4625. }
  4626. }
  4627. #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
  4628. #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
  4629. JUMPHERE(start);
  4630. if (newlinecheck)
  4631. {
  4632. JUMPHERE(end);
  4633. JUMPHERE(end2);
  4634. }
  4635. return mainloop;
  4636. }
  4637. static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
  4638. {
  4639. sljit_u32 i, count = chars->count;
  4640. if (count == 255)
  4641. return;
  4642. if (count == 0)
  4643. {
  4644. chars->count = 1;
  4645. chars->chars[0] = chr;
  4646. if (last)
  4647. chars->last_count = 1;
  4648. return;
  4649. }
  4650. for (i = 0; i < count; i++)
  4651. if (chars->chars[i] == chr)
  4652. return;
  4653. if (count >= MAX_DIFF_CHARS)
  4654. {
  4655. chars->count = 255;
  4656. return;
  4657. }
  4658. chars->chars[count] = chr;
  4659. chars->count = count + 1;
  4660. if (last)
  4661. chars->last_count++;
  4662. }
  4663. static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
  4664. {
  4665. /* Recursive function, which scans prefix literals. */
  4666. BOOL last, any, class, caseless;
  4667. int len, repeat, len_save, consumed = 0;
  4668. sljit_u32 chr; /* Any unicode character. */
  4669. sljit_u8 *bytes, *bytes_end, byte;
  4670. PCRE2_SPTR alternative, cc_save, oc;
  4671. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  4672. PCRE2_UCHAR othercase[4];
  4673. #elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
  4674. PCRE2_UCHAR othercase[2];
  4675. #else
  4676. PCRE2_UCHAR othercase[1];
  4677. #endif
  4678. repeat = 1;
  4679. while (TRUE)
  4680. {
  4681. if (*rec_count == 0)
  4682. return 0;
  4683. (*rec_count)--;
  4684. last = TRUE;
  4685. any = FALSE;
  4686. class = FALSE;
  4687. caseless = FALSE;
  4688. switch (*cc)
  4689. {
  4690. case OP_CHARI:
  4691. caseless = TRUE;
  4692. /* Fall through */
  4693. case OP_CHAR:
  4694. last = FALSE;
  4695. cc++;
  4696. break;
  4697. case OP_SOD:
  4698. case OP_SOM:
  4699. case OP_SET_SOM:
  4700. case OP_NOT_WORD_BOUNDARY:
  4701. case OP_WORD_BOUNDARY:
  4702. case OP_EODN:
  4703. case OP_EOD:
  4704. case OP_CIRC:
  4705. case OP_CIRCM:
  4706. case OP_DOLL:
  4707. case OP_DOLLM:
  4708. /* Zero width assertions. */
  4709. cc++;
  4710. continue;
  4711. case OP_ASSERT:
  4712. case OP_ASSERT_NOT:
  4713. case OP_ASSERTBACK:
  4714. case OP_ASSERTBACK_NOT:
  4715. case OP_ASSERT_NA:
  4716. case OP_ASSERTBACK_NA:
  4717. cc = bracketend(cc);
  4718. continue;
  4719. case OP_PLUSI:
  4720. case OP_MINPLUSI:
  4721. case OP_POSPLUSI:
  4722. caseless = TRUE;
  4723. /* Fall through */
  4724. case OP_PLUS:
  4725. case OP_MINPLUS:
  4726. case OP_POSPLUS:
  4727. cc++;
  4728. break;
  4729. case OP_EXACTI:
  4730. caseless = TRUE;
  4731. /* Fall through */
  4732. case OP_EXACT:
  4733. repeat = GET2(cc, 1);
  4734. last = FALSE;
  4735. cc += 1 + IMM2_SIZE;
  4736. break;
  4737. case OP_QUERYI:
  4738. case OP_MINQUERYI:
  4739. case OP_POSQUERYI:
  4740. caseless = TRUE;
  4741. /* Fall through */
  4742. case OP_QUERY:
  4743. case OP_MINQUERY:
  4744. case OP_POSQUERY:
  4745. len = 1;
  4746. cc++;
  4747. #ifdef SUPPORT_UNICODE
  4748. if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
  4749. #endif
  4750. max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
  4751. if (max_chars == 0)
  4752. return consumed;
  4753. last = FALSE;
  4754. break;
  4755. case OP_KET:
  4756. cc += 1 + LINK_SIZE;
  4757. continue;
  4758. case OP_ALT:
  4759. cc += GET(cc, 1);
  4760. continue;
  4761. case OP_ONCE:
  4762. case OP_BRA:
  4763. case OP_BRAPOS:
  4764. case OP_CBRA:
  4765. case OP_CBRAPOS:
  4766. alternative = cc + GET(cc, 1);
  4767. while (*alternative == OP_ALT)
  4768. {
  4769. max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
  4770. if (max_chars == 0)
  4771. return consumed;
  4772. alternative += GET(alternative, 1);
  4773. }
  4774. if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
  4775. cc += IMM2_SIZE;
  4776. cc += 1 + LINK_SIZE;
  4777. continue;
  4778. case OP_CLASS:
  4779. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  4780. if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
  4781. return consumed;
  4782. #endif
  4783. class = TRUE;
  4784. break;
  4785. case OP_NCLASS:
  4786. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  4787. if (common->utf) return consumed;
  4788. #endif
  4789. class = TRUE;
  4790. break;
  4791. #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  4792. case OP_XCLASS:
  4793. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  4794. if (common->utf) return consumed;
  4795. #endif
  4796. any = TRUE;
  4797. cc += GET(cc, 1);
  4798. break;
  4799. #endif
  4800. case OP_DIGIT:
  4801. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  4802. if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
  4803. return consumed;
  4804. #endif
  4805. any = TRUE;
  4806. cc++;
  4807. break;
  4808. case OP_WHITESPACE:
  4809. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  4810. if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
  4811. return consumed;
  4812. #endif
  4813. any = TRUE;
  4814. cc++;
  4815. break;
  4816. case OP_WORDCHAR:
  4817. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  4818. if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
  4819. return consumed;
  4820. #endif
  4821. any = TRUE;
  4822. cc++;
  4823. break;
  4824. case OP_NOT:
  4825. case OP_NOTI:
  4826. cc++;
  4827. /* Fall through. */
  4828. case OP_NOT_DIGIT:
  4829. case OP_NOT_WHITESPACE:
  4830. case OP_NOT_WORDCHAR:
  4831. case OP_ANY:
  4832. case OP_ALLANY:
  4833. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  4834. if (common->utf) return consumed;
  4835. #endif
  4836. any = TRUE;
  4837. cc++;
  4838. break;
  4839. #ifdef SUPPORT_UNICODE
  4840. case OP_NOTPROP:
  4841. case OP_PROP:
  4842. #if PCRE2_CODE_UNIT_WIDTH != 32
  4843. if (common->utf) return consumed;
  4844. #endif
  4845. any = TRUE;
  4846. cc += 1 + 2;
  4847. break;
  4848. #endif
  4849. case OP_TYPEEXACT:
  4850. repeat = GET2(cc, 1);
  4851. cc += 1 + IMM2_SIZE;
  4852. continue;
  4853. case OP_NOTEXACT:
  4854. case OP_NOTEXACTI:
  4855. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  4856. if (common->utf) return consumed;
  4857. #endif
  4858. any = TRUE;
  4859. repeat = GET2(cc, 1);
  4860. cc += 1 + IMM2_SIZE + 1;
  4861. break;
  4862. default:
  4863. return consumed;
  4864. }
  4865. if (any)
  4866. {
  4867. do
  4868. {
  4869. chars->count = 255;
  4870. consumed++;
  4871. if (--max_chars == 0)
  4872. return consumed;
  4873. chars++;
  4874. }
  4875. while (--repeat > 0);
  4876. repeat = 1;
  4877. continue;
  4878. }
  4879. if (class)
  4880. {
  4881. bytes = (sljit_u8*) (cc + 1);
  4882. cc += 1 + 32 / sizeof(PCRE2_UCHAR);
  4883. switch (*cc)
  4884. {
  4885. case OP_CRSTAR:
  4886. case OP_CRMINSTAR:
  4887. case OP_CRPOSSTAR:
  4888. case OP_CRQUERY:
  4889. case OP_CRMINQUERY:
  4890. case OP_CRPOSQUERY:
  4891. max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
  4892. if (max_chars == 0)
  4893. return consumed;
  4894. break;
  4895. default:
  4896. case OP_CRPLUS:
  4897. case OP_CRMINPLUS:
  4898. case OP_CRPOSPLUS:
  4899. break;
  4900. case OP_CRRANGE:
  4901. case OP_CRMINRANGE:
  4902. case OP_CRPOSRANGE:
  4903. repeat = GET2(cc, 1);
  4904. if (repeat <= 0)
  4905. return consumed;
  4906. break;
  4907. }
  4908. do
  4909. {
  4910. if (bytes[31] & 0x80)
  4911. chars->count = 255;
  4912. else if (chars->count != 255)
  4913. {
  4914. bytes_end = bytes + 32;
  4915. chr = 0;
  4916. do
  4917. {
  4918. byte = *bytes++;
  4919. SLJIT_ASSERT((chr & 0x7) == 0);
  4920. if (byte == 0)
  4921. chr += 8;
  4922. else
  4923. {
  4924. do
  4925. {
  4926. if ((byte & 0x1) != 0)
  4927. add_prefix_char(chr, chars, TRUE);
  4928. byte >>= 1;
  4929. chr++;
  4930. }
  4931. while (byte != 0);
  4932. chr = (chr + 7) & ~7;
  4933. }
  4934. }
  4935. while (chars->count != 255 && bytes < bytes_end);
  4936. bytes = bytes_end - 32;
  4937. }
  4938. consumed++;
  4939. if (--max_chars == 0)
  4940. return consumed;
  4941. chars++;
  4942. }
  4943. while (--repeat > 0);
  4944. switch (*cc)
  4945. {
  4946. case OP_CRSTAR:
  4947. case OP_CRMINSTAR:
  4948. case OP_CRPOSSTAR:
  4949. return consumed;
  4950. case OP_CRQUERY:
  4951. case OP_CRMINQUERY:
  4952. case OP_CRPOSQUERY:
  4953. cc++;
  4954. break;
  4955. case OP_CRRANGE:
  4956. case OP_CRMINRANGE:
  4957. case OP_CRPOSRANGE:
  4958. if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
  4959. return consumed;
  4960. cc += 1 + 2 * IMM2_SIZE;
  4961. break;
  4962. }
  4963. repeat = 1;
  4964. continue;
  4965. }
  4966. len = 1;
  4967. #ifdef SUPPORT_UNICODE
  4968. if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
  4969. #endif
  4970. if (caseless && char_has_othercase(common, cc))
  4971. {
  4972. #ifdef SUPPORT_UNICODE
  4973. if (common->utf)
  4974. {
  4975. GETCHAR(chr, cc);
  4976. if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
  4977. return consumed;
  4978. }
  4979. else
  4980. #endif
  4981. {
  4982. chr = *cc;
  4983. #ifdef SUPPORT_UNICODE
  4984. if (common->ucp && chr > 127)
  4985. othercase[0] = UCD_OTHERCASE(chr);
  4986. else
  4987. #endif
  4988. othercase[0] = TABLE_GET(chr, common->fcc, chr);
  4989. }
  4990. }
  4991. else
  4992. {
  4993. caseless = FALSE;
  4994. othercase[0] = 0; /* Stops compiler warning - PH */
  4995. }
  4996. len_save = len;
  4997. cc_save = cc;
  4998. while (TRUE)
  4999. {
  5000. oc = othercase;
  5001. do
  5002. {
  5003. len--;
  5004. consumed++;
  5005. chr = *cc;
  5006. add_prefix_char(*cc, chars, len == 0);
  5007. if (caseless)
  5008. add_prefix_char(*oc, chars, len == 0);
  5009. if (--max_chars == 0)
  5010. return consumed;
  5011. chars++;
  5012. cc++;
  5013. oc++;
  5014. }
  5015. while (len > 0);
  5016. if (--repeat == 0)
  5017. break;
  5018. len = len_save;
  5019. cc = cc_save;
  5020. }
  5021. repeat = 1;
  5022. if (last)
  5023. return consumed;
  5024. }
  5025. }
  5026. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  5027. static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
  5028. {
  5029. #if PCRE2_CODE_UNIT_WIDTH == 8
  5030. OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
  5031. CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);
  5032. #elif PCRE2_CODE_UNIT_WIDTH == 16
  5033. OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
  5034. CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);
  5035. #else
  5036. #error "Unknown code width"
  5037. #endif
  5038. }
  5039. #endif
  5040. #include "pcre2_jit_simd_inc.h"
  5041. #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
  5042. static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
  5043. {
  5044. sljit_s32 i, j, max_i = 0, max_j = 0;
  5045. sljit_u32 max_pri = 0;
  5046. PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri;
  5047. for (i = max - 1; i >= 1; i--)
  5048. {
  5049. if (chars[i].last_count > 2)
  5050. {
  5051. a1 = chars[i].chars[0];
  5052. a2 = chars[i].chars[1];
  5053. a_pri = chars[i].last_count;
  5054. j = i - max_fast_forward_char_pair_offset();
  5055. if (j < 0)
  5056. j = 0;
  5057. while (j < i)
  5058. {
  5059. b_pri = chars[j].last_count;
  5060. if (b_pri > 2 && a_pri + b_pri >= max_pri)
  5061. {
  5062. b1 = chars[j].chars[0];
  5063. b2 = chars[j].chars[1];
  5064. if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
  5065. {
  5066. max_pri = a_pri + b_pri;
  5067. max_i = i;
  5068. max_j = j;
  5069. }
  5070. }
  5071. j++;
  5072. }
  5073. }
  5074. }
  5075. if (max_pri == 0)
  5076. return FALSE;
  5077. fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]);
  5078. return TRUE;
  5079. }
  5080. #endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
  5081. static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
  5082. {
  5083. DEFINE_COMPILER;
  5084. struct sljit_label *start;
  5085. struct sljit_jump *match;
  5086. struct sljit_jump *partial_quit;
  5087. PCRE2_UCHAR mask;
  5088. BOOL has_match_end = (common->match_end_ptr != 0);
  5089. SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);
  5090. if (has_match_end)
  5091. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  5092. if (offset > 0)
  5093. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
  5094. if (has_match_end)
  5095. {
  5096. OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  5097. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  5098. OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  5099. CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  5100. }
  5101. #ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
  5102. if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
  5103. {
  5104. fast_forward_char_simd(common, char1, char2, offset);
  5105. if (offset > 0)
  5106. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
  5107. if (has_match_end)
  5108. OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  5109. return;
  5110. }
  5111. #endif
  5112. start = LABEL();
  5113. partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  5114. if (common->mode == PCRE2_JIT_COMPLETE)
  5115. add_jump(compiler, &common->failed_match, partial_quit);
  5116. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  5117. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  5118. if (char1 == char2)
  5119. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, start);
  5120. else
  5121. {
  5122. mask = char1 ^ char2;
  5123. if (is_powerof2(mask))
  5124. {
  5125. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
  5126. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask, start);
  5127. }
  5128. else
  5129. {
  5130. match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
  5131. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, start);
  5132. JUMPHERE(match);
  5133. }
  5134. }
  5135. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  5136. if (common->utf && offset > 0)
  5137. {
  5138. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  5139. jumpto_if_not_utf_char_start(compiler, TMP1, start);
  5140. }
  5141. #endif
  5142. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  5143. if (common->mode != PCRE2_JIT_COMPLETE)
  5144. JUMPHERE(partial_quit);
  5145. if (has_match_end)
  5146. OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  5147. }
  5148. static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
  5149. {
  5150. DEFINE_COMPILER;
  5151. struct sljit_label *start;
  5152. struct sljit_jump *match;
  5153. fast_forward_char_data chars[MAX_N_CHARS];
  5154. sljit_s32 offset;
  5155. PCRE2_UCHAR mask;
  5156. PCRE2_UCHAR *char_set, *char_set_end;
  5157. int i, max, from;
  5158. int range_right = -1, range_len;
  5159. sljit_u8 *update_table = NULL;
  5160. BOOL in_range;
  5161. sljit_u32 rec_count;
  5162. for (i = 0; i < MAX_N_CHARS; i++)
  5163. {
  5164. chars[i].count = 0;
  5165. chars[i].last_count = 0;
  5166. }
  5167. rec_count = 10000;
  5168. max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
  5169. if (max < 1)
  5170. return FALSE;
  5171. /* Convert last_count to priority. */
  5172. for (i = 0; i < max; i++)
  5173. {
  5174. SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);
  5175. if (chars[i].count == 1)
  5176. {
  5177. chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
  5178. /* Simplifies algorithms later. */
  5179. chars[i].chars[1] = chars[i].chars[0];
  5180. }
  5181. else if (chars[i].count == 2)
  5182. {
  5183. SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
  5184. if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
  5185. chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
  5186. else
  5187. chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
  5188. }
  5189. else
  5190. chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
  5191. }
  5192. #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
  5193. if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
  5194. return TRUE;
  5195. #endif
  5196. in_range = FALSE;
  5197. /* Prevent compiler "uninitialized" warning */
  5198. from = 0;
  5199. range_len = 4 /* minimum length */ - 1;
  5200. for (i = 0; i <= max; i++)
  5201. {
  5202. if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
  5203. {
  5204. range_len = i - from;
  5205. range_right = i - 1;
  5206. }
  5207. if (i < max && chars[i].count < 255)
  5208. {
  5209. SLJIT_ASSERT(chars[i].count > 0);
  5210. if (!in_range)
  5211. {
  5212. in_range = TRUE;
  5213. from = i;
  5214. }
  5215. }
  5216. else
  5217. in_range = FALSE;
  5218. }
  5219. if (range_right >= 0)
  5220. {
  5221. update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
  5222. if (update_table == NULL)
  5223. return TRUE;
  5224. memset(update_table, IN_UCHARS(range_len), 256);
  5225. for (i = 0; i < range_len; i++)
  5226. {
  5227. SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);
  5228. char_set = chars[range_right - i].chars;
  5229. char_set_end = char_set + chars[range_right - i].count;
  5230. do
  5231. {
  5232. if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
  5233. update_table[(*char_set) & 0xff] = IN_UCHARS(i);
  5234. char_set++;
  5235. }
  5236. while (char_set < char_set_end);
  5237. }
  5238. }
  5239. offset = -1;
  5240. /* Scan forward. */
  5241. for (i = 0; i < max; i++)
  5242. {
  5243. if (range_right == i)
  5244. continue;
  5245. if (offset == -1)
  5246. {
  5247. if (chars[i].last_count >= 2)
  5248. offset = i;
  5249. }
  5250. else if (chars[offset].last_count < chars[i].last_count)
  5251. offset = i;
  5252. }
  5253. SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));
  5254. if (range_right < 0)
  5255. {
  5256. if (offset < 0)
  5257. return FALSE;
  5258. /* Works regardless the value is 1 or 2. */
  5259. fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
  5260. return TRUE;
  5261. }
  5262. SLJIT_ASSERT(range_right != offset);
  5263. if (common->match_end_ptr != 0)
  5264. {
  5265. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  5266. OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  5267. OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  5268. add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
  5269. OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  5270. CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  5271. }
  5272. else
  5273. {
  5274. OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  5275. add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
  5276. }
  5277. SLJIT_ASSERT(range_right >= 0);
  5278. if (!HAS_VIRTUAL_REGISTERS)
  5279. OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
  5280. start = LABEL();
  5281. add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
  5282. #if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
  5283. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
  5284. #else
  5285. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
  5286. #endif
  5287. if (!HAS_VIRTUAL_REGISTERS)
  5288. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
  5289. else
  5290. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
  5291. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  5292. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
  5293. if (offset >= 0)
  5294. {
  5295. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
  5296. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  5297. if (chars[offset].count == 1)
  5298. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
  5299. else
  5300. {
  5301. mask = chars[offset].chars[0] ^ chars[offset].chars[1];
  5302. if (is_powerof2(mask))
  5303. {
  5304. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
  5305. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
  5306. }
  5307. else
  5308. {
  5309. match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
  5310. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
  5311. JUMPHERE(match);
  5312. }
  5313. }
  5314. }
  5315. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  5316. if (common->utf && offset != 0)
  5317. {
  5318. if (offset < 0)
  5319. {
  5320. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  5321. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  5322. }
  5323. else
  5324. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  5325. jumpto_if_not_utf_char_start(compiler, TMP1, start);
  5326. if (offset < 0)
  5327. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  5328. }
  5329. #endif
  5330. if (offset >= 0)
  5331. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  5332. if (common->match_end_ptr != 0)
  5333. OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  5334. else
  5335. OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  5336. return TRUE;
  5337. }
  5338. static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
  5339. {
  5340. PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
  5341. PCRE2_UCHAR oc;
  5342. oc = first_char;
  5343. if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
  5344. {
  5345. oc = TABLE_GET(first_char, common->fcc, first_char);
  5346. #if defined SUPPORT_UNICODE
  5347. if (first_char > 127 && (common->utf || common->ucp))
  5348. oc = UCD_OTHERCASE(first_char);
  5349. #endif
  5350. }
  5351. fast_forward_first_char2(common, first_char, oc, 0);
  5352. }
  5353. static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
  5354. {
  5355. DEFINE_COMPILER;
  5356. struct sljit_label *loop;
  5357. struct sljit_jump *lastchar = NULL;
  5358. struct sljit_jump *firstchar;
  5359. struct sljit_jump *quit = NULL;
  5360. struct sljit_jump *foundcr = NULL;
  5361. struct sljit_jump *notfoundnl;
  5362. jump_list *newline = NULL;
  5363. if (common->match_end_ptr != 0)
  5364. {
  5365. OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  5366. OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  5367. }
  5368. if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  5369. {
  5370. #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
  5371. if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
  5372. {
  5373. if (HAS_VIRTUAL_REGISTERS)
  5374. {
  5375. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  5376. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  5377. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  5378. }
  5379. else
  5380. {
  5381. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
  5382. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
  5383. }
  5384. firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
  5385. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  5386. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
  5387. OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
  5388. #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  5389. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  5390. #endif
  5391. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  5392. fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
  5393. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  5394. }
  5395. else
  5396. #endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
  5397. {
  5398. lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  5399. if (HAS_VIRTUAL_REGISTERS)
  5400. {
  5401. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  5402. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  5403. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  5404. }
  5405. else
  5406. {
  5407. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
  5408. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
  5409. }
  5410. firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
  5411. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
  5412. OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
  5413. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
  5414. #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  5415. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
  5416. #endif
  5417. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  5418. loop = LABEL();
  5419. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  5420. quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  5421. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  5422. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  5423. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
  5424. CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
  5425. JUMPHERE(quit);
  5426. JUMPHERE(lastchar);
  5427. }
  5428. JUMPHERE(firstchar);
  5429. if (common->match_end_ptr != 0)
  5430. OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  5431. return;
  5432. }
  5433. if (HAS_VIRTUAL_REGISTERS)
  5434. {
  5435. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  5436. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  5437. }
  5438. else
  5439. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
  5440. /* Example: match /^/ to \r\n from offset 1. */
  5441. firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
  5442. if (common->nltype == NLTYPE_ANY)
  5443. move_back(common, NULL, FALSE);
  5444. else
  5445. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  5446. loop = LABEL();
  5447. common->ff_newline_shortcut = loop;
  5448. #ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
  5449. if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
  5450. {
  5451. if (common->nltype == NLTYPE_ANYCRLF)
  5452. {
  5453. fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
  5454. if (common->mode != PCRE2_JIT_COMPLETE)
  5455. lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  5456. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  5457. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  5458. quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  5459. }
  5460. else
  5461. {
  5462. fast_forward_char_simd(common, common->newline, common->newline, 0);
  5463. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  5464. if (common->mode != PCRE2_JIT_COMPLETE)
  5465. {
  5466. OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
  5467. CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
  5468. }
  5469. }
  5470. }
  5471. else
  5472. #endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
  5473. {
  5474. read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
  5475. lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  5476. if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  5477. foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  5478. check_newlinechar(common, common->nltype, &newline, FALSE);
  5479. set_jumps(newline, loop);
  5480. }
  5481. if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  5482. {
  5483. if (quit == NULL)
  5484. {
  5485. quit = JUMP(SLJIT_JUMP);
  5486. JUMPHERE(foundcr);
  5487. }
  5488. notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  5489. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  5490. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
  5491. OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  5492. #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  5493. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  5494. #endif
  5495. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  5496. JUMPHERE(notfoundnl);
  5497. JUMPHERE(quit);
  5498. }
  5499. if (lastchar)
  5500. JUMPHERE(lastchar);
  5501. JUMPHERE(firstchar);
  5502. if (common->match_end_ptr != 0)
  5503. OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  5504. }
  5505. static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
  5506. static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
  5507. {
  5508. DEFINE_COMPILER;
  5509. const sljit_u8 *start_bits = common->re->start_bitmap;
  5510. struct sljit_label *start;
  5511. struct sljit_jump *partial_quit;
  5512. #if PCRE2_CODE_UNIT_WIDTH != 8
  5513. struct sljit_jump *found = NULL;
  5514. #endif
  5515. jump_list *matches = NULL;
  5516. if (common->match_end_ptr != 0)
  5517. {
  5518. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  5519. OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
  5520. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  5521. OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  5522. CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  5523. }
  5524. start = LABEL();
  5525. partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  5526. if (common->mode == PCRE2_JIT_COMPLETE)
  5527. add_jump(compiler, &common->failed_match, partial_quit);
  5528. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  5529. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  5530. if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
  5531. {
  5532. #if PCRE2_CODE_UNIT_WIDTH != 8
  5533. if ((start_bits[31] & 0x80) != 0)
  5534. found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
  5535. else
  5536. CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
  5537. #elif defined SUPPORT_UNICODE
  5538. if (common->utf && is_char7_bitset(start_bits, FALSE))
  5539. CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
  5540. #endif
  5541. OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  5542. OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  5543. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
  5544. if (!HAS_VIRTUAL_REGISTERS)
  5545. {
  5546. OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
  5547. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP3, 0);
  5548. }
  5549. else
  5550. {
  5551. OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  5552. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  5553. }
  5554. JUMPTO(SLJIT_ZERO, start);
  5555. }
  5556. else
  5557. set_jumps(matches, start);
  5558. #if PCRE2_CODE_UNIT_WIDTH != 8
  5559. if (found != NULL)
  5560. JUMPHERE(found);
  5561. #endif
  5562. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  5563. if (common->mode != PCRE2_JIT_COMPLETE)
  5564. JUMPHERE(partial_quit);
  5565. if (common->match_end_ptr != 0)
  5566. OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
  5567. }
  5568. static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
  5569. {
  5570. DEFINE_COMPILER;
  5571. struct sljit_label *loop;
  5572. struct sljit_jump *toolong;
  5573. struct sljit_jump *already_found;
  5574. struct sljit_jump *found;
  5575. struct sljit_jump *found_oc = NULL;
  5576. jump_list *not_found = NULL;
  5577. sljit_u32 oc, bit;
  5578. SLJIT_ASSERT(common->req_char_ptr != 0);
  5579. OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
  5580. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
  5581. toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
  5582. already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);
  5583. if (has_firstchar)
  5584. OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  5585. else
  5586. OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
  5587. oc = req_char;
  5588. if (caseless)
  5589. {
  5590. oc = TABLE_GET(req_char, common->fcc, req_char);
  5591. #if defined SUPPORT_UNICODE
  5592. if (req_char > 127 && (common->utf || common->ucp))
  5593. oc = UCD_OTHERCASE(req_char);
  5594. #endif
  5595. }
  5596. #ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
  5597. if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
  5598. {
  5599. not_found = fast_requested_char_simd(common, req_char, oc);
  5600. }
  5601. else
  5602. #endif
  5603. {
  5604. loop = LABEL();
  5605. add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
  5606. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
  5607. if (req_char == oc)
  5608. found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
  5609. else
  5610. {
  5611. bit = req_char ^ oc;
  5612. if (is_powerof2(bit))
  5613. {
  5614. OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
  5615. found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
  5616. }
  5617. else
  5618. {
  5619. found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
  5620. found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
  5621. }
  5622. }
  5623. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  5624. JUMPTO(SLJIT_JUMP, loop);
  5625. JUMPHERE(found);
  5626. if (found_oc)
  5627. JUMPHERE(found_oc);
  5628. }
  5629. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
  5630. JUMPHERE(already_found);
  5631. JUMPHERE(toolong);
  5632. return not_found;
  5633. }
  5634. static void do_revertframes(compiler_common *common)
  5635. {
  5636. DEFINE_COMPILER;
  5637. struct sljit_jump *jump;
  5638. struct sljit_label *mainloop;
  5639. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  5640. GET_LOCAL_BASE(TMP1, 0, 0);
  5641. /* Drop frames until we reach STACK_TOP. */
  5642. mainloop = LABEL();
  5643. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
  5644. jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);
  5645. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  5646. if (HAS_VIRTUAL_REGISTERS)
  5647. {
  5648. OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  5649. OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
  5650. OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  5651. }
  5652. else
  5653. {
  5654. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  5655. OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
  5656. OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  5657. OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  5658. GET_LOCAL_BASE(TMP1, 0, 0);
  5659. OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
  5660. }
  5661. JUMPTO(SLJIT_JUMP, mainloop);
  5662. JUMPHERE(jump);
  5663. jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
  5664. /* End of reverting values. */
  5665. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  5666. JUMPHERE(jump);
  5667. OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
  5668. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  5669. if (HAS_VIRTUAL_REGISTERS)
  5670. {
  5671. OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  5672. OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  5673. }
  5674. else
  5675. {
  5676. OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  5677. OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  5678. OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  5679. }
  5680. JUMPTO(SLJIT_JUMP, mainloop);
  5681. }
  5682. static void check_wordboundary(compiler_common *common)
  5683. {
  5684. DEFINE_COMPILER;
  5685. struct sljit_jump *skipread;
  5686. jump_list *skipread_list = NULL;
  5687. #ifdef SUPPORT_UNICODE
  5688. struct sljit_label *valid_utf;
  5689. jump_list *invalid_utf1 = NULL;
  5690. #endif /* SUPPORT_UNICODE */
  5691. jump_list *invalid_utf2 = NULL;
  5692. #if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
  5693. struct sljit_jump *jump;
  5694. #endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */
  5695. SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
  5696. sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  5697. /* Get type of the previous char, and put it to TMP3. */
  5698. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  5699. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  5700. OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
  5701. skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
  5702. #ifdef SUPPORT_UNICODE
  5703. if (common->invalid_utf)
  5704. {
  5705. peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);
  5706. if (common->mode != PCRE2_JIT_COMPLETE)
  5707. {
  5708. OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
  5709. OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
  5710. move_back(common, NULL, TRUE);
  5711. check_start_used_ptr(common);
  5712. OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  5713. OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
  5714. }
  5715. }
  5716. else
  5717. #endif /* SUPPORT_UNICODE */
  5718. {
  5719. if (common->mode == PCRE2_JIT_COMPLETE)
  5720. peek_char_back(common, READ_CHAR_MAX, NULL);
  5721. else
  5722. {
  5723. move_back(common, NULL, TRUE);
  5724. check_start_used_ptr(common);
  5725. read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
  5726. }
  5727. }
  5728. /* Testing char type. */
  5729. #ifdef SUPPORT_UNICODE
  5730. if (common->ucp)
  5731. {
  5732. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  5733. jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  5734. add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  5735. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  5736. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  5737. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  5738. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  5739. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  5740. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  5741. JUMPHERE(jump);
  5742. OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
  5743. }
  5744. else
  5745. #endif /* SUPPORT_UNICODE */
  5746. {
  5747. #if PCRE2_CODE_UNIT_WIDTH != 8
  5748. jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  5749. #elif defined SUPPORT_UNICODE
  5750. /* Here TMP3 has already been zeroed. */
  5751. jump = NULL;
  5752. if (common->utf)
  5753. jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  5754. #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  5755. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  5756. OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  5757. OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
  5758. #if PCRE2_CODE_UNIT_WIDTH != 8
  5759. JUMPHERE(jump);
  5760. #elif defined SUPPORT_UNICODE
  5761. if (jump != NULL)
  5762. JUMPHERE(jump);
  5763. #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  5764. }
  5765. JUMPHERE(skipread);
  5766. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  5767. check_str_end(common, &skipread_list);
  5768. peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);
  5769. /* Testing char type. This is a code duplication. */
  5770. #ifdef SUPPORT_UNICODE
  5771. valid_utf = LABEL();
  5772. if (common->ucp)
  5773. {
  5774. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  5775. jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  5776. add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  5777. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  5778. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  5779. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  5780. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  5781. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  5782. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  5783. JUMPHERE(jump);
  5784. }
  5785. else
  5786. #endif /* SUPPORT_UNICODE */
  5787. {
  5788. #if PCRE2_CODE_UNIT_WIDTH != 8
  5789. /* TMP2 may be destroyed by peek_char. */
  5790. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  5791. jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  5792. #elif defined SUPPORT_UNICODE
  5793. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  5794. jump = NULL;
  5795. if (common->utf)
  5796. jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  5797. #endif
  5798. OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  5799. OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  5800. OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
  5801. #if PCRE2_CODE_UNIT_WIDTH != 8
  5802. JUMPHERE(jump);
  5803. #elif defined SUPPORT_UNICODE
  5804. if (jump != NULL)
  5805. JUMPHERE(jump);
  5806. #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  5807. }
  5808. set_jumps(skipread_list, LABEL());
  5809. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  5810. OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
  5811. OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
  5812. #ifdef SUPPORT_UNICODE
  5813. if (common->invalid_utf)
  5814. {
  5815. set_jumps(invalid_utf1, LABEL());
  5816. peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
  5817. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);
  5818. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  5819. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
  5820. OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
  5821. set_jumps(invalid_utf2, LABEL());
  5822. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  5823. OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
  5824. OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
  5825. }
  5826. #endif /* SUPPORT_UNICODE */
  5827. }
  5828. static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
  5829. {
  5830. /* May destroy TMP1. */
  5831. DEFINE_COMPILER;
  5832. int ranges[MAX_CLASS_RANGE_SIZE];
  5833. sljit_u8 bit, cbit, all;
  5834. int i, byte, length = 0;
  5835. bit = bits[0] & 0x1;
  5836. /* All bits will be zero or one (since bit is zero or one). */
  5837. all = -bit;
  5838. for (i = 0; i < 256; )
  5839. {
  5840. byte = i >> 3;
  5841. if ((i & 0x7) == 0 && bits[byte] == all)
  5842. i += 8;
  5843. else
  5844. {
  5845. cbit = (bits[byte] >> (i & 0x7)) & 0x1;
  5846. if (cbit != bit)
  5847. {
  5848. if (length >= MAX_CLASS_RANGE_SIZE)
  5849. return FALSE;
  5850. ranges[length] = i;
  5851. length++;
  5852. bit = cbit;
  5853. all = -cbit;
  5854. }
  5855. i++;
  5856. }
  5857. }
  5858. if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
  5859. {
  5860. if (length >= MAX_CLASS_RANGE_SIZE)
  5861. return FALSE;
  5862. ranges[length] = 256;
  5863. length++;
  5864. }
  5865. if (length < 0 || length > 4)
  5866. return FALSE;
  5867. bit = bits[0] & 0x1;
  5868. if (invert) bit ^= 0x1;
  5869. /* No character is accepted. */
  5870. if (length == 0 && bit == 0)
  5871. add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  5872. switch(length)
  5873. {
  5874. case 0:
  5875. /* When bit != 0, all characters are accepted. */
  5876. return TRUE;
  5877. case 1:
  5878. add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  5879. return TRUE;
  5880. case 2:
  5881. if (ranges[0] + 1 != ranges[1])
  5882. {
  5883. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  5884. add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  5885. }
  5886. else
  5887. add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  5888. return TRUE;
  5889. case 3:
  5890. if (bit != 0)
  5891. {
  5892. add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
  5893. if (ranges[0] + 1 != ranges[1])
  5894. {
  5895. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  5896. add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  5897. }
  5898. else
  5899. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  5900. return TRUE;
  5901. }
  5902. add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
  5903. if (ranges[1] + 1 != ranges[2])
  5904. {
  5905. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
  5906. add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
  5907. }
  5908. else
  5909. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
  5910. return TRUE;
  5911. case 4:
  5912. if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
  5913. && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
  5914. && (ranges[1] & (ranges[2] - ranges[0])) == 0
  5915. && is_powerof2(ranges[2] - ranges[0]))
  5916. {
  5917. SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
  5918. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
  5919. if (ranges[2] + 1 != ranges[3])
  5920. {
  5921. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
  5922. add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
  5923. }
  5924. else
  5925. add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
  5926. return TRUE;
  5927. }
  5928. if (bit != 0)
  5929. {
  5930. i = 0;
  5931. if (ranges[0] + 1 != ranges[1])
  5932. {
  5933. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  5934. add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  5935. i = ranges[0];
  5936. }
  5937. else
  5938. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  5939. if (ranges[2] + 1 != ranges[3])
  5940. {
  5941. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
  5942. add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
  5943. }
  5944. else
  5945. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
  5946. return TRUE;
  5947. }
  5948. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  5949. add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
  5950. if (ranges[1] + 1 != ranges[2])
  5951. {
  5952. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
  5953. add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
  5954. }
  5955. else
  5956. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  5957. return TRUE;
  5958. default:
  5959. SLJIT_UNREACHABLE();
  5960. return FALSE;
  5961. }
  5962. }
  5963. static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
  5964. {
  5965. /* May destroy TMP1. */
  5966. DEFINE_COMPILER;
  5967. uint16_t char_list[MAX_CLASS_CHARS_SIZE];
  5968. uint8_t byte;
  5969. sljit_s32 type;
  5970. int i, j, k, len, c;
  5971. if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  5972. return FALSE;
  5973. len = 0;
  5974. for (i = 0; i < 32; i++)
  5975. {
  5976. byte = bits[i];
  5977. if (nclass)
  5978. byte = ~byte;
  5979. j = 0;
  5980. while (byte != 0)
  5981. {
  5982. if (byte & 0x1)
  5983. {
  5984. c = i * 8 + j;
  5985. k = len;
  5986. if ((c & 0x20) != 0)
  5987. {
  5988. for (k = 0; k < len; k++)
  5989. if (char_list[k] == c - 0x20)
  5990. {
  5991. char_list[k] |= 0x120;
  5992. break;
  5993. }
  5994. }
  5995. if (k == len)
  5996. {
  5997. if (len >= MAX_CLASS_CHARS_SIZE)
  5998. return FALSE;
  5999. char_list[len++] = (uint16_t) c;
  6000. }
  6001. }
  6002. byte >>= 1;
  6003. j++;
  6004. }
  6005. }
  6006. if (len == 0) return FALSE; /* Should never occur, but stops analyzers complaining. */
  6007. i = 0;
  6008. j = 0;
  6009. if (char_list[0] == 0)
  6010. {
  6011. i++;
  6012. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0);
  6013. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  6014. }
  6015. else
  6016. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  6017. while (i < len)
  6018. {
  6019. if ((char_list[i] & 0x100) != 0)
  6020. j++;
  6021. else
  6022. {
  6023. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i]);
  6024. CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
  6025. }
  6026. i++;
  6027. }
  6028. if (j != 0)
  6029. {
  6030. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);
  6031. for (i = 0; i < len; i++)
  6032. if ((char_list[i] & 0x100) != 0)
  6033. {
  6034. j--;
  6035. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff);
  6036. CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
  6037. }
  6038. }
  6039. if (invert)
  6040. nclass = !nclass;
  6041. type = nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
  6042. add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));
  6043. return TRUE;
  6044. }
  6045. static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
  6046. {
  6047. /* May destroy TMP1. */
  6048. if (optimize_class_ranges(common, bits, nclass, invert, backtracks))
  6049. return TRUE;
  6050. return optimize_class_chars(common, bits, nclass, invert, backtracks);
  6051. }
  6052. static void check_anynewline(compiler_common *common)
  6053. {
  6054. /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
  6055. DEFINE_COMPILER;
  6056. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  6057. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
  6058. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
  6059. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  6060. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
  6061. #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  6062. #if PCRE2_CODE_UNIT_WIDTH == 8
  6063. if (common->utf)
  6064. {
  6065. #endif
  6066. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  6067. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  6068. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
  6069. #if PCRE2_CODE_UNIT_WIDTH == 8
  6070. }
  6071. #endif
  6072. #endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
  6073. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
  6074. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  6075. }
  6076. static void check_hspace(compiler_common *common)
  6077. {
  6078. /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
  6079. DEFINE_COMPILER;
  6080. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  6081. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
  6082. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
  6083. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
  6084. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  6085. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
  6086. #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  6087. #if PCRE2_CODE_UNIT_WIDTH == 8
  6088. if (common->utf)
  6089. {
  6090. #endif
  6091. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  6092. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  6093. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  6094. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  6095. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  6096. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  6097. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  6098. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  6099. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  6100. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  6101. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  6102. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  6103. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
  6104. #if PCRE2_CODE_UNIT_WIDTH == 8
  6105. }
  6106. #endif
  6107. #endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
  6108. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
  6109. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  6110. }
  6111. static void check_vspace(compiler_common *common)
  6112. {
  6113. /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
  6114. DEFINE_COMPILER;
  6115. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  6116. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
  6117. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
  6118. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  6119. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
  6120. #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  6121. #if PCRE2_CODE_UNIT_WIDTH == 8
  6122. if (common->utf)
  6123. {
  6124. #endif
  6125. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  6126. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  6127. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
  6128. #if PCRE2_CODE_UNIT_WIDTH == 8
  6129. }
  6130. #endif
  6131. #endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
  6132. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
  6133. OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  6134. }
  6135. static void do_casefulcmp(compiler_common *common)
  6136. {
  6137. DEFINE_COMPILER;
  6138. struct sljit_jump *jump;
  6139. struct sljit_label *label;
  6140. int char1_reg;
  6141. int char2_reg;
  6142. if (HAS_VIRTUAL_REGISTERS)
  6143. {
  6144. char1_reg = STR_END;
  6145. char2_reg = STACK_TOP;
  6146. }
  6147. else
  6148. {
  6149. char1_reg = TMP3;
  6150. char2_reg = RETURN_ADDR;
  6151. }
  6152. sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  6153. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  6154. if (char1_reg == STR_END)
  6155. {
  6156. OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
  6157. OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
  6158. }
  6159. if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  6160. {
  6161. label = LABEL();
  6162. sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  6163. sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  6164. jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  6165. OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  6166. JUMPTO(SLJIT_NOT_ZERO, label);
  6167. JUMPHERE(jump);
  6168. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  6169. }
  6170. else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  6171. {
  6172. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  6173. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  6174. label = LABEL();
  6175. sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  6176. sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  6177. jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  6178. OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  6179. JUMPTO(SLJIT_NOT_ZERO, label);
  6180. JUMPHERE(jump);
  6181. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  6182. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  6183. }
  6184. else
  6185. {
  6186. label = LABEL();
  6187. OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  6188. OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  6189. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  6190. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  6191. jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  6192. OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  6193. JUMPTO(SLJIT_NOT_ZERO, label);
  6194. JUMPHERE(jump);
  6195. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  6196. }
  6197. if (char1_reg == STR_END)
  6198. {
  6199. OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
  6200. OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
  6201. }
  6202. OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
  6203. }
  6204. static void do_caselesscmp(compiler_common *common)
  6205. {
  6206. DEFINE_COMPILER;
  6207. struct sljit_jump *jump;
  6208. struct sljit_label *label;
  6209. int char1_reg = STR_END;
  6210. int char2_reg;
  6211. int lcc_table;
  6212. int opt_type = 0;
  6213. if (HAS_VIRTUAL_REGISTERS)
  6214. {
  6215. char2_reg = STACK_TOP;
  6216. lcc_table = STACK_LIMIT;
  6217. }
  6218. else
  6219. {
  6220. char2_reg = RETURN_ADDR;
  6221. lcc_table = TMP3;
  6222. }
  6223. if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  6224. opt_type = 1;
  6225. else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  6226. opt_type = 2;
  6227. sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  6228. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  6229. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);
  6230. if (char2_reg == STACK_TOP)
  6231. {
  6232. OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
  6233. OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
  6234. }
  6235. OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);
  6236. if (opt_type == 1)
  6237. {
  6238. label = LABEL();
  6239. sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  6240. sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  6241. }
  6242. else if (opt_type == 2)
  6243. {
  6244. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  6245. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  6246. label = LABEL();
  6247. sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  6248. sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  6249. }
  6250. else
  6251. {
  6252. label = LABEL();
  6253. OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  6254. OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  6255. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  6256. }
  6257. #if PCRE2_CODE_UNIT_WIDTH != 8
  6258. jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
  6259. #endif
  6260. OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
  6261. #if PCRE2_CODE_UNIT_WIDTH != 8
  6262. JUMPHERE(jump);
  6263. jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
  6264. #endif
  6265. OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
  6266. #if PCRE2_CODE_UNIT_WIDTH != 8
  6267. JUMPHERE(jump);
  6268. #endif
  6269. if (opt_type == 0)
  6270. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  6271. jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  6272. OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  6273. JUMPTO(SLJIT_NOT_ZERO, label);
  6274. JUMPHERE(jump);
  6275. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  6276. if (opt_type == 2)
  6277. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  6278. if (char2_reg == STACK_TOP)
  6279. {
  6280. OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
  6281. OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
  6282. }
  6283. OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
  6284. OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
  6285. }
  /* Emits the comparison for one pattern character starting at cc (one code
  unit, or 1 + GET_EXTRALEN(*cc) units in UTF mode) against the subject text
  located context->length bytes before STR_PTR. Every failed comparison adds a
  jump to backtracks. context carries the state shared between consecutive
  calls: the remaining length, the register that receives the next load, and
  (when unaligned reads are used) the code units gathered for the pending
  multi-unit comparison. When caseless is set and the character has another
  case, the differing bit is or-ed into the subject data before comparing.
  Returns cc advanced past the code units that were consumed. */
  6286. static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
  6287. compare_context *context, jump_list **backtracks)
  6288. {
  6289. DEFINE_COMPILER;
  6290. unsigned int othercasebit = 0;
  6291. PCRE2_SPTR othercasechar = NULL;
  6292. #ifdef SUPPORT_UNICODE
  6293. int utflength;
  6294. #endif
  6295. if (caseless && char_has_othercase(common, cc))
  6296. {
  6297. othercasebit = char_get_othercase_bit(common, cc);
  6298. SLJIT_ASSERT(othercasebit);
  6299. /* Extracting bit difference info. */
  /* The packed value is split into the code unit that carries the differing
  bit (upper part, used as an offset from cc) and the bit mask itself. */
  6300. #if PCRE2_CODE_UNIT_WIDTH == 8
  6301. othercasechar = cc + (othercasebit >> 8);
  6302. othercasebit &= 0xff;
  6303. #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  6304. /* Note that this code only handles characters in the BMP. If there
  6305. ever are characters outside the BMP whose othercase differs in only one
  6306. bit from itself (there currently are none), this code will need to be
  6307. revised for PCRE2_CODE_UNIT_WIDTH == 32. */
  6308. othercasechar = cc + (othercasebit >> 9);
  6309. if ((othercasebit & 0x100) != 0)
  6310. othercasebit = (othercasebit & 0xff) << 8;
  6311. else
  6312. othercasebit &= 0xff;
  6313. #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  6314. }
  /* sourcereg == -1 means no load has been emitted for this context yet
  (presumably set by the caller when a sequence starts - confirm). Emit the
  first read into TMP1, as wide as the remaining length allows, and make TMP2
  the target of the next load. */
  6315. if (context->sourcereg == -1)
  6316. {
  6317. #if PCRE2_CODE_UNIT_WIDTH == 8
  6318. #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  6319. if (context->length >= 4)
  6320. OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  6321. else if (context->length >= 2)
  6322. OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  6323. else
  6324. #endif
  6325. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  6326. #elif PCRE2_CODE_UNIT_WIDTH == 16
  6327. #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  6328. if (context->length >= 4)
  6329. OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  6330. else
  6331. #endif
  6332. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  6333. #elif PCRE2_CODE_UNIT_WIDTH == 32
  6334. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  6335. #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  6336. context->sourcereg = TMP2;
  6337. }
  /* With Unicode support the body below runs once per code unit of the
  character; without it the body runs exactly once. */
  6338. #ifdef SUPPORT_UNICODE
  6339. utflength = 1;
  6340. if (common->utf && HAS_EXTRALEN(*cc))
  6341. utflength += GET_EXTRALEN(*cc);
  6342. do
  6343. {
  6344. #endif
  6345. context->length -= IN_UCHARS(1);
  6346. #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
  6347. /* Unaligned read is supported. */
  /* Gather the expected value (context->c) and the case bit mask
  (context->oc) for this code unit; the comparison is emitted only once a
  whole chunk has been collected. */
  6348. if (othercasebit != 0 && othercasechar == cc)
  6349. {
  6350. context->c.asuchars[context->ucharptr] = *cc | othercasebit;
  6351. context->oc.asuchars[context->ucharptr] = othercasebit;
  6352. }
  6353. else
  6354. {
  6355. context->c.asuchars[context->ucharptr] = *cc;
  6356. context->oc.asuchars[context->ucharptr] = 0;
  6357. }
  6358. context->ucharptr++;
  /* A chunk is complete when it fills the widest read, when the sequence
  ends, or (8 bit only) when two units are gathered and just one remains. */
  6359. #if PCRE2_CODE_UNIT_WIDTH == 8
  6360. if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
  6361. #else
  6362. if (context->ucharptr >= 2 || context->length == 0)
  6363. #endif
  6364. {
  /* Emit the load of the following chunk (if any) before comparing the
  current one; TMP1 and TMP2 alternate between the two roles. */
  6365. if (context->length >= 4)
  6366. OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
  6367. else if (context->length >= 2)
  6368. OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
  6369. #if PCRE2_CODE_UNIT_WIDTH == 8
  6370. else if (context->length >= 1)
  6371. OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
  6372. #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  /* Switch to the register filled by the previous load. */
  6373. context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
  /* Compare with the width matching the number of gathered code units. */
  6374. switch(context->ucharptr)
  6375. {
  6376. case 4 / sizeof(PCRE2_UCHAR):
  6377. if (context->oc.asint != 0)
  6378. OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
  6379. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
  6380. break;
  6381. case 2 / sizeof(PCRE2_UCHAR):
  6382. if (context->oc.asushort != 0)
  6383. OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
  6384. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
  6385. break;
  6386. #if PCRE2_CODE_UNIT_WIDTH == 8
  6387. case 1:
  6388. if (context->oc.asbyte != 0)
  6389. OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
  6390. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
  6391. break;
  6392. #endif
  6393. default:
  6394. SLJIT_UNREACHABLE();
  6395. break;
  6396. }
  6397. context->ucharptr = 0;
  6398. }
  6399. #else
  6400. /* Unaligned read is unsupported or in 32 bit mode. */
  /* One load and one comparison per code unit: emit the load of the next
  unit (if any), then compare the unit loaded earlier. */
  6401. if (context->length >= 1)
  6402. OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
  6403. context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
  6404. if (othercasebit != 0 && othercasechar == cc)
  6405. {
  6406. OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
  6407. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
  6408. }
  6409. else
  6410. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
  6411. #endif
  6412. cc++;
  6413. #ifdef SUPPORT_UNICODE
  6414. utflength--;
  6415. }
  6416. while (utflength > 0);
  6417. #endif
  6418. return cc;
  6419. }
  6420. #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  /* Rebases typereg so that it holds (type - value) instead of
  (type - typeoffset), emitting an add or a subtract only when the offset
  actually changes, and records value as the new typeoffset. Expects
  typereg and typeoffset to be variables in the calling scope. The body is
  wrapped in do { } while (0) so that the macro expands to exactly one
  statement and stays correct under an unbraced if/else. Note that value is
  evaluated more than once, so it must be free of side effects. */
  #define SET_TYPE_OFFSET(value) \
    do \
      { \
      if ((value) != typeoffset) \
        { \
        if ((value) < typeoffset) \
          OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, (sljit_sw)(typeoffset - (value))); \
        else \
          OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (sljit_sw)((value) - typeoffset)); \
        } \
      typeoffset = (value); \
      } \
    while (0)
  /* Rebases TMP1 so that it holds (char - value) instead of
  (char - charoffset), emitting an add or a subtract only when the offset
  actually changes, and records value as the new charoffset. Expects
  charoffset to be a variable in the calling scope. The body is wrapped in
  do { } while (0) so that the macro expands to exactly one statement and
  stays correct under an unbraced if/else. Note that value is evaluated more
  than once, so it must be free of side effects. */
  #define SET_CHAR_OFFSET(value) \
    do \
      { \
      if ((value) != charoffset) \
        { \
        if ((value) < charoffset) \
          OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
        else \
          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
        } \
      charoffset = (value); \
      } \
    while (0)
  6439. static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
  6440. static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
  6441. {
  6442. DEFINE_COMPILER;
  6443. jump_list *found = NULL;
  6444. jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
  6445. sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
  6446. struct sljit_jump *jump = NULL;
  6447. PCRE2_SPTR ccbegin;
  6448. int compares, invertcmp, numberofcmps;
  6449. #if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
  6450. BOOL utf = common->utf;
  6451. #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
  6452. #ifdef SUPPORT_UNICODE
  6453. BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
  6454. BOOL charsaved = FALSE;
  6455. int typereg = TMP1;
  6456. const sljit_u32 *other_cases;
  6457. sljit_uw typeoffset;
  6458. #endif /* SUPPORT_UNICODE */
  6459. /* Scanning the necessary info. */
  6460. cc++;
  6461. ccbegin = cc;
  6462. compares = 0;
  6463. if (cc[-1] & XCL_MAP)
  6464. {
  6465. min = 0;
  6466. cc += 32 / sizeof(PCRE2_UCHAR);
  6467. }
  6468. while (*cc != XCL_END)
  6469. {
  6470. compares++;
  6471. if (*cc == XCL_SINGLE)
  6472. {
  6473. cc ++;
  6474. GETCHARINCTEST(c, cc);
  6475. if (c > max) max = c;
  6476. if (c < min) min = c;
  6477. #ifdef SUPPORT_UNICODE
  6478. needschar = TRUE;
  6479. #endif /* SUPPORT_UNICODE */
  6480. }
  6481. else if (*cc == XCL_RANGE)
  6482. {
  6483. cc ++;
  6484. GETCHARINCTEST(c, cc);
  6485. if (c < min) min = c;
  6486. GETCHARINCTEST(c, cc);
  6487. if (c > max) max = c;
  6488. #ifdef SUPPORT_UNICODE
  6489. needschar = TRUE;
  6490. #endif /* SUPPORT_UNICODE */
  6491. }
  6492. #ifdef SUPPORT_UNICODE
  6493. else
  6494. {
  6495. SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
  6496. cc++;
  6497. if (*cc == PT_CLIST)
  6498. {
  6499. other_cases = PRIV(ucd_caseless_sets) + cc[1];
  6500. while (*other_cases != NOTACHAR)
  6501. {
  6502. if (*other_cases > max) max = *other_cases;
  6503. if (*other_cases < min) min = *other_cases;
  6504. other_cases++;
  6505. }
  6506. }
  6507. else
  6508. {
  6509. max = READ_CHAR_MAX;
  6510. min = 0;
  6511. }
  6512. switch(*cc)
  6513. {
  6514. case PT_ANY:
  6515. /* Any either accepts everything or ignored. */
  6516. if (cc[-1] == XCL_PROP)
  6517. {
  6518. compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
  6519. if (list == backtracks)
  6520. add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  6521. return;
  6522. }
  6523. break;
  6524. case PT_LAMP:
  6525. case PT_GC:
  6526. case PT_PC:
  6527. case PT_ALNUM:
  6528. needstype = TRUE;
  6529. break;
  6530. case PT_SC:
  6531. needsscript = TRUE;
  6532. break;
  6533. case PT_SPACE:
  6534. case PT_PXSPACE:
  6535. case PT_WORD:
  6536. case PT_PXGRAPH:
  6537. case PT_PXPRINT:
  6538. case PT_PXPUNCT:
  6539. needstype = TRUE;
  6540. needschar = TRUE;
  6541. break;
  6542. case PT_CLIST:
  6543. case PT_UCNC:
  6544. needschar = TRUE;
  6545. break;
  6546. default:
  6547. SLJIT_UNREACHABLE();
  6548. break;
  6549. }
  6550. cc += 2;
  6551. }
  6552. #endif /* SUPPORT_UNICODE */
  6553. }
  6554. SLJIT_ASSERT(compares > 0);
  6555. /* We are not necessary in utf mode even in 8 bit mode. */
  6556. cc = ccbegin;
  6557. if ((cc[-1] & XCL_NOT) != 0)
  6558. read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
  6559. else
  6560. {
  6561. #ifdef SUPPORT_UNICODE
  6562. read_char(common, min, max, (needstype || needsscript) ? backtracks : NULL, 0);
  6563. #else /* !SUPPORT_UNICODE */
  6564. read_char(common, min, max, NULL, 0);
  6565. #endif /* SUPPORT_UNICODE */
  6566. }
  6567. if ((cc[-1] & XCL_HASPROP) == 0)
  6568. {
  6569. if ((cc[-1] & XCL_MAP) != 0)
  6570. {
  6571. jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  6572. if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
  6573. {
  6574. OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  6575. OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  6576. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  6577. OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  6578. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  6579. add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
  6580. }
  6581. add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  6582. JUMPHERE(jump);
  6583. cc += 32 / sizeof(PCRE2_UCHAR);
  6584. }
  6585. else
  6586. {
  6587. OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
  6588. add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
  6589. }
  6590. }
  6591. else if ((cc[-1] & XCL_MAP) != 0)
  6592. {
  6593. OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
  6594. #ifdef SUPPORT_UNICODE
  6595. charsaved = TRUE;
  6596. #endif /* SUPPORT_UNICODE */
  6597. if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
  6598. {
  6599. #if PCRE2_CODE_UNIT_WIDTH == 8
  6600. jump = NULL;
  6601. if (common->utf)
  6602. #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  6603. jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  6604. OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  6605. OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  6606. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  6607. OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  6608. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  6609. add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
  6610. #if PCRE2_CODE_UNIT_WIDTH == 8
  6611. if (common->utf)
  6612. #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  6613. JUMPHERE(jump);
  6614. }
  6615. OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  6616. cc += 32 / sizeof(PCRE2_UCHAR);
  6617. }
  6618. #ifdef SUPPORT_UNICODE
  6619. if (needstype || needsscript)
  6620. {
  6621. if (needschar && !charsaved)
  6622. OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
  6623. #if PCRE2_CODE_UNIT_WIDTH == 32
  6624. if (!common->utf)
  6625. {
  6626. jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  6627. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  6628. JUMPHERE(jump);
  6629. }
  6630. #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
  6631. OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  6632. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
  6633. OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
  6634. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
  6635. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  6636. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  6637. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
  6638. OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
  6639. /* Before anything else, we deal with scripts. */
  6640. if (needsscript)
  6641. {
  6642. OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
  6643. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
  6644. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  6645. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
  6646. ccbegin = cc;
  6647. while (*cc != XCL_END)
  6648. {
  6649. if (*cc == XCL_SINGLE)
  6650. {
  6651. cc ++;
  6652. GETCHARINCTEST(c, cc);
  6653. }
  6654. else if (*cc == XCL_RANGE)
  6655. {
  6656. cc ++;
  6657. GETCHARINCTEST(c, cc);
  6658. GETCHARINCTEST(c, cc);
  6659. }
  6660. else
  6661. {
  6662. SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
  6663. cc++;
  6664. if (*cc == PT_SC)
  6665. {
  6666. compares--;
  6667. invertcmp = (compares == 0 && list != backtracks);
  6668. if (cc[-1] == XCL_NOTPROP)
  6669. invertcmp ^= 0x1;
  6670. jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
  6671. add_jump(compiler, compares > 0 ? list : backtracks, jump);
  6672. }
  6673. cc += 2;
  6674. }
  6675. }
  6676. cc = ccbegin;
  6677. if (needstype)
  6678. {
  6679. /* TMP2 has already been shifted by 2 */
  6680. if (!needschar)
  6681. {
  6682. OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
  6683. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  6684. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
  6685. }
  6686. else
  6687. {
  6688. OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
  6689. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  6690. OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  6691. OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
  6692. typereg = RETURN_ADDR;
  6693. }
  6694. }
  6695. else if (needschar)
  6696. OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  6697. }
  6698. else if (needstype)
  6699. {
  6700. OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
  6701. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
  6702. if (!needschar)
  6703. {
  6704. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  6705. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
  6706. }
  6707. else
  6708. {
  6709. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  6710. OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  6711. OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
  6712. typereg = RETURN_ADDR;
  6713. }
  6714. }
  6715. else if (needschar)
  6716. OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  6717. }
  6718. #endif /* SUPPORT_UNICODE */
  6719. /* Generating code. */
  6720. charoffset = 0;
  6721. numberofcmps = 0;
  6722. #ifdef SUPPORT_UNICODE
  6723. typeoffset = 0;
  6724. #endif /* SUPPORT_UNICODE */
  6725. while (*cc != XCL_END)
  6726. {
  6727. compares--;
  6728. invertcmp = (compares == 0 && list != backtracks);
  6729. jump = NULL;
  6730. if (*cc == XCL_SINGLE)
  6731. {
  6732. cc ++;
  6733. GETCHARINCTEST(c, cc);
  6734. if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
  6735. {
  6736. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
  6737. OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  6738. numberofcmps++;
  6739. }
  6740. else if (numberofcmps > 0)
  6741. {
  6742. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
  6743. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
  6744. jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
  6745. numberofcmps = 0;
  6746. }
  6747. else
  6748. {
  6749. jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
  6750. numberofcmps = 0;
  6751. }
  6752. }
  6753. else if (*cc == XCL_RANGE)
  6754. {
  6755. cc ++;
  6756. GETCHARINCTEST(c, cc);
  6757. SET_CHAR_OFFSET(c);
  6758. GETCHARINCTEST(c, cc);
  6759. if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
  6760. {
  6761. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
  6762. OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  6763. numberofcmps++;
  6764. }
  6765. else if (numberofcmps > 0)
  6766. {
  6767. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
  6768. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
  6769. jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
  6770. numberofcmps = 0;
  6771. }
  6772. else
  6773. {
  6774. jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
  6775. numberofcmps = 0;
  6776. }
  6777. }
  6778. #ifdef SUPPORT_UNICODE
  6779. else
  6780. {
  6781. SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
  6782. if (*cc == XCL_NOTPROP)
  6783. invertcmp ^= 0x1;
  6784. cc++;
  6785. switch(*cc)
  6786. {
  6787. case PT_ANY:
  6788. if (!invertcmp)
  6789. jump = JUMP(SLJIT_JUMP);
  6790. break;
  6791. case PT_LAMP:
  6792. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
  6793. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
  6794. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
  6795. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  6796. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
  6797. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
  6798. jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
  6799. break;
  6800. case PT_GC:
  6801. c = PRIV(ucp_typerange)[(int)cc[1] * 2];
  6802. SET_TYPE_OFFSET(c);
  6803. jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
  6804. break;
  6805. case PT_PC:
  6806. jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
  6807. break;
  6808. case PT_SC:
  6809. compares++;
  6810. /* Do nothing. */
  6811. break;
  6812. case PT_SPACE:
  6813. case PT_PXSPACE:
  6814. SET_CHAR_OFFSET(9);
  6815. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
  6816. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  6817. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
  6818. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  6819. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
  6820. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  6821. SET_TYPE_OFFSET(ucp_Zl);
  6822. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
  6823. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
  6824. jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
  6825. break;
  6826. case PT_WORD:
  6827. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
  6828. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
  6829. /* Fall through. */
  6830. case PT_ALNUM:
  6831. SET_TYPE_OFFSET(ucp_Ll);
  6832. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  6833. OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  6834. SET_TYPE_OFFSET(ucp_Nd);
  6835. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  6836. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
  6837. jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
  6838. break;
  6839. case PT_CLIST:
  6840. other_cases = PRIV(ucd_caseless_sets) + cc[1];
  6841. /* At least three characters are required.
  6842. Otherwise this case would be handled by the normal code path. */
  6843. SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
  6844. SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
  6845. /* Optimizing character pairs, if their difference is power of 2. */
  6846. if (is_powerof2(other_cases[1] ^ other_cases[0]))
  6847. {
  6848. if (charoffset == 0)
  6849. OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
  6850. else
  6851. {
  6852. OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
  6853. OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
  6854. }
  6855. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
  6856. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
  6857. other_cases += 2;
  6858. }
  6859. else if (is_powerof2(other_cases[2] ^ other_cases[1]))
  6860. {
  6861. if (charoffset == 0)
  6862. OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
  6863. else
  6864. {
  6865. OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
  6866. OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
  6867. }
  6868. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
  6869. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
  6870. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
  6871. OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
  6872. other_cases += 3;
  6873. }
  6874. else
  6875. {
  6876. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
  6877. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
  6878. }
  6879. while (*other_cases != NOTACHAR)
  6880. {
  6881. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
  6882. OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
  6883. }
  6884. jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
  6885. break;
  6886. case PT_UCNC:
  6887. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
  6888. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
  6889. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
  6890. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  6891. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
  6892. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  6893. SET_CHAR_OFFSET(0xa0);
  6894. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
  6895. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  6896. SET_CHAR_OFFSET(0);
  6897. OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
  6898. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
  6899. jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
  6900. break;
  6901. case PT_PXGRAPH:
  6902. /* C and Z groups are the farthest two groups. */
  6903. SET_TYPE_OFFSET(ucp_Ll);
  6904. OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
  6905. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
  6906. jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
  6907. /* In case of ucp_Cf, we overwrite the result. */
  6908. SET_CHAR_OFFSET(0x2066);
  6909. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
  6910. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  6911. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
  6912. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  6913. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
  6914. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  6915. JUMPHERE(jump);
  6916. jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
  6917. break;
  6918. case PT_PXPRINT:
  6919. /* C and Z groups are the farthest two groups. */
  6920. SET_TYPE_OFFSET(ucp_Ll);
  6921. OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
  6922. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
  6923. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
  6924. OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
  6925. jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
  6926. /* In case of ucp_Cf, we overwrite the result. */
  6927. SET_CHAR_OFFSET(0x2066);
  6928. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
  6929. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  6930. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
  6931. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  6932. JUMPHERE(jump);
  6933. jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
  6934. break;
  6935. case PT_PXPUNCT:
  6936. SET_TYPE_OFFSET(ucp_Sc);
  6937. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
  6938. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  6939. SET_CHAR_OFFSET(0);
  6940. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
  6941. OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
  6942. SET_TYPE_OFFSET(ucp_Pc);
  6943. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
  6944. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
  6945. jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
  6946. break;
  6947. default:
  6948. SLJIT_UNREACHABLE();
  6949. break;
  6950. }
  6951. cc += 2;
  6952. }
  6953. #endif /* SUPPORT_UNICODE */
  6954. if (jump != NULL)
  6955. add_jump(compiler, compares > 0 ? list : backtracks, jump);
  6956. }
  6957. if (found != NULL)
  6958. set_jumps(found, LABEL());
  6959. }
  6960. #undef SET_TYPE_OFFSET
  6961. #undef SET_CHAR_OFFSET
  6962. #endif
  /* Generates the matching-path code for the position-testing opcodes handled
  by the switch below: OP_SOD, OP_SOM, OP_NOT_WORD_BOUNDARY, OP_WORD_BOUNDARY,
  OP_EODN, OP_EOD, OP_DOLL, OP_DOLLM, OP_CIRC, OP_CIRCM and OP_REVERSE.

  Arguments:
    common      the JIT compiler state (newline settings, match mode, ...)
    type        the opcode to generate code for
    cc          pattern position; only OP_REVERSE reads from it (GET(cc, 0))
    backtracks  list that receives every jump taken when the test fails

  Returns cc, except for OP_REVERSE, which returns cc + LINK_SIZE. */
  6963. static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
  6964. {
  6965. DEFINE_COMPILER;
  6966. int length;
  6967. struct sljit_jump *jump[4];
  6968. #ifdef SUPPORT_UNICODE
  6969. struct sljit_label *label;
  6970. #endif /* SUPPORT_UNICODE */
  6971. switch(type)
  6972. {
  6973. case OP_SOD:
  /* Fail unless STR_PTR is at jit_arguments.begin. */
  6974. if (HAS_VIRTUAL_REGISTERS)
  6975. {
  6976. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  6977. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  6978. }
  6979. else
  6980. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
  6981. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  6982. return cc;
  6983. case OP_SOM:
  /* Fail unless STR_PTR is at jit_arguments.str. */
  6984. if (HAS_VIRTUAL_REGISTERS)
  6985. {
  6986. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  6987. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  6988. }
  6989. else
  6990. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
  6991. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  6992. return cc;
  6993. case OP_NOT_WORD_BOUNDARY:
  6994. case OP_WORD_BOUNDARY:
  /* The shared wordboundary routine is invoked as a fast call. Its outcome is
  tested in TMP2 when invalid UTF is allowed, otherwise through the zero flag. */
  6995. add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
  6996. #ifdef SUPPORT_UNICODE
  6997. if (common->invalid_utf)
  6998. {
  6999. add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  7000. return cc;
  7001. }
  7002. #endif /* SUPPORT_UNICODE */
  7003. sljit_set_current_flags(compiler, SLJIT_SET_Z);
  7004. add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  7005. return cc;
  7006. case OP_EODN:
  7007. /* Requires rather complex checks. */
  /* jump[0] bypasses every newline check once STR_PTR has reached STR_END. */
  7008. jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  7009. if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  7010. {
  /* Fixed newline of two code units: (newline >> 8) & 0xff is compared with
  the code unit at offset 0 and newline & 0xff with the one at offset 1. */
  7011. OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  7012. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  7013. if (common->mode == PCRE2_JIT_COMPLETE)
  7014. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
  7015. else
  7016. {
  /* Partial matching: jump[1] is taken when both code units are available.
  Otherwise fail if STR_PTR + 2 is below STR_END or the first code unit
  differs; what remains is reported through check_partial(). */
  7017. jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
  7018. OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
  7019. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
  7020. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
  7021. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
  7022. add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
  7023. check_partial(common, TRUE);
  7024. add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  7025. JUMPHERE(jump[1]);
  7026. }
  7027. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  7028. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
  7029. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
  7030. }
  7031. else if (common->nltype == NLTYPE_FIXED)
  7032. {
  /* Fixed newline of one code unit: it has to be the last code unit of the
  subject (STR_PTR + 1 == STR_END). */
  7033. OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  7034. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  7035. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
  7036. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  7037. }
  7038. else
  7039. {
  /* Remaining newline types. CR is examined first: jump[1] is taken when the
  current code unit is not CR, jump[2] when fewer than two code units are
  left, and jump[3] when CR is followed by NL exactly at the end. */
  7040. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  7041. jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  7042. OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  7043. OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
  7044. jump[2] = JUMP(SLJIT_GREATER);
  7045. add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
  7046. /* Equal. */
  7047. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  7048. jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  7049. add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  /* The current code unit is not CR. */
  7050. JUMPHERE(jump[1]);
  7051. if (common->nltype == NLTYPE_ANYCRLF)
  7052. {
  /* Only a lone NL in the last position is left to accept. */
  7053. OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  7054. add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
  7055. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  7056. }
  7057. else
  7058. {
  /* STR_PTR is saved in TMP3, one character is read, and it must end exactly
  at STR_END and be accepted by the shared anynewline routine; STR_PTR is
  restored afterwards. */
  7059. OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  7060. read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
  7061. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
  7062. add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  7063. sljit_set_current_flags(compiler, SLJIT_SET_Z);
  7064. add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
  7065. OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  7066. }
  7067. JUMPHERE(jump[2]);
  7068. JUMPHERE(jump[3]);
  7069. }
  7070. JUMPHERE(jump[0]);
  7071. if (common->mode != PCRE2_JIT_COMPLETE)
  7072. check_partial(common, TRUE);
  7073. return cc;
  7074. case OP_EOD:
  /* Fail while STR_PTR is still below STR_END. */
  7075. add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  7076. if (common->mode != PCRE2_JIT_COMPLETE)
  7077. check_partial(common, TRUE);
  7078. return cc;
  7079. case OP_DOLL:
  /* Fail when PCRE2_NOTEOL is set in jit_arguments.options. */
  7080. if (HAS_VIRTUAL_REGISTERS)
  7081. {
  7082. OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  7083. OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  7084. }
  7085. else
  7086. OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  7087. add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  /* Without endonly the rest of the test is the OP_EODN code; with endonly
  only STR_PTR >= STR_END is accepted. */
  7088. if (!common->endonly)
  7089. compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
  7090. else
  7091. {
  7092. add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  7093. check_partial(common, FALSE);
  7094. }
  7095. return cc;
  7096. case OP_DOLLM:
  /* jump[1] is taken when STR_PTR is below STR_END; a newline then has to
  follow. At STR_END only PCRE2_NOTEOL makes the assertion fail, and jump[0]
  skips the newline test. */
  7097. jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  7098. if (HAS_VIRTUAL_REGISTERS)
  7099. {
  7100. OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  7101. OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  7102. }
  7103. else
  7104. OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  7105. add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  7106. check_partial(common, FALSE);
  7107. jump[0] = JUMP(SLJIT_JUMP);
  7108. JUMPHERE(jump[1]);
  7109. if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  7110. {
  /* Fixed newline of two code units, compared the same way as in OP_EODN. */
  7111. OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  7112. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  7113. if (common->mode == PCRE2_JIT_COMPLETE)
  7114. add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
  7115. else
  7116. {
  7117. jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
  7118. /* STR_PTR = STR_END - IN_UCHARS(1) */
  7119. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
  7120. check_partial(common, TRUE);
  7121. add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  7122. JUMPHERE(jump[1]);
  7123. }
  7124. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  7125. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
  7126. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
  7127. }
  7128. else
  7129. {
  /* Other newline settings: the next character is fetched with peek_char()
  and tested by check_newlinechar(). */
  7130. peek_char(common, common->nlmax, TMP3, 0, NULL);
  7131. check_newlinechar(common, common->nltype, backtracks, FALSE);
  7132. }
  7133. JUMPHERE(jump[0]);
  7134. return cc;
  7135. case OP_CIRC:
  /* Fail when STR_PTR is past jit_arguments.begin or PCRE2_NOTBOL is set in
  jit_arguments.options. */
  7136. if (HAS_VIRTUAL_REGISTERS)
  7137. {
  7138. OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  7139. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
  7140. add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
  7141. OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  7142. add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  7143. }
  7144. else
  7145. {
  7146. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
  7147. add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
  7148. OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  7149. add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  7150. }
  7151. return cc;
  7152. case OP_CIRCM:
  7153. /* TMP2 might be used by peek_char_back. */
  /* TMP2 holds jit_arguments.begin. jump[1] is taken when STR_PTR is past it,
  in which case a newline has to precede STR_PTR. At begin only PCRE2_NOTBOL
  makes the assertion fail, and jump[0] skips the newline test. */
  7154. if (HAS_VIRTUAL_REGISTERS)
  7155. {
  7156. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  7157. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  7158. jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
  7159. OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  7160. }
  7161. else
  7162. {
  7163. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
  7164. jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
  7165. OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  7166. }
  7167. add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  7168. jump[0] = JUMP(SLJIT_JUMP);
  7169. JUMPHERE(jump[1]);
  /* Unless alt_circumflex is set, fail when STR_PTR is at or beyond STR_END. */
  7170. if (!common->alt_circumflex)
  7171. add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  7172. if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  7173. {
  /* Fixed newline of two code units: both code units before STR_PTR are
  compared, after checking that STR_PTR - 2 is not below begin (TMP2). */
  7174. OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  7175. add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
  7176. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  7177. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  7178. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
  7179. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
  7180. }
  7181. else
  7182. {
  /* Other newline settings: the preceding character is fetched with
  peek_char_back() and tested by check_newlinechar(). */
  7183. peek_char_back(common, common->nlmax, backtracks);
  7184. check_newlinechar(common, common->nltype, backtracks, FALSE);
  7185. }
  7186. JUMPHERE(jump[0]);
  7187. return cc;
  7188. case OP_REVERSE:
  /* The distance is read from the pattern; for zero no code is generated. */
  7189. length = GET(cc, 0);
  7190. if (length == 0)
  7191. return cc + LINK_SIZE;
  /* TMP2 receives jit_arguments.begin, the lower limit for moving back. */
  7192. if (HAS_VIRTUAL_REGISTERS)
  7193. {
  7194. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  7195. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  7196. }
  7197. else
  7198. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
  7199. #ifdef SUPPORT_UNICODE
  7200. if (common->utf)
  7201. {
  /* UTF: a generated loop runs length times with TMP3 as the counter. Each
  pass fails if STR_PTR is already at or below begin, then calls move_back()
  (presumably stepping back one character). */
  7202. OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, length);
  7203. label = LABEL();
  7204. add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
  7205. move_back(common, backtracks, FALSE);
  7206. OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
  7207. JUMPTO(SLJIT_NOT_ZERO, label);
  7208. }
  7209. else
  7210. #endif
  7211. {
  /* Non-UTF: STR_PTR is moved back by IN_UCHARS(length) in one step, and the
  assertion fails if that lands below begin. */
  7212. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
  7213. add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
  7214. }
  7215. check_start_used_ptr(common);
  7216. return cc + LINK_SIZE;
  7217. }
  /* Every opcode handled above returns from inside the switch. */
  7218. SLJIT_UNREACHABLE();
  7219. return cc;
  7220. }
  7221. #ifdef SUPPORT_UNICODE
  7222. #if PCRE2_CODE_UNIT_WIDTH != 32
  7223. static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
  7224. {
  7225. PCRE2_SPTR start_subject = args->begin;
  7226. PCRE2_SPTR end_subject = args->end;
  7227. int lgb, rgb, ricount;
  7228. PCRE2_SPTR prevcc, endcc, bptr;
  7229. BOOL first = TRUE;
  7230. uint32_t c;
  7231. prevcc = cc;
  7232. endcc = NULL;
  7233. do
  7234. {
  7235. GETCHARINC(c, cc);
  7236. rgb = UCD_GRAPHBREAK(c);
  7237. if (first)
  7238. {
  7239. lgb = rgb;
  7240. endcc = cc;
  7241. first = FALSE;
  7242. continue;
  7243. }
  7244. if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
  7245. break;
  7246. /* Not breaking between Regional Indicators is allowed only if there
  7247. are an even number of preceding RIs. */
  7248. if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
  7249. {
  7250. ricount = 0;
  7251. bptr = prevcc;
  7252. /* bptr is pointing to the left-hand character */
  7253. while (bptr > start_subject)
  7254. {
  7255. bptr--;
  7256. BACKCHAR(bptr);
  7257. GETCHAR(c, bptr);
  7258. if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
  7259. break;
  7260. ricount++;
  7261. }
  7262. if ((ricount & 1) != 0) break; /* Grapheme break required */
  7263. }
  7264. /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
  7265. allows any number of them before a following Extended_Pictographic. */
  7266. if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
  7267. lgb != ucp_gbExtended_Pictographic)
  7268. lgb = rgb;
  7269. prevcc = endcc;
  7270. endcc = cc;
  7271. }
  7272. while (cc < end_subject);
  7273. return endcc;
  7274. }
  7275. #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
  7276. static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
  7277. {
  7278. PCRE2_SPTR start_subject = args->begin;
  7279. PCRE2_SPTR end_subject = args->end;
  7280. int lgb, rgb, ricount;
  7281. PCRE2_SPTR prevcc, endcc, bptr;
  7282. BOOL first = TRUE;
  7283. uint32_t c;
  7284. prevcc = cc;
  7285. endcc = NULL;
  7286. do
  7287. {
  7288. GETCHARINC_INVALID(c, cc, end_subject, break);
  7289. rgb = UCD_GRAPHBREAK(c);
  7290. if (first)
  7291. {
  7292. lgb = rgb;
  7293. endcc = cc;
  7294. first = FALSE;
  7295. continue;
  7296. }
  7297. if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
  7298. break;
  7299. /* Not breaking between Regional Indicators is allowed only if there
  7300. are an even number of preceding RIs. */
  7301. if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
  7302. {
  7303. ricount = 0;
  7304. bptr = prevcc;
  7305. /* bptr is pointing to the left-hand character */
  7306. while (bptr > start_subject)
  7307. {
  7308. GETCHARBACK_INVALID(c, bptr, start_subject, break);
  7309. if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
  7310. break;
  7311. ricount++;
  7312. }
  7313. if ((ricount & 1) != 0)
  7314. break; /* Grapheme break required */
  7315. }
  7316. /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
  7317. allows any number of them before a following Extended_Pictographic. */
  7318. if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
  7319. lgb != ucp_gbExtended_Pictographic)
  7320. lgb = rgb;
  7321. prevcc = endcc;
  7322. endcc = cc;
  7323. }
  7324. while (cc < end_subject);
  7325. return endcc;
  7326. }
  7327. static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
  7328. {
  7329. PCRE2_SPTR start_subject = args->begin;
  7330. PCRE2_SPTR end_subject = args->end;
  7331. int lgb, rgb, ricount;
  7332. PCRE2_SPTR bptr;
  7333. uint32_t c;
  7334. /* Patch by PH */
  7335. /* GETCHARINC(c, cc); */
  7336. c = *cc++;
  7337. #if PCRE2_CODE_UNIT_WIDTH == 32
  7338. if (c >= 0x110000)
  7339. return NULL;
  7340. #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
  7341. lgb = UCD_GRAPHBREAK(c);
  7342. while (cc < end_subject)
  7343. {
  7344. c = *cc;
  7345. #if PCRE2_CODE_UNIT_WIDTH == 32
  7346. if (c >= 0x110000)
  7347. break;
  7348. #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
  7349. rgb = UCD_GRAPHBREAK(c);
  7350. if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
  7351. break;
  7352. /* Not breaking between Regional Indicators is allowed only if there
  7353. are an even number of preceding RIs. */
  7354. if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
  7355. {
  7356. ricount = 0;
  7357. bptr = cc - 1;
  7358. /* bptr is pointing to the left-hand character */
  7359. while (bptr > start_subject)
  7360. {
  7361. bptr--;
  7362. c = *bptr;
  7363. #if PCRE2_CODE_UNIT_WIDTH == 32
  7364. if (c >= 0x110000)
  7365. break;
  7366. #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
  7367. if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
  7368. ricount++;
  7369. }
  7370. if ((ricount & 1) != 0)
  7371. break; /* Grapheme break required */
  7372. }
  7373. /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
  7374. allows any number of them before a following Extended_Pictographic. */
  7375. if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
  7376. lgb != ucp_gbExtended_Pictographic)
  7377. lgb = rgb;
  7378. cc++;
  7379. }
  7380. return cc;
  7381. }
  7382. #endif /* SUPPORT_UNICODE */
  7383. static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
  7384. {
  7385. DEFINE_COMPILER;
  7386. int length;
  7387. unsigned int c, oc, bit;
  7388. compare_context context;
  7389. struct sljit_jump *jump[3];
  7390. jump_list *end_list;
  7391. #ifdef SUPPORT_UNICODE
  7392. PCRE2_UCHAR propdata[5];
  7393. #endif /* SUPPORT_UNICODE */
  7394. switch(type)
  7395. {
  7396. case OP_NOT_DIGIT:
  7397. case OP_DIGIT:
  7398. /* Digits are usually 0-9, so it is worth to optimize them. */
  7399. if (check_str_ptr)
  7400. detect_partial_match(common, backtracks);
  7401. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  7402. if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
  7403. read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
  7404. else
  7405. #endif
  7406. read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
  7407. /* Flip the starting bit in the negative case. */
  7408. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
  7409. add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  7410. return cc;
  7411. case OP_NOT_WHITESPACE:
  7412. case OP_WHITESPACE:
  7413. if (check_str_ptr)
  7414. detect_partial_match(common, backtracks);
  7415. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  7416. if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
  7417. read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
  7418. else
  7419. #endif
  7420. read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
  7421. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
  7422. add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  7423. return cc;
  7424. case OP_NOT_WORDCHAR:
  7425. case OP_WORDCHAR:
  7426. if (check_str_ptr)
  7427. detect_partial_match(common, backtracks);
  7428. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  7429. if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
  7430. read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
  7431. else
  7432. #endif
  7433. read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
  7434. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
  7435. add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  7436. return cc;
  7437. case OP_ANY:
  7438. if (check_str_ptr)
  7439. detect_partial_match(common, backtracks);
  7440. read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
  7441. if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  7442. {
  7443. jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
  7444. end_list = NULL;
  7445. if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  7446. add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  7447. else
  7448. check_str_end(common, &end_list);
  7449. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  7450. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
  7451. set_jumps(end_list, LABEL());
  7452. JUMPHERE(jump[0]);
  7453. }
  7454. else
  7455. check_newlinechar(common, common->nltype, backtracks, TRUE);
  7456. return cc;
  7457. case OP_ALLANY:
  7458. if (check_str_ptr)
  7459. detect_partial_match(common, backtracks);
  7460. #ifdef SUPPORT_UNICODE
  7461. if (common->utf)
  7462. {
  7463. if (common->invalid_utf)
  7464. {
  7465. read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
  7466. return cc;
  7467. }
  7468. #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
  7469. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  7470. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  7471. #if PCRE2_CODE_UNIT_WIDTH == 8
  7472. jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  7473. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  7474. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  7475. #elif PCRE2_CODE_UNIT_WIDTH == 16
  7476. jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  7477. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  7478. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  7479. OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  7480. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  7481. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  7482. #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  7483. JUMPHERE(jump[0]);
  7484. return cc;
  7485. #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
  7486. }
  7487. #endif /* SUPPORT_UNICODE */
  7488. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  7489. return cc;
  7490. case OP_ANYBYTE:
  7491. if (check_str_ptr)
  7492. detect_partial_match(common, backtracks);
  7493. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  7494. return cc;
  7495. #ifdef SUPPORT_UNICODE
  7496. case OP_NOTPROP:
  7497. case OP_PROP:
  7498. propdata[0] = XCL_HASPROP;
  7499. propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
  7500. propdata[2] = cc[0];
  7501. propdata[3] = cc[1];
  7502. propdata[4] = XCL_END;
  7503. if (check_str_ptr)
  7504. detect_partial_match(common, backtracks);
  7505. compile_xclass_matchingpath(common, propdata, backtracks);
  7506. return cc + 2;
  7507. #endif
  7508. case OP_ANYNL:
  7509. if (check_str_ptr)
  7510. detect_partial_match(common, backtracks);
  7511. read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
  7512. jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  7513. /* We don't need to handle soft partial matching case. */
  7514. end_list = NULL;
  7515. if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  7516. add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  7517. else
  7518. check_str_end(common, &end_list);
  7519. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  7520. jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  7521. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  7522. jump[2] = JUMP(SLJIT_JUMP);
  7523. JUMPHERE(jump[0]);
  7524. check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
  7525. set_jumps(end_list, LABEL());
  7526. JUMPHERE(jump[1]);
  7527. JUMPHERE(jump[2]);
  7528. return cc;
  7529. case OP_NOT_HSPACE:
  7530. case OP_HSPACE:
  7531. if (check_str_ptr)
  7532. detect_partial_match(common, backtracks);
  7533. if (type == OP_NOT_HSPACE)
  7534. read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
  7535. else
  7536. read_char(common, 0x9, 0x3000, NULL, 0);
  7537. add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
  7538. sljit_set_current_flags(compiler, SLJIT_SET_Z);
  7539. add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  7540. return cc;
  7541. case OP_NOT_VSPACE:
  7542. case OP_VSPACE:
  7543. if (check_str_ptr)
  7544. detect_partial_match(common, backtracks);
  7545. if (type == OP_NOT_VSPACE)
  7546. read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
  7547. else
  7548. read_char(common, 0xa, 0x2029, NULL, 0);
  7549. add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
  7550. sljit_set_current_flags(compiler, SLJIT_SET_Z);
  7551. add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  7552. return cc;
  7553. #ifdef SUPPORT_UNICODE
  7554. case OP_EXTUNI:
  7555. if (check_str_ptr)
  7556. detect_partial_match(common, backtracks);
  7557. SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
  7558. OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
  7559. #if PCRE2_CODE_UNIT_WIDTH != 32
  7560. sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
  7561. common->utf ? (common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_utf)) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
  7562. if (common->invalid_utf)
  7563. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
  7564. #else
  7565. sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
  7566. common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
  7567. if (!common->utf || common->invalid_utf)
  7568. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
  7569. #endif
  7570. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
  7571. if (common->mode == PCRE2_JIT_PARTIAL_HARD)
  7572. {
  7573. jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
  7574. /* Since we successfully read a char above, partial matching must occure. */
  7575. check_partial(common, TRUE);
  7576. JUMPHERE(jump[0]);
  7577. }
  7578. return cc;
  7579. #endif
  7580. case OP_CHAR:
  7581. case OP_CHARI:
  7582. length = 1;
  7583. #ifdef SUPPORT_UNICODE
  7584. if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
  7585. #endif
  7586. if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
  7587. detect_partial_match(common, backtracks);
  7588. if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
  7589. {
  7590. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
  7591. if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
  7592. add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
  7593. context.length = IN_UCHARS(length);
  7594. context.sourcereg = -1;
  7595. #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  7596. context.ucharptr = 0;
  7597. #endif
  7598. return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
  7599. }
  7600. #ifdef SUPPORT_UNICODE
  7601. if (common->utf)
  7602. {
  7603. GETCHAR(c, cc);
  7604. }
  7605. else
  7606. #endif
  7607. c = *cc;
  7608. SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));
  7609. if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
  7610. add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  7611. oc = char_othercase(common, c);
  7612. read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);
  7613. SLJIT_ASSERT(!is_powerof2(c ^ oc));
  7614. if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  7615. {
  7616. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
  7617. CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, c);
  7618. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
  7619. }
  7620. else
  7621. {
  7622. jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
  7623. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
  7624. JUMPHERE(jump[0]);
  7625. }
  7626. return cc + length;
  7627. case OP_NOT:
  7628. case OP_NOTI:
  7629. if (check_str_ptr)
  7630. detect_partial_match(common, backtracks);
  7631. length = 1;
  7632. #ifdef SUPPORT_UNICODE
  7633. if (common->utf)
  7634. {
  7635. #if PCRE2_CODE_UNIT_WIDTH == 8
  7636. c = *cc;
  7637. if (c < 128 && !common->invalid_utf)
  7638. {
  7639. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  7640. if (type == OP_NOT || !char_has_othercase(common, cc))
  7641. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
  7642. else
  7643. {
  7644. /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
  7645. OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
  7646. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
  7647. }
  7648. /* Skip the variable-length character. */
  7649. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  7650. jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  7651. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  7652. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  7653. JUMPHERE(jump[0]);
  7654. return cc + 1;
  7655. }
  7656. else
  7657. #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  7658. {
  7659. GETCHARLEN(c, cc, length);
  7660. }
  7661. }
  7662. else
  7663. #endif /* SUPPORT_UNICODE */
  7664. c = *cc;
  7665. if (type == OP_NOT || !char_has_othercase(common, cc))
  7666. {
  7667. read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
  7668. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
  7669. }
  7670. else
  7671. {
  7672. oc = char_othercase(common, c);
  7673. read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
  7674. bit = c ^ oc;
  7675. if (is_powerof2(bit))
  7676. {
  7677. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
  7678. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
  7679. }
  7680. else
  7681. {
  7682. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
  7683. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
  7684. }
  7685. }
  7686. return cc + length;
  7687. case OP_CLASS:
  7688. case OP_NCLASS:
  7689. if (check_str_ptr)
  7690. detect_partial_match(common, backtracks);
  7691. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  7692. bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
  7693. if (type == OP_NCLASS)
  7694. read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
  7695. else
  7696. read_char(common, 0, bit, NULL, 0);
  7697. #else
  7698. if (type == OP_NCLASS)
  7699. read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
  7700. else
  7701. read_char(common, 0, 255, NULL, 0);
  7702. #endif
  7703. if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
  7704. return cc + 32 / sizeof(PCRE2_UCHAR);
  7705. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  7706. jump[0] = NULL;
  7707. if (common->utf)
  7708. {
  7709. jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
  7710. if (type == OP_CLASS)
  7711. {
  7712. add_jump(compiler, backtracks, jump[0]);
  7713. jump[0] = NULL;
  7714. }
  7715. }
  7716. #elif PCRE2_CODE_UNIT_WIDTH != 8
  7717. jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  7718. if (type == OP_CLASS)
  7719. {
  7720. add_jump(compiler, backtracks, jump[0]);
  7721. jump[0] = NULL;
  7722. }
  7723. #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
  7724. OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  7725. OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  7726. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  7727. OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  7728. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  7729. add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
  7730. #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  7731. if (jump[0] != NULL)
  7732. JUMPHERE(jump[0]);
  7733. #endif
  7734. return cc + 32 / sizeof(PCRE2_UCHAR);
  7735. #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  7736. case OP_XCLASS:
  7737. if (check_str_ptr)
  7738. detect_partial_match(common, backtracks);
  7739. compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
  7740. return cc + GET(cc, 0) - 1;
  7741. #endif
  7742. }
  7743. SLJIT_UNREACHABLE();
  7744. return cc;
  7745. }
  7746. static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
  7747. {
  7748. /* This function consumes at least one input character. */
  7749. /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
  7750. DEFINE_COMPILER;
  7751. PCRE2_SPTR ccbegin = cc;
  7752. compare_context context;
  7753. int size;
  7754. context.length = 0;
  7755. do
  7756. {
  7757. if (cc >= ccend)
  7758. break;
  7759. if (*cc == OP_CHAR)
  7760. {
  7761. size = 1;
  7762. #ifdef SUPPORT_UNICODE
  7763. if (common->utf && HAS_EXTRALEN(cc[1]))
  7764. size += GET_EXTRALEN(cc[1]);
  7765. #endif
  7766. }
  7767. else if (*cc == OP_CHARI)
  7768. {
  7769. size = 1;
  7770. #ifdef SUPPORT_UNICODE
  7771. if (common->utf)
  7772. {
  7773. if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
  7774. size = 0;
  7775. else if (HAS_EXTRALEN(cc[1]))
  7776. size += GET_EXTRALEN(cc[1]);
  7777. }
  7778. else
  7779. #endif
  7780. if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
  7781. size = 0;
  7782. }
  7783. else
  7784. size = 0;
  7785. cc += 1 + size;
  7786. context.length += IN_UCHARS(size);
  7787. }
  7788. while (size > 0 && context.length <= 128);
  7789. cc = ccbegin;
  7790. if (context.length > 0)
  7791. {
  7792. /* We have a fixed-length byte sequence. */
  7793. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
  7794. add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
  7795. context.sourcereg = -1;
  7796. #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  7797. context.ucharptr = 0;
  7798. #endif
  7799. do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
  7800. return cc;
  7801. }
  7802. /* A non-fixed length character will be checked if length == 0. */
  7803. return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
  7804. }
  7805. /* Forward definitions. */
  7806. static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
  7807. static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
  /* Allocates a zero-filled backtrack record of `size` bytes from the
     compiler's memory, stores `ccstart` in it and pushes it on top of
     parent's backtrack list. The macro relies on `compiler`, `backtrack`
     and `parent` being in scope where it is used. If the compiler reports an
     error after the allocation, the enclosing function returns `error`.
     Note that `size` is evaluated twice. */
  #define PUSH_BACKTRACK(size, ccstart, error) \
    do \
    { \
      backtrack = sljit_alloc_memory(compiler, (size)); \
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
        return error; \
      memset(backtrack, 0, (size)); \
      backtrack->cc = (ccstart); \
      backtrack->prev = parent->top; \
      parent->top = backtrack; \
    } \
    while (0)

  /* Same as PUSH_BACKTRACK, for use inside functions returning void. */
  #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
    do \
    { \
      backtrack = sljit_alloc_memory(compiler, (size)); \
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
        return; \
      memset(backtrack, 0, (size)); \
      backtrack->cc = (ccstart); \
      backtrack->prev = parent->top; \
      parent->top = backtrack; \
    } \
    while (0)

  /* Views the local `backtrack` pointer as a pointer to a more specific
     backtrack record type. */
  #define BACKTRACK_AS(type) ((type *)backtrack)
  7833. static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
  7834. {
  7835. /* The OVECTOR offset goes to TMP2. */
  7836. DEFINE_COMPILER;
  7837. int count = GET2(cc, 1 + IMM2_SIZE);
  7838. PCRE2_SPTR slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
  7839. unsigned int offset;
  7840. jump_list *found = NULL;
  7841. SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
  7842. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
  7843. count--;
  7844. while (count-- > 0)
  7845. {
  7846. offset = GET2(slot, 0) << 1;
  7847. GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
  7848. add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
  7849. slot += common->name_entry_size;
  7850. }
  7851. offset = GET2(slot, 0) << 1;
  7852. GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
  7853. if (backtracks != NULL && !common->unset_backref)
  7854. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
  7855. set_jumps(found, LABEL());
  7856. }
  7857. static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
  7858. {
  7859. DEFINE_COMPILER;
  7860. BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
  7861. int offset = 0;
  7862. struct sljit_jump *jump = NULL;
  7863. struct sljit_jump *partial;
  7864. struct sljit_jump *nopartial;
  7865. #if defined SUPPORT_UNICODE
  7866. struct sljit_label *loop;
  7867. struct sljit_label *caseless_loop;
  7868. jump_list *no_match = NULL;
  7869. int source_reg = COUNT_MATCH;
  7870. int source_end_reg = ARGUMENTS;
  7871. int char1_reg = STACK_LIMIT;
  7872. #endif /* SUPPORT_UNICODE */
  7873. if (ref)
  7874. {
  7875. offset = GET2(cc, 1) << 1;
  7876. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  7877. /* OVECTOR(1) contains the "string begin - 1" constant. */
  7878. if (withchecks && !common->unset_backref)
  7879. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  7880. }
  7881. else
  7882. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  7883. #if defined SUPPORT_UNICODE
  7884. if (common->utf && *cc == OP_REFI)
  7885. {
  7886. SLJIT_ASSERT(common->iref_ptr != 0);
  7887. if (ref)
  7888. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  7889. else
  7890. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
  7891. if (withchecks && emptyfail)
  7892. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));
  7893. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
  7894. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
  7895. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);
  7896. OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
  7897. OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);
  7898. loop = LABEL();
  7899. jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
  7900. partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  7901. /* Read original character. It must be a valid UTF character. */
  7902. OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  7903. OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);
  7904. read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);
  7905. OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
  7906. OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  7907. OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);
  7908. /* Read second character. */
  7909. read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);
  7910. CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
  7911. OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
  7912. add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  7913. OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
  7914. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
  7915. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  7916. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));
  7917. OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
  7918. OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
  7919. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
  7920. CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
  7921. add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  7922. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
  7923. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));
  7924. caseless_loop = LABEL();
  7925. OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  7926. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
  7927. OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, char1_reg, 0);
  7928. JUMPTO(SLJIT_EQUAL, loop);
  7929. JUMPTO(SLJIT_LESS, caseless_loop);
  7930. set_jumps(no_match, LABEL());
  7931. if (common->mode == PCRE2_JIT_COMPLETE)
  7932. JUMPHERE(partial);
  7933. OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  7934. OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  7935. OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  7936. add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  7937. if (common->mode != PCRE2_JIT_COMPLETE)
  7938. {
  7939. JUMPHERE(partial);
  7940. OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  7941. OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  7942. OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  7943. check_partial(common, FALSE);
  7944. add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  7945. }
  7946. JUMPHERE(jump);
  7947. OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  7948. OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  7949. OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  7950. return;
  7951. }
  7952. else
  7953. #endif /* SUPPORT_UNICODE */
  7954. {
  7955. if (ref)
  7956. OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  7957. else
  7958. OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
  7959. if (withchecks)
  7960. jump = JUMP(SLJIT_ZERO);
  7961. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  7962. partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  7963. if (common->mode == PCRE2_JIT_COMPLETE)
  7964. add_jump(compiler, backtracks, partial);
  7965. add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
  7966. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  7967. if (common->mode != PCRE2_JIT_COMPLETE)
  7968. {
  7969. nopartial = JUMP(SLJIT_JUMP);
  7970. JUMPHERE(partial);
  7971. /* TMP2 -= STR_END - STR_PTR */
  7972. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
  7973. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
  7974. partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
  7975. OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
  7976. add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
  7977. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  7978. JUMPHERE(partial);
  7979. check_partial(common, FALSE);
  7980. add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  7981. JUMPHERE(nopartial);
  7982. }
  7983. }
  7984. if (jump != NULL)
  7985. {
  7986. if (emptyfail)
  7987. add_jump(compiler, backtracks, jump);
  7988. else
  7989. JUMPHERE(jump);
  7990. }
  7991. }
  7992. static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
  7993. {
  7994. DEFINE_COMPILER;
  7995. BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
  7996. backtrack_common *backtrack;
  7997. PCRE2_UCHAR type;
  7998. int offset = 0;
  7999. struct sljit_label *label;
  8000. struct sljit_jump *zerolength;
  8001. struct sljit_jump *jump = NULL;
  8002. PCRE2_SPTR ccbegin = cc;
  8003. int min = 0, max = 0;
  8004. BOOL minimize;
  8005. PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
  8006. if (ref)
  8007. offset = GET2(cc, 1) << 1;
  8008. else
  8009. cc += IMM2_SIZE;
  8010. type = cc[1 + IMM2_SIZE];
  8011. SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
  8012. minimize = (type & 0x1) != 0;
  8013. switch(type)
  8014. {
  8015. case OP_CRSTAR:
  8016. case OP_CRMINSTAR:
  8017. min = 0;
  8018. max = 0;
  8019. cc += 1 + IMM2_SIZE + 1;
  8020. break;
  8021. case OP_CRPLUS:
  8022. case OP_CRMINPLUS:
  8023. min = 1;
  8024. max = 0;
  8025. cc += 1 + IMM2_SIZE + 1;
  8026. break;
  8027. case OP_CRQUERY:
  8028. case OP_CRMINQUERY:
  8029. min = 0;
  8030. max = 1;
  8031. cc += 1 + IMM2_SIZE + 1;
  8032. break;
  8033. case OP_CRRANGE:
  8034. case OP_CRMINRANGE:
  8035. min = GET2(cc, 1 + IMM2_SIZE + 1);
  8036. max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
  8037. cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
  8038. break;
  8039. default:
  8040. SLJIT_UNREACHABLE();
  8041. break;
  8042. }
  8043. if (!minimize)
  8044. {
  8045. if (min == 0)
  8046. {
  8047. allocate_stack(common, 2);
  8048. if (ref)
  8049. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  8050. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  8051. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
  8052. /* Temporary release of STR_PTR. */
  8053. OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  8054. /* Handles both invalid and empty cases. Since the minimum repeat,
  8055. is zero the invalid case is basically the same as an empty case. */
  8056. if (ref)
  8057. zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  8058. else
  8059. {
  8060. compile_dnref_search(common, ccbegin, NULL);
  8061. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  8062. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
  8063. zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
  8064. }
  8065. /* Restore if not zero length. */
  8066. OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  8067. }
  8068. else
  8069. {
  8070. allocate_stack(common, 1);
  8071. if (ref)
  8072. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  8073. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  8074. if (ref)
  8075. {
  8076. add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  8077. zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  8078. }
  8079. else
  8080. {
  8081. compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
  8082. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  8083. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
  8084. zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
  8085. }
  8086. }
  8087. if (min > 1 || max > 1)
  8088. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
  8089. label = LABEL();
  8090. if (!ref)
  8091. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
  8092. compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
  8093. if (min > 1 || max > 1)
  8094. {
  8095. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
  8096. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  8097. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
  8098. if (min > 1)
  8099. CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
  8100. if (max > 1)
  8101. {
  8102. jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
  8103. allocate_stack(common, 1);
  8104. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  8105. JUMPTO(SLJIT_JUMP, label);
  8106. JUMPHERE(jump);
  8107. }
  8108. }
  8109. if (max == 0)
  8110. {
  8111. /* Includes min > 1 case as well. */
  8112. allocate_stack(common, 1);
  8113. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  8114. JUMPTO(SLJIT_JUMP, label);
  8115. }
  8116. JUMPHERE(zerolength);
  8117. BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
  8118. count_match(common);
  8119. return cc;
  8120. }
  8121. allocate_stack(common, ref ? 2 : 3);
  8122. if (ref)
  8123. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  8124. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  8125. if (type != OP_CRMINSTAR)
  8126. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
  8127. if (min == 0)
  8128. {
  8129. /* Handles both invalid and empty cases. Since the minimum repeat,
  8130. is zero the invalid case is basically the same as an empty case. */
  8131. if (ref)
  8132. zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  8133. else
  8134. {
  8135. compile_dnref_search(common, ccbegin, NULL);
  8136. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  8137. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
  8138. zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
  8139. }
  8140. /* Length is non-zero, we can match real repeats. */
  8141. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  8142. jump = JUMP(SLJIT_JUMP);
  8143. }
  8144. else
  8145. {
  8146. if (ref)
  8147. {
  8148. add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  8149. zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  8150. }
  8151. else
  8152. {
  8153. compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
  8154. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  8155. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
  8156. zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
  8157. }
  8158. }
  8159. BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
  8160. if (max > 0)
  8161. add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
  8162. if (!ref)
  8163. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
  8164. compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
  8165. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  8166. if (min > 1)
  8167. {
  8168. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  8169. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  8170. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  8171. CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
  8172. }
  8173. else if (max > 0)
  8174. OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
  8175. if (jump != NULL)
  8176. JUMPHERE(jump);
  8177. JUMPHERE(zerolength);
  8178. count_match(common);
  8179. return cc;
  8180. }
  8181. static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
  8182. {
  8183. DEFINE_COMPILER;
  8184. backtrack_common *backtrack;
  8185. recurse_entry *entry = common->entries;
  8186. recurse_entry *prev = NULL;
  8187. sljit_sw start = GET(cc, 1);
  8188. PCRE2_SPTR start_cc;
  8189. BOOL needs_control_head;
  8190. PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
  8191. /* Inlining simple patterns. */
  8192. if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
  8193. {
  8194. start_cc = common->start + start;
  8195. compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
  8196. BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
  8197. return cc + 1 + LINK_SIZE;
  8198. }
  8199. while (entry != NULL)
  8200. {
  8201. if (entry->start == start)
  8202. break;
  8203. prev = entry;
  8204. entry = entry->next;
  8205. }
  8206. if (entry == NULL)
  8207. {
  8208. entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
  8209. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  8210. return NULL;
  8211. entry->next = NULL;
  8212. entry->entry_label = NULL;
  8213. entry->backtrack_label = NULL;
  8214. entry->entry_calls = NULL;
  8215. entry->backtrack_calls = NULL;
  8216. entry->start = start;
  8217. if (prev != NULL)
  8218. prev->next = entry;
  8219. else
  8220. common->entries = entry;
  8221. }
  8222. BACKTRACK_AS(recurse_backtrack)->entry = entry;
  8223. if (entry->entry_label == NULL)
  8224. add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
  8225. else
  8226. JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
  8227. /* Leave if the match is failed. */
  8228. add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
  8229. BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
  8230. return cc + 1 + LINK_SIZE;
  8231. }
  8232. static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
  8233. {
  8234. PCRE2_SPTR begin;
  8235. PCRE2_SIZE *ovector;
  8236. sljit_u32 oveccount, capture_top;
  8237. if (arguments->callout == NULL)
  8238. return 0;
  8239. SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
  8240. begin = arguments->begin;
  8241. ovector = (PCRE2_SIZE*)(callout_block + 1);
  8242. oveccount = callout_block->capture_top;
  8243. SLJIT_ASSERT(oveccount >= 1);
  8244. callout_block->version = 2;
  8245. callout_block->callout_flags = 0;
  8246. /* Offsets in subject. */
  8247. callout_block->subject_length = arguments->end - arguments->begin;
  8248. callout_block->start_match = jit_ovector[0] - begin;
  8249. callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
  8250. callout_block->subject = begin;
  8251. /* Convert and copy the JIT offset vector to the ovector array. */
  8252. callout_block->capture_top = 1;
  8253. callout_block->offset_vector = ovector;
  8254. ovector[0] = PCRE2_UNSET;
  8255. ovector[1] = PCRE2_UNSET;
  8256. ovector += 2;
  8257. jit_ovector += 2;
  8258. capture_top = 1;
  8259. /* Convert pointers to sizes. */
  8260. while (--oveccount != 0)
  8261. {
  8262. capture_top++;
  8263. ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
  8264. ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
  8265. if (ovector[0] != PCRE2_UNSET)
  8266. callout_block->capture_top = capture_top;
  8267. ovector += 2;
  8268. jit_ovector += 2;
  8269. }
  8270. return (arguments->callout)(callout_block, arguments->callout_data);
  8271. }
  8272. #define CALLOUT_ARG_OFFSET(arg) \
  8273. SLJIT_OFFSETOF(pcre2_callout_block, arg)
  8274. static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
  8275. {
  8276. DEFINE_COMPILER;
  8277. backtrack_common *backtrack;
  8278. sljit_s32 mov_opcode;
  8279. unsigned int callout_length = (*cc == OP_CALLOUT)
  8280. ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
  8281. sljit_sw value1;
  8282. sljit_sw value2;
  8283. sljit_sw value3;
  8284. sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw);
  8285. PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
  8286. callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
  8287. allocate_stack(common, callout_arg_size);
  8288. SLJIT_ASSERT(common->capture_last_ptr != 0);
  8289. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
  8290. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  8291. value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
  8292. OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
  8293. OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
  8294. OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
  8295. /* These pointer sized fields temporarly stores internal variables. */
  8296. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
  8297. if (common->mark_ptr != 0)
  8298. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
  8299. mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
  8300. OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
  8301. OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
  8302. if (*cc == OP_CALLOUT)
  8303. {
  8304. value1 = 0;
  8305. value2 = 0;
  8306. value3 = 0;
  8307. }
  8308. else
  8309. {
  8310. value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
  8311. value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
  8312. value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
  8313. }
  8314. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
  8315. OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
  8316. OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
  8317. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
  8318. SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
  8319. /* Needed to save important temporary registers. */
  8320. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
  8321. /* SLJIT_R0 = arguments */
  8322. OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
  8323. GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
  8324. sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
  8325. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  8326. free_stack(common, callout_arg_size);
  8327. /* Check return value. */
  8328. OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
  8329. add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
  8330. if (common->abort_label == NULL)
  8331. add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
  8332. else
  8333. JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
  8334. return cc + callout_length;
  8335. }
  8336. #undef CALLOUT_ARG_SIZE
  8337. #undef CALLOUT_ARG_OFFSET
  8338. static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
  8339. {
  8340. while (TRUE)
  8341. {
  8342. switch (*cc)
  8343. {
  8344. case OP_CALLOUT_STR:
  8345. cc += GET(cc, 1 + 2*LINK_SIZE);
  8346. break;
  8347. case OP_NOT_WORD_BOUNDARY:
  8348. case OP_WORD_BOUNDARY:
  8349. case OP_CIRC:
  8350. case OP_CIRCM:
  8351. case OP_DOLL:
  8352. case OP_DOLLM:
  8353. case OP_CALLOUT:
  8354. case OP_ALT:
  8355. cc += PRIV(OP_lengths)[*cc];
  8356. break;
  8357. case OP_KET:
  8358. return FALSE;
  8359. default:
  8360. return TRUE;
  8361. }
  8362. }
  8363. }
  /* Emits the matching path of an assertion (opcodes in the range OP_ASSERT ..
  OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO or OP_BRAMINZERO.

  Every alternative of the assertion is compiled with compile_matchingpath(),
  immediately followed by its backtracking path. When 'conditional' is set,
  failure jumps are collected in backtrack->condfailed instead of
  backtrack->common.topbacktracks.

  The quit, accept, then_trap and positive assertion related members of
  'common' are saved on entry and restored before every return.

  Returns: the position after the closing ket of the assertion
  (cc + 1 + LINK_SIZE), or NULL when a compiler error is detected. */
  8364. static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
  8365. {
  8366. DEFINE_COMPILER;
  8367. int framesize;
  8368. int extrasize;
  8369. BOOL local_quit_available = FALSE;
  8370. BOOL needs_control_head;
  8371. int private_data_ptr;
  8372. backtrack_common altbacktrack;
  8373. PCRE2_SPTR ccbegin;
  8374. PCRE2_UCHAR opcode;
  8375. PCRE2_UCHAR bra = OP_BRA;
  8376. jump_list *tmp = NULL;
  /* 'target' is the list which receives the "assertion failed" jumps. */
  8377. jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
  8378. jump_list **found;
  8379. /* Saving previous accept variables. */
  8380. BOOL save_local_quit_available = common->local_quit_available;
  8381. BOOL save_in_positive_assertion = common->in_positive_assertion;
  8382. then_trap_backtrack *save_then_trap = common->then_trap;
  8383. struct sljit_label *save_quit_label = common->quit_label;
  8384. struct sljit_label *save_accept_label = common->accept_label;
  8385. jump_list *save_quit = common->quit;
  8386. jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
  8387. jump_list *save_accept = common->accept;
  8388. struct sljit_jump *jump;
  8389. struct sljit_jump *brajump = NULL;
  8390. /* Assert captures then. */
  8391. common->then_trap = NULL;
  /* Remember and step over an optional OP_BRAZERO / OP_BRAMINZERO prefix. */
  8392. if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  8393. {
  8394. SLJIT_ASSERT(!conditional);
  8395. bra = *cc;
  8396. cc++;
  8397. }
  8398. private_data_ptr = PRIVATE_DATA(cc);
  8399. SLJIT_ASSERT(private_data_ptr != 0);
  8400. framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
  8401. backtrack->framesize = framesize;
  8402. backtrack->private_data_ptr = private_data_ptr;
  8403. opcode = *cc;
  8404. SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
  /* 'found' receives the jump emitted after an alternative has matched: the
  local 'tmp' list for positive assertions, 'target' for negative ones. */
  8405. found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
  /* ccbegin is the start of the current alternative, cc is its end. */
  8406. ccbegin = cc;
  8407. cc += GET(cc, 1);
  8408. if (bra == OP_BRAMINZERO)
  8409. {
  8410. /* This is a braminzero backtrack path. */
  8411. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  8412. free_stack(common, 1);
  8413. brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  8414. }
  /* framesize < 0: no frame is initialized in this branch. At most STR_PTR
  and the control head are pushed; extrasize is the number of these slots. */
  8415. if (framesize < 0)
  8416. {
  8417. extrasize = 1;
  8418. if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
  8419. extrasize = 0;
  8420. if (needs_control_head)
  8421. extrasize++;
  8422. if (framesize == no_frame)
  8423. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
  8424. if (extrasize > 0)
  8425. allocate_stack(common, extrasize);
  8426. if (needs_control_head)
  8427. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  8428. if (extrasize > 0)
  8429. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  8430. if (needs_control_head)
  8431. {
  8432. SLJIT_ASSERT(extrasize == 2);
  8433. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
  8434. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  8435. }
  8436. }
  /* framesize >= 0: reserve framesize + extrasize slots. STR_PTR goes to
  STACK(0); the previous value of private_data_ptr (and the control head, if
  needed) follow it. The private data is set to the address just above the
  reserved area, then the frame is initialized. */
  8437. else
  8438. {
  8439. extrasize = needs_control_head ? 3 : 2;
  8440. allocate_stack(common, framesize + extrasize);
  8441. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  8442. OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
  8443. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  8444. if (needs_control_head)
  8445. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  8446. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  8447. if (needs_control_head)
  8448. {
  8449. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
  8450. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
  8451. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
  8452. }
  8453. else
  8454. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  8455. init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
  8456. }
  8457. memset(&altbacktrack, 0, sizeof(backtrack_common));
  8458. if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
  8459. {
  8460. /* Control verbs cannot escape from these asserts. */
  8461. local_quit_available = TRUE;
  8462. common->local_quit_available = TRUE;
  8463. common->quit_label = NULL;
  8464. common->quit = NULL;
  8465. }
  8466. common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
  8467. common->positive_assertion_quit = NULL;
  /* Compile the alternatives one by one; the loop is left after the last
  alternative (when *cc is not OP_ALT). */
  8468. while (1)
  8469. {
  8470. common->accept_label = NULL;
  8471. common->accept = NULL;
  8472. altbacktrack.top = NULL;
  8473. altbacktrack.topbacktracks = NULL;
  /* Second and later alternatives restart from the saved STR_PTR. */
  8474. if (*ccbegin == OP_ALT && extrasize > 0)
  8475. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  8476. altbacktrack.cc = ccbegin;
  8477. compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
  /* On compiler error restore the saved state and give up. */
  8478. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  8479. {
  8480. if (local_quit_available)
  8481. {
  8482. common->local_quit_available = save_local_quit_available;
  8483. common->quit_label = save_quit_label;
  8484. common->quit = save_quit;
  8485. }
  8486. common->in_positive_assertion = save_in_positive_assertion;
  8487. common->then_trap = save_then_trap;
  8488. common->accept_label = save_accept_label;
  8489. common->positive_assertion_quit = save_positive_assertion_quit;
  8490. common->accept = save_accept;
  8491. return NULL;
  8492. }
  /* The alternative matched: accept jumps collected while compiling it
  are bound to this point as well. */
  8493. common->accept_label = LABEL();
  8494. if (common->accept != NULL)
  8495. set_jumps(common->accept, common->accept_label);
  8496. /* Reset stack. */
  8497. if (framesize < 0)
  8498. {
  8499. if (framesize == no_frame)
  8500. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  8501. else if (extrasize > 0)
  8502. free_stack(common, extrasize);
  8503. if (needs_control_head)
  8504. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
  8505. }
  8506. else
  8507. {
  8508. if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
  8509. {
  8510. /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
  8511. OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
  8512. if (needs_control_head)
  8513. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
  8514. }
  8515. else
  8516. {
  8517. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  8518. if (needs_control_head)
  8519. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
  8520. add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  8521. OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
  8522. }
  8523. }
  /* For negative assertions a matching alternative means the assertion
  has failed: restore STR_PTR / private data as required before leaving. */
  8524. if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  8525. {
  8526. /* We know that STR_PTR was stored on the top of the stack. */
  8527. if (conditional)
  8528. {
  8529. if (extrasize > 0)
  8530. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
  8531. }
  8532. else if (bra == OP_BRAZERO)
  8533. {
  8534. if (framesize < 0)
  8535. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
  8536. else
  8537. {
  8538. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
  8539. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
  8540. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
  8541. }
  8542. OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  8543. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  8544. }
  8545. else if (framesize >= 0)
  8546. {
  8547. /* For OP_BRA and OP_BRAMINZERO. */
  8548. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
  8549. }
  8550. }
  /* Leave through the 'found' list, then emit the backtracking path of
  this alternative. */
  8551. add_jump(compiler, found, JUMP(SLJIT_JUMP));
  8552. compile_backtrackingpath(common, altbacktrack.top);
  /* On compiler error restore the saved state and give up. */
  8553. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  8554. {
  8555. if (local_quit_available)
  8556. {
  8557. common->local_quit_available = save_local_quit_available;
  8558. common->quit_label = save_quit_label;
  8559. common->quit = save_quit;
  8560. }
  8561. common->in_positive_assertion = save_in_positive_assertion;
  8562. common->then_trap = save_then_trap;
  8563. common->accept_label = save_accept_label;
  8564. common->positive_assertion_quit = save_positive_assertion_quit;
  8565. common->accept = save_accept;
  8566. return NULL;
  8567. }
  /* Failures of this alternative continue here: either with the next
  alternative or, after the last one, with the code following the loop. */
  8568. set_jumps(altbacktrack.topbacktracks, LABEL());
  8569. if (*cc != OP_ALT)
  8570. break;
  8571. ccbegin = cc;
  8572. cc += GET(cc, 1);
  8573. }
  8574. if (local_quit_available)
  8575. {
  8576. SLJIT_ASSERT(common->positive_assertion_quit == NULL);
  8577. /* Makes the check less complicated below. */
  8578. common->positive_assertion_quit = common->quit;
  8579. }
  8580. /* None of them matched. */
  8581. if (common->positive_assertion_quit != NULL)
  8582. {
  /* The quit jumps land on a stack reset sequence which the normal
  fall-through path skips by 'jump'. */
  8583. jump = JUMP(SLJIT_JUMP);
  8584. set_jumps(common->positive_assertion_quit, LABEL());
  8585. SLJIT_ASSERT(framesize != no_stack);
  8586. if (framesize < 0)
  8587. OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  8588. else
  8589. {
  8590. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  8591. add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  8592. OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
  8593. }
  8594. JUMPHERE(jump);
  8595. }
  /* Reload the control head saved at STACK(1) before the alternatives. */
  8596. if (needs_control_head)
  8597. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
  8598. if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  8599. {
  8600. /* Assert is failed. */
  8601. if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
  8602. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  8603. if (framesize < 0)
  8604. {
  8605. /* The topmost item should be 0. */
  8606. if (bra == OP_BRAZERO)
  8607. {
  8608. if (extrasize == 2)
  8609. free_stack(common, 1);
  8610. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  8611. }
  8612. else if (extrasize > 0)
  8613. free_stack(common, extrasize);
  8614. }
  8615. else
  8616. {
  8617. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
  8618. /* The topmost item should be 0. */
  8619. if (bra == OP_BRAZERO)
  8620. {
  8621. free_stack(common, framesize + extrasize - 1);
  8622. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  8623. }
  8624. else
  8625. free_stack(common, framesize + extrasize);
  8626. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
  8627. }
  /* With OP_BRAZERO the failure is not propagated: 'jump' is bound to
  the matching path label below instead of being added to 'target'. */
  8628. jump = JUMP(SLJIT_JUMP);
  8629. if (bra != OP_BRAZERO)
  8630. add_jump(compiler, target, jump);
  8631. /* Assert is successful. */
  8632. set_jumps(tmp, LABEL());
  8633. if (framesize < 0)
  8634. {
  8635. /* We know that STR_PTR was stored on the top of the stack. */
  8636. if (extrasize > 0)
  8637. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
  8638. /* Keep the STR_PTR on the top of the stack. */
  8639. if (bra == OP_BRAZERO)
  8640. {
  8641. OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  8642. if (extrasize == 2)
  8643. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  8644. }
  8645. else if (bra == OP_BRAMINZERO)
  8646. {
  8647. OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  8648. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  8649. }
  8650. }
  8651. else
  8652. {
  8653. if (bra == OP_BRA)
  8654. {
  8655. /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
  8656. OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
  8657. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
  8658. }
  8659. else
  8660. {
  8661. /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
  8662. OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
  8663. if (extrasize == 2)
  8664. {
  8665. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  8666. if (bra == OP_BRAMINZERO)
  8667. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  8668. }
  8669. else
  8670. {
  8671. SLJIT_ASSERT(extrasize == 3);
  8672. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
  8673. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
  8674. }
  8675. }
  8676. }
  8677. if (bra == OP_BRAZERO)
  8678. {
  8679. backtrack->matchingpath = LABEL();
  8680. SET_LABEL(jump, backtrack->matchingpath);
  8681. }
  8682. else if (bra == OP_BRAMINZERO)
  8683. {
  8684. JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
  8685. JUMPHERE(brajump);
  8686. if (framesize >= 0)
  8687. {
  8688. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  8689. add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  8690. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  8691. OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
  8692. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
  8693. }
  8694. set_jumps(backtrack->common.topbacktracks, LABEL());
  8695. }
  8696. }
  8697. else
  8698. {
  8699. /* AssertNot is successful. */
  8700. if (framesize < 0)
  8701. {
  8702. if (extrasize > 0)
  8703. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  8704. if (bra != OP_BRA)
  8705. {
  8706. if (extrasize == 2)
  8707. free_stack(common, 1);
  8708. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  8709. }
  8710. else if (extrasize > 0)
  8711. free_stack(common, extrasize);
  8712. }
  8713. else
  8714. {
  8715. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  8716. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
  8717. /* The topmost item should be 0. */
  8718. if (bra != OP_BRA)
  8719. {
  8720. free_stack(common, framesize + extrasize - 1);
  8721. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  8722. }
  8723. else
  8724. free_stack(common, framesize + extrasize);
  8725. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
  8726. }
  8727. if (bra == OP_BRAZERO)
  8728. backtrack->matchingpath = LABEL();
  8729. else if (bra == OP_BRAMINZERO)
  8730. {
  8731. JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
  8732. JUMPHERE(brajump);
  8733. }
  /* With a zero prefix the failure jumps are resolved right here and the
  list is cleared. */
  8734. if (bra != OP_BRA)
  8735. {
  8736. SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
  8737. set_jumps(backtrack->common.topbacktracks, LABEL());
  8738. backtrack->common.topbacktracks = NULL;
  8739. }
  8740. }
  /* Restore the state saved at function entry. */
  8741. if (local_quit_available)
  8742. {
  8743. common->local_quit_available = save_local_quit_available;
  8744. common->quit_label = save_quit_label;
  8745. common->quit = save_quit;
  8746. }
  8747. common->in_positive_assertion = save_in_positive_assertion;
  8748. common->then_trap = save_then_trap;
  8749. common->accept_label = save_accept_label;
  8750. common->positive_assertion_quit = save_positive_assertion_quit;
  8751. common->accept = save_accept;
  8752. return cc + 1 + LINK_SIZE;
  8753. }
  8754. static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
  8755. {
  8756. DEFINE_COMPILER;
  8757. int stacksize;
  8758. if (framesize < 0)
  8759. {
  8760. if (framesize == no_frame)
  8761. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  8762. else
  8763. {
  8764. stacksize = needs_control_head ? 1 : 0;
  8765. if (ket != OP_KET || has_alternatives)
  8766. stacksize++;
  8767. if (stacksize > 0)
  8768. free_stack(common, stacksize);
  8769. }
  8770. if (needs_control_head)
  8771. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
  8772. /* TMP2 which is set here used by OP_KETRMAX below. */
  8773. if (ket == OP_KETRMAX)
  8774. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
  8775. else if (ket == OP_KETRMIN)
  8776. {
  8777. /* Move the STR_PTR to the private_data_ptr. */
  8778. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
  8779. }
  8780. }
  8781. else
  8782. {
  8783. stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
  8784. OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
  8785. if (needs_control_head)
  8786. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
  8787. if (ket == OP_KETRMAX)
  8788. {
  8789. /* TMP2 which is set here used by OP_KETRMAX below. */
  8790. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  8791. }
  8792. }
  8793. if (needs_control_head)
  8794. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
  8795. }
  8796. static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
  8797. {
  8798. DEFINE_COMPILER;
  8799. if (common->capture_last_ptr != 0)
  8800. {
  8801. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
  8802. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
  8803. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
  8804. stacksize++;
  8805. }
  8806. if (common->optimized_cbracket[offset >> 1] == 0)
  8807. {
  8808. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  8809. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  8810. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
  8811. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  8812. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
  8813. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  8814. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
  8815. stacksize += 2;
  8816. }
  8817. return stacksize;
  8818. }
  8819. static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
  8820. {
  8821. if (PRIV(script_run)(ptr, endptr, FALSE))
  8822. return endptr;
  8823. return NULL;
  8824. }
  #ifdef SUPPORT_UNICODE

  /* Runtime helper: same as do_script_run(), but passes TRUE as the last
  (utf) argument of the script run check. Returns endptr if the check
  succeeds, NULL otherwise. */
  static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
  {
  return PRIV(script_run)(ptr, endptr, TRUE) ? endptr : NULL;
  }

  #endif /* SUPPORT_UNICODE */
  8833. static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
  8834. {
  8835. DEFINE_COMPILER;
  8836. SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
  8837. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  8838. #ifdef SUPPORT_UNICODE
  8839. sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
  8840. common->utf ? SLJIT_FUNC_OFFSET(do_script_run_utf) : SLJIT_FUNC_OFFSET(do_script_run));
  8841. #else
  8842. sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_script_run));
  8843. #endif
  8844. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
  8845. add_jump(compiler, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
  8846. }
  8847. /*
  8848. Handling bracketed expressions is probably the most complex part.
  8849. Stack layout naming characters:
  8850. S - Push the current STR_PTR
  8851. 0 - Push a 0 (NULL)
  8852. A - Push the current STR_PTR. Needed for restoring the STR_PTR
  8853. before the next alternative. Not pushed if there are no alternatives.
  8854. M - Any values pushed by the current alternative. Can be empty, or anything.
  8855. C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
  8856. L - Push the previous local (pointed by localptr) to the stack
  8857. () - optional values stored on the stack
  8858. ()* - optional, can be stored multiple times
  8859. The following list shows the regular expression templates, their PCRE byte codes
  8860. and stack layout supported by pcre-sljit.
  8861. (?:) OP_BRA | OP_KET A M
  8862. () OP_CBRA | OP_KET C M
  8863. (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
  8864. OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
  8865. (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
  8866. OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
  8867. ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
  8868. OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
  8869. ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
  8870. OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
  8871. (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
  8872. (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
  8873. ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
  8874. ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
  8875. (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
  8876. OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
  8877. (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
  8878. OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
  8879. ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
  8880. OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
  8881. ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
  8882. OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
  8883. Stack layout naming characters:
  8884. A - Push the alternative index (starting from 0) on the stack.
  8885. Not pushed if there are no alternatives.
  8886. M - Any values pushed by the current alternative. Can be empty, or anything.
  8887. The next list shows the possible content of a bracket:
  8888. (|) OP_*BRA | OP_ALT ... M A
  8889. (?()|) OP_*COND | OP_ALT M A
  8890. (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
  8891. Or nothing, if trace is unnecessary
  8892. */
  8893. static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
  8894. {
  8895. DEFINE_COMPILER;
  8896. backtrack_common *backtrack;
  8897. PCRE2_UCHAR opcode;
  8898. int private_data_ptr = 0;
  8899. int offset = 0;
  8900. int i, stacksize;
  8901. int repeat_ptr = 0, repeat_length = 0;
  8902. int repeat_type = 0, repeat_count = 0;
  8903. PCRE2_SPTR ccbegin;
  8904. PCRE2_SPTR matchingpath;
  8905. PCRE2_SPTR slot;
  8906. PCRE2_UCHAR bra = OP_BRA;
  8907. PCRE2_UCHAR ket;
  8908. assert_backtrack *assert;
  8909. BOOL has_alternatives;
  8910. BOOL needs_control_head = FALSE;
  8911. struct sljit_jump *jump;
  8912. struct sljit_jump *skip;
  8913. struct sljit_label *rmax_label = NULL;
  8914. struct sljit_jump *braminzero = NULL;
  8915. PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
  8916. if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  8917. {
  8918. bra = *cc;
  8919. cc++;
  8920. opcode = *cc;
  8921. }
  8922. opcode = *cc;
  8923. ccbegin = cc;
  8924. matchingpath = bracketend(cc) - 1 - LINK_SIZE;
  8925. ket = *matchingpath;
  8926. if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
  8927. {
  8928. repeat_ptr = PRIVATE_DATA(matchingpath);
  8929. repeat_length = PRIVATE_DATA(matchingpath + 1);
  8930. repeat_type = PRIVATE_DATA(matchingpath + 2);
  8931. repeat_count = PRIVATE_DATA(matchingpath + 3);
  8932. SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
  8933. if (repeat_type == OP_UPTO)
  8934. ket = OP_KETRMAX;
  8935. if (repeat_type == OP_MINUPTO)
  8936. ket = OP_KETRMIN;
  8937. }
  8938. matchingpath = ccbegin + 1 + LINK_SIZE;
  8939. SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
  8940. SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
  8941. cc += GET(cc, 1);
  8942. has_alternatives = *cc == OP_ALT;
  8943. if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
  8944. {
  8945. SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
  8946. compile_time_checks_must_be_grouped_together);
  8947. has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
  8948. }
  8949. if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  8950. opcode = OP_SCOND;
  8951. if (opcode == OP_CBRA || opcode == OP_SCBRA)
  8952. {
  8953. /* Capturing brackets has a pre-allocated space. */
  8954. offset = GET2(ccbegin, 1 + LINK_SIZE);
  8955. if (common->optimized_cbracket[offset] == 0)
  8956. {
  8957. private_data_ptr = OVECTOR_PRIV(offset);
  8958. offset <<= 1;
  8959. }
  8960. else
  8961. {
  8962. offset <<= 1;
  8963. private_data_ptr = OVECTOR(offset);
  8964. }
  8965. BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  8966. matchingpath += IMM2_SIZE;
  8967. }
  8968. else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
  8969. {
  8970. /* Other brackets simply allocate the next entry. */
  8971. private_data_ptr = PRIVATE_DATA(ccbegin);
  8972. SLJIT_ASSERT(private_data_ptr != 0);
  8973. BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  8974. if (opcode == OP_ONCE)
  8975. BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
  8976. }
  8977. /* Instructions before the first alternative. */
  8978. stacksize = 0;
  8979. if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  8980. stacksize++;
  8981. if (bra == OP_BRAZERO)
  8982. stacksize++;
  8983. if (stacksize > 0)
  8984. allocate_stack(common, stacksize);
  8985. stacksize = 0;
  8986. if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  8987. {
  8988. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  8989. stacksize++;
  8990. }
  8991. if (bra == OP_BRAZERO)
  8992. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  8993. if (bra == OP_BRAMINZERO)
  8994. {
  8995. /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
  8996. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  8997. if (ket != OP_KETRMIN)
  8998. {
  8999. free_stack(common, 1);
  9000. braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  9001. }
  9002. else if (opcode == OP_ONCE || opcode >= OP_SBRA)
  9003. {
  9004. jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  9005. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  9006. /* Nothing stored during the first run. */
  9007. skip = JUMP(SLJIT_JUMP);
  9008. JUMPHERE(jump);
  9009. /* Checking zero-length iteration. */
  9010. if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
  9011. {
  9012. /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
  9013. braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  9014. }
  9015. else
  9016. {
  9017. /* Except when the whole stack frame must be saved. */
  9018. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  9019. braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
  9020. }
  9021. JUMPHERE(skip);
  9022. }
  9023. else
  9024. {
  9025. jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  9026. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  9027. JUMPHERE(jump);
  9028. }
  9029. }
  9030. if (repeat_type != 0)
  9031. {
  9032. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
  9033. if (repeat_type == OP_EXACT)
  9034. rmax_label = LABEL();
  9035. }
  9036. if (ket == OP_KETRMIN)
  9037. BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
  9038. if (ket == OP_KETRMAX)
  9039. {
  9040. rmax_label = LABEL();
  9041. if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
  9042. BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
  9043. }
  9044. /* Handling capturing brackets and alternatives. */
  9045. if (opcode == OP_ONCE)
  9046. {
  9047. stacksize = 0;
  9048. if (needs_control_head)
  9049. {
  9050. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  9051. stacksize++;
  9052. }
  9053. if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
  9054. {
  9055. /* Neither capturing brackets nor recursions are found in the block. */
  9056. if (ket == OP_KETRMIN)
  9057. {
  9058. stacksize += 2;
  9059. if (!needs_control_head)
  9060. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  9061. }
  9062. else
  9063. {
  9064. if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
  9065. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
  9066. if (ket == OP_KETRMAX || has_alternatives)
  9067. stacksize++;
  9068. }
  9069. if (stacksize > 0)
  9070. allocate_stack(common, stacksize);
  9071. stacksize = 0;
  9072. if (needs_control_head)
  9073. {
  9074. stacksize++;
  9075. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  9076. }
  9077. if (ket == OP_KETRMIN)
  9078. {
  9079. if (needs_control_head)
  9080. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  9081. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  9082. if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
  9083. OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
  9084. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
  9085. }
  9086. else if (ket == OP_KETRMAX || has_alternatives)
  9087. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  9088. }
  9089. else
  9090. {
  9091. if (ket != OP_KET || has_alternatives)
  9092. stacksize++;
  9093. stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
  9094. allocate_stack(common, stacksize);
  9095. if (needs_control_head)
  9096. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  9097. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  9098. OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
  9099. stacksize = needs_control_head ? 1 : 0;
  9100. if (ket != OP_KET || has_alternatives)
  9101. {
  9102. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  9103. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  9104. stacksize++;
  9105. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
  9106. }
  9107. else
  9108. {
  9109. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  9110. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
  9111. }
  9112. init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
  9113. }
  9114. }
  9115. else if (opcode == OP_CBRA || opcode == OP_SCBRA)
  9116. {
  9117. /* Saving the previous values. */
  9118. if (common->optimized_cbracket[offset >> 1] != 0)
  9119. {
  9120. SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
  9121. allocate_stack(common, 2);
  9122. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  9123. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
  9124. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
  9125. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
  9126. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
  9127. }
  9128. else
  9129. {
  9130. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  9131. allocate_stack(common, 1);
  9132. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
  9133. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  9134. }
  9135. }
  9136. else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
  9137. {
  9138. /* Saving the previous value. */
  9139. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  9140. allocate_stack(common, 1);
  9141. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
  9142. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  9143. }
  9144. else if (has_alternatives)
  9145. {
  9146. /* Pushing the starting string pointer. */
  9147. allocate_stack(common, 1);
  9148. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  9149. }
  9150. /* Generating code for the first alternative. */
  9151. if (opcode == OP_COND || opcode == OP_SCOND)
  9152. {
  9153. if (*matchingpath == OP_CREF)
  9154. {
  9155. SLJIT_ASSERT(has_alternatives);
  9156. add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
  9157. CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  9158. matchingpath += 1 + IMM2_SIZE;
  9159. }
  9160. else if (*matchingpath == OP_DNCREF)
  9161. {
  9162. SLJIT_ASSERT(has_alternatives);
  9163. i = GET2(matchingpath, 1 + IMM2_SIZE);
  9164. slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
  9165. OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  9166. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
  9167. OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
  9168. slot += common->name_entry_size;
  9169. i--;
  9170. while (i-- > 0)
  9171. {
  9172. OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
  9173. OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
  9174. slot += common->name_entry_size;
  9175. }
  9176. OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  9177. add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
  9178. matchingpath += 1 + 2 * IMM2_SIZE;
  9179. }
  9180. else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
  9181. {
  9182. /* Never has other case. */
  9183. BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
  9184. SLJIT_ASSERT(!has_alternatives);
  9185. if (*matchingpath == OP_TRUE)
  9186. {
  9187. stacksize = 1;
  9188. matchingpath++;
  9189. }
  9190. else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
  9191. stacksize = 0;
  9192. else if (*matchingpath == OP_RREF)
  9193. {
  9194. stacksize = GET2(matchingpath, 1);
  9195. if (common->currententry == NULL)
  9196. stacksize = 0;
  9197. else if (stacksize == RREF_ANY)
  9198. stacksize = 1;
  9199. else if (common->currententry->start == 0)
  9200. stacksize = stacksize == 0;
  9201. else
  9202. stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
  9203. if (stacksize != 0)
  9204. matchingpath += 1 + IMM2_SIZE;
  9205. }
  9206. else
  9207. {
  9208. if (common->currententry == NULL || common->currententry->start == 0)
  9209. stacksize = 0;
  9210. else
  9211. {
  9212. stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
  9213. slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
  9214. i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
  9215. while (stacksize > 0)
  9216. {
  9217. if ((int)GET2(slot, 0) == i)
  9218. break;
  9219. slot += common->name_entry_size;
  9220. stacksize--;
  9221. }
  9222. }
  9223. if (stacksize != 0)
  9224. matchingpath += 1 + 2 * IMM2_SIZE;
  9225. }
  9226. /* The stacksize == 0 is a common "else" case. */
  9227. if (stacksize == 0)
  9228. {
  9229. if (*cc == OP_ALT)
  9230. {
  9231. matchingpath = cc + 1 + LINK_SIZE;
  9232. cc += GET(cc, 1);
  9233. }
  9234. else
  9235. matchingpath = cc;
  9236. }
  9237. }
  9238. else
  9239. {
  9240. SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
  9241. /* Similar code as PUSH_BACKTRACK macro. */
  9242. assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
  9243. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  9244. return NULL;
  9245. memset(assert, 0, sizeof(assert_backtrack));
  9246. assert->common.cc = matchingpath;
  9247. BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
  9248. matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
  9249. }
  9250. }
  9251. compile_matchingpath(common, matchingpath, cc, backtrack);
  9252. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  9253. return NULL;
  9254. if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
  9255. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  9256. if (opcode == OP_ONCE)
  9257. match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
  9258. if (opcode == OP_SCRIPT_RUN)
  9259. match_script_run_common(common, private_data_ptr, backtrack);
  9260. stacksize = 0;
  9261. if (repeat_type == OP_MINUPTO)
  9262. {
  9263. /* We need to preserve the counter. TMP2 will be used below. */
  9264. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  9265. stacksize++;
  9266. }
  9267. if (ket != OP_KET || bra != OP_BRA)
  9268. stacksize++;
  9269. if (offset != 0)
  9270. {
  9271. if (common->capture_last_ptr != 0)
  9272. stacksize++;
  9273. if (common->optimized_cbracket[offset >> 1] == 0)
  9274. stacksize += 2;
  9275. }
  9276. if (has_alternatives && opcode != OP_ONCE)
  9277. stacksize++;
  9278. if (stacksize > 0)
  9279. allocate_stack(common, stacksize);
  9280. stacksize = 0;
  9281. if (repeat_type == OP_MINUPTO)
  9282. {
  9283. /* TMP2 was set above. */
  9284. OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
  9285. stacksize++;
  9286. }
  9287. if (ket != OP_KET || bra != OP_BRA)
  9288. {
  9289. if (ket != OP_KET)
  9290. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  9291. else
  9292. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  9293. stacksize++;
  9294. }
  9295. if (offset != 0)
  9296. stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
  9297. /* Skip and count the other alternatives. */
  9298. i = 1;
  9299. while (*cc == OP_ALT)
  9300. {
  9301. cc += GET(cc, 1);
  9302. i++;
  9303. }
  9304. if (has_alternatives)
  9305. {
  9306. if (opcode != OP_ONCE)
  9307. {
  9308. if (i <= 3)
  9309. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  9310. else
  9311. BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
  9312. }
  9313. if (ket != OP_KETRMAX)
  9314. BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
  9315. }
  9316. /* Must be after the matchingpath label. */
  9317. if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
  9318. {
  9319. SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
  9320. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  9321. }
  9322. if (ket == OP_KETRMAX)
  9323. {
  9324. if (repeat_type != 0)
  9325. {
  9326. if (has_alternatives)
  9327. BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
  9328. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  9329. JUMPTO(SLJIT_NOT_ZERO, rmax_label);
  9330. /* Drop STR_PTR for greedy plus quantifier. */
  9331. if (opcode != OP_ONCE)
  9332. free_stack(common, 1);
  9333. }
  9334. else if (opcode < OP_BRA || opcode >= OP_SBRA)
  9335. {
  9336. if (has_alternatives)
  9337. BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
  9338. /* Checking zero-length iteration. */
  9339. if (opcode != OP_ONCE)
  9340. {
  9341. /* This case includes opcodes such as OP_SCRIPT_RUN. */
  9342. CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
  9343. /* Drop STR_PTR for greedy plus quantifier. */
  9344. if (bra != OP_BRAZERO)
  9345. free_stack(common, 1);
  9346. }
  9347. else
  9348. /* TMP2 must contain the starting STR_PTR. */
  9349. CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
  9350. }
  9351. else
  9352. JUMPTO(SLJIT_JUMP, rmax_label);
  9353. BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
  9354. }
  9355. if (repeat_type == OP_EXACT)
  9356. {
  9357. count_match(common);
  9358. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  9359. JUMPTO(SLJIT_NOT_ZERO, rmax_label);
  9360. }
  9361. else if (repeat_type == OP_UPTO)
  9362. {
  9363. /* We need to preserve the counter. */
  9364. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  9365. allocate_stack(common, 1);
  9366. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  9367. }
  9368. if (bra == OP_BRAZERO)
  9369. BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
  9370. if (bra == OP_BRAMINZERO)
  9371. {
  9372. /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
  9373. JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
  9374. if (braminzero != NULL)
  9375. {
  9376. JUMPHERE(braminzero);
  9377. /* We need to release the end pointer to perform the
  9378. backtrack for the zero-length iteration. When
  9379. framesize is < 0, OP_ONCE will do the release itself. */
  9380. if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
  9381. {
  9382. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  9383. add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  9384. OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
  9385. }
  9386. else if (ket == OP_KETRMIN && opcode != OP_ONCE)
  9387. free_stack(common, 1);
  9388. }
  9389. /* Continue to the normal backtrack. */
  9390. }
  9391. if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
  9392. count_match(common);
  9393. cc += 1 + LINK_SIZE;
  9394. if (opcode == OP_ONCE)
  9395. {
  9396. /* We temporarily encode the needs_control_head in the lowest bit.
  9397. Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
  9398. the same value for small signed numbers (including negative numbers). */
  9399. BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
  9400. }
  9401. return cc + repeat_length;
  9402. }
  9403. static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
  9404. {
  9405. DEFINE_COMPILER;
  9406. backtrack_common *backtrack;
  9407. PCRE2_UCHAR opcode;
  9408. int private_data_ptr;
  9409. int cbraprivptr = 0;
  9410. BOOL needs_control_head;
  9411. int framesize;
  9412. int stacksize;
  9413. int offset = 0;
  9414. BOOL zero = FALSE;
  9415. PCRE2_SPTR ccbegin = NULL;
  9416. int stack; /* Also contains the offset of control head. */
  9417. struct sljit_label *loop = NULL;
  9418. struct jump_list *emptymatch = NULL;
  9419. PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
  9420. if (*cc == OP_BRAPOSZERO)
  9421. {
  9422. zero = TRUE;
  9423. cc++;
  9424. }
  9425. opcode = *cc;
  9426. private_data_ptr = PRIVATE_DATA(cc);
  9427. SLJIT_ASSERT(private_data_ptr != 0);
  9428. BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
  9429. switch(opcode)
  9430. {
  9431. case OP_BRAPOS:
  9432. case OP_SBRAPOS:
  9433. ccbegin = cc + 1 + LINK_SIZE;
  9434. break;
  9435. case OP_CBRAPOS:
  9436. case OP_SCBRAPOS:
  9437. offset = GET2(cc, 1 + LINK_SIZE);
  9438. /* This case cannot be optimized in the same was as
  9439. normal capturing brackets. */
  9440. SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  9441. cbraprivptr = OVECTOR_PRIV(offset);
  9442. offset <<= 1;
  9443. ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  9444. break;
  9445. default:
  9446. SLJIT_UNREACHABLE();
  9447. break;
  9448. }
  9449. framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
  9450. BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
  9451. if (framesize < 0)
  9452. {
  9453. if (offset != 0)
  9454. {
  9455. stacksize = 2;
  9456. if (common->capture_last_ptr != 0)
  9457. stacksize++;
  9458. }
  9459. else
  9460. stacksize = 1;
  9461. if (needs_control_head)
  9462. stacksize++;
  9463. if (!zero)
  9464. stacksize++;
  9465. BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  9466. allocate_stack(common, stacksize);
  9467. if (framesize == no_frame)
  9468. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
  9469. stack = 0;
  9470. if (offset != 0)
  9471. {
  9472. stack = 2;
  9473. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  9474. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  9475. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
  9476. if (common->capture_last_ptr != 0)
  9477. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
  9478. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
  9479. if (needs_control_head)
  9480. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  9481. if (common->capture_last_ptr != 0)
  9482. {
  9483. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
  9484. stack = 3;
  9485. }
  9486. }
  9487. else
  9488. {
  9489. if (needs_control_head)
  9490. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  9491. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  9492. stack = 1;
  9493. }
  9494. if (needs_control_head)
  9495. stack++;
  9496. if (!zero)
  9497. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
  9498. if (needs_control_head)
  9499. {
  9500. stack--;
  9501. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
  9502. }
  9503. }
  9504. else
  9505. {
  9506. stacksize = framesize + 1;
  9507. if (!zero)
  9508. stacksize++;
  9509. if (needs_control_head)
  9510. stacksize++;
  9511. if (offset == 0)
  9512. stacksize++;
  9513. BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  9514. allocate_stack(common, stacksize);
  9515. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  9516. if (needs_control_head)
  9517. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  9518. OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
  9519. stack = 0;
  9520. if (!zero)
  9521. {
  9522. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
  9523. stack = 1;
  9524. }
  9525. if (needs_control_head)
  9526. {
  9527. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
  9528. stack++;
  9529. }
  9530. if (offset == 0)
  9531. {
  9532. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
  9533. stack++;
  9534. }
  9535. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
  9536. init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
  9537. stack -= 1 + (offset == 0);
  9538. }
  9539. if (offset != 0)
  9540. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
  9541. loop = LABEL();
  9542. while (*cc != OP_KETRPOS)
  9543. {
  9544. backtrack->top = NULL;
  9545. backtrack->topbacktracks = NULL;
  9546. cc += GET(cc, 1);
  9547. compile_matchingpath(common, ccbegin, cc, backtrack);
  9548. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  9549. return NULL;
  9550. if (framesize < 0)
  9551. {
  9552. if (framesize == no_frame)
  9553. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  9554. if (offset != 0)
  9555. {
  9556. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
  9557. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  9558. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
  9559. if (common->capture_last_ptr != 0)
  9560. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
  9561. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
  9562. }
  9563. else
  9564. {
  9565. if (opcode == OP_SBRAPOS)
  9566. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  9567. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  9568. }
  9569. /* Even if the match is empty, we need to reset the control head. */
  9570. if (needs_control_head)
  9571. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
  9572. if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
  9573. add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
  9574. if (!zero)
  9575. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
  9576. }
  9577. else
  9578. {
  9579. if (offset != 0)
  9580. {
  9581. OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
  9582. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
  9583. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  9584. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
  9585. if (common->capture_last_ptr != 0)
  9586. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
  9587. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
  9588. }
  9589. else
  9590. {
  9591. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  9592. OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
  9593. if (opcode == OP_SBRAPOS)
  9594. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
  9595. OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
  9596. }
  9597. /* Even if the match is empty, we need to reset the control head. */
  9598. if (needs_control_head)
  9599. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
  9600. if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
  9601. add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
  9602. if (!zero)
  9603. {
  9604. if (framesize < 0)
  9605. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
  9606. else
  9607. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  9608. }
  9609. }
  9610. JUMPTO(SLJIT_JUMP, loop);
  9611. flush_stubs(common);
  9612. compile_backtrackingpath(common, backtrack->top);
  9613. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  9614. return NULL;
  9615. set_jumps(backtrack->topbacktracks, LABEL());
  9616. if (framesize < 0)
  9617. {
  9618. if (offset != 0)
  9619. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
  9620. else
  9621. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  9622. }
  9623. else
  9624. {
  9625. if (offset != 0)
  9626. {
  9627. /* Last alternative. */
  9628. if (*cc == OP_KETRPOS)
  9629. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  9630. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
  9631. }
  9632. else
  9633. {
  9634. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  9635. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
  9636. }
  9637. }
  9638. if (*cc == OP_KETRPOS)
  9639. break;
  9640. ccbegin = cc + 1 + LINK_SIZE;
  9641. }
  9642. /* We don't have to restore the control head in case of a failed match. */
  9643. backtrack->topbacktracks = NULL;
  9644. if (!zero)
  9645. {
  9646. if (framesize < 0)
  9647. add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  9648. else /* TMP2 is set to [private_data_ptr] above. */
  9649. add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
  9650. }
  9651. /* None of them matched. */
  9652. set_jumps(emptymatch, LABEL());
  9653. count_match(common);
  9654. return cc + 1 + LINK_SIZE;
  9655. }
  9656. static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
  9657. {
  9658. int class_len;
  9659. *opcode = *cc;
  9660. *exact = 0;
  9661. if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
  9662. {
  9663. cc++;
  9664. *type = OP_CHAR;
  9665. }
  9666. else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
  9667. {
  9668. cc++;
  9669. *type = OP_CHARI;
  9670. *opcode -= OP_STARI - OP_STAR;
  9671. }
  9672. else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
  9673. {
  9674. cc++;
  9675. *type = OP_NOT;
  9676. *opcode -= OP_NOTSTAR - OP_STAR;
  9677. }
  9678. else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
  9679. {
  9680. cc++;
  9681. *type = OP_NOTI;
  9682. *opcode -= OP_NOTSTARI - OP_STAR;
  9683. }
  9684. else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
  9685. {
  9686. cc++;
  9687. *opcode -= OP_TYPESTAR - OP_STAR;
  9688. *type = OP_END;
  9689. }
  9690. else
  9691. {
  9692. SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
  9693. *type = *opcode;
  9694. cc++;
  9695. class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
  9696. *opcode = cc[class_len - 1];
  9697. if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
  9698. {
  9699. *opcode -= OP_CRSTAR - OP_STAR;
  9700. *end = cc + class_len;
  9701. if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
  9702. {
  9703. *exact = 1;
  9704. *opcode -= OP_PLUS - OP_STAR;
  9705. }
  9706. }
  9707. else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
  9708. {
  9709. *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
  9710. *end = cc + class_len;
  9711. if (*opcode == OP_POSPLUS)
  9712. {
  9713. *exact = 1;
  9714. *opcode = OP_POSSTAR;
  9715. }
  9716. }
  9717. else
  9718. {
  9719. SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
  9720. *max = GET2(cc, (class_len + IMM2_SIZE));
  9721. *exact = GET2(cc, class_len);
  9722. if (*max == 0)
  9723. {
  9724. if (*opcode == OP_CRPOSRANGE)
  9725. *opcode = OP_POSSTAR;
  9726. else
  9727. *opcode -= OP_CRRANGE - OP_STAR;
  9728. }
  9729. else
  9730. {
  9731. *max -= *exact;
  9732. if (*max == 0)
  9733. *opcode = OP_EXACT;
  9734. else if (*max == 1)
  9735. {
  9736. if (*opcode == OP_CRPOSRANGE)
  9737. *opcode = OP_POSQUERY;
  9738. else
  9739. *opcode -= OP_CRRANGE - OP_QUERY;
  9740. }
  9741. else
  9742. {
  9743. if (*opcode == OP_CRPOSRANGE)
  9744. *opcode = OP_POSUPTO;
  9745. else
  9746. *opcode -= OP_CRRANGE - OP_UPTO;
  9747. }
  9748. }
  9749. *end = cc + class_len + 2 * IMM2_SIZE;
  9750. }
  9751. return cc;
  9752. }
  9753. switch(*opcode)
  9754. {
  9755. case OP_EXACT:
  9756. *exact = GET2(cc, 0);
  9757. cc += IMM2_SIZE;
  9758. break;
  9759. case OP_PLUS:
  9760. case OP_MINPLUS:
  9761. *exact = 1;
  9762. *opcode -= OP_PLUS - OP_STAR;
  9763. break;
  9764. case OP_POSPLUS:
  9765. *exact = 1;
  9766. *opcode = OP_POSSTAR;
  9767. break;
  9768. case OP_UPTO:
  9769. case OP_MINUPTO:
  9770. case OP_POSUPTO:
  9771. *max = GET2(cc, 0);
  9772. cc += IMM2_SIZE;
  9773. break;
  9774. }
  9775. if (*type == OP_END)
  9776. {
  9777. *type = *cc;
  9778. *end = next_opcode(common, cc);
  9779. cc++;
  9780. return cc;
  9781. }
  9782. *end = cc + 1;
  9783. #ifdef SUPPORT_UNICODE
  9784. if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
  9785. #endif
  9786. return cc;
  9787. }
  /* Emits the matching-path code for a single-item iterator (the opcodes that
     compile_matchingpath() dispatches here: the STAR / PLUS / QUERY / UPTO /
     EXACT families with their minimizing and possessive variants, and
     repeated classes).

     get_iterator_parameters() decodes the item into:
       opcode - the repeat kind handled by the switch below,
       type   - the repeated item, passed to compile_char1_matchingpath(),
       max    - the repeat limit used by the UPTO variants,
       exact  - the number of mandatory repetitions emitted first,
       end    - the code position returned to the caller.

     Arguments:
       common - the compiler state
       cc     - points to the iterator opcode
       parent - not referenced directly in this body; presumably consumed by
                the PUSH_BACKTRACK macro - TODO confirm

     Returns: end, advanced by 2 when a following OP_CHAR / OP_CHARI was
     consumed by the "charpos" path of OP_STAR / OP_UPTO. */
  9788. static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
  9789. {
  9790. DEFINE_COMPILER;
  9791. backtrack_common *backtrack;
  9792. PCRE2_UCHAR opcode;
  9793. PCRE2_UCHAR type;
  9794. sljit_u32 max = 0, exact;
  9795. sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
  9796. sljit_s32 early_fail_type;
  9797. BOOL charpos_enabled;
  9798. PCRE2_UCHAR charpos_char;
  9799. unsigned int charpos_othercasebit;
  9800. PCRE2_SPTR end;
  9801. jump_list *no_match = NULL;
  9802. jump_list *no_char1_match = NULL;
  9803. struct sljit_jump *jump = NULL;
  9804. struct sljit_label *label;
  9805. int private_data_ptr = PRIVATE_DATA(cc);
  /* Two words of iterator state live either on the backtrack stack
     (private_data_ptr == 0) or at private_data_ptr relative to SLJIT_SP. */
  9806. int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
  9807. int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
  9808. int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
  9809. int tmp_base, tmp_offset;
  9810. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  9811. BOOL use_tmp;
  9812. #endif
  /* NOTE(review): presumably allocates the char_iterator_backtrack record and
     assigns `backtrack`, which is used below - confirm against the macro. */
  9813. PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
  /* The private data word of cc + 1 packs the early-fail type in its low
     three bits; the remaining bits are the early-fail slot offset. */
  9814. early_fail_type = (early_fail_ptr & 0x7);
  9815. early_fail_ptr >>= 3;
  9816. /* During recursion, these optimizations are disabled. */
  9817. if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
  9818. {
  9819. early_fail_ptr = 0;
  9820. early_fail_type = type_skip;
  9821. }
  9822. SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
  9823. || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
  /* type_fail: backtrack at once when STR_PTR is not beyond the position
     stored in the early-fail slot. */
  9824. if (early_fail_type == type_fail)
  9825. add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
  9826. cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
  /* Scratch location (loop counter or saved position): the TMP3 register,
     except for OP_EXTUNI where the POSSESSIVE0 local slot is used instead. */
  9827. if (type != OP_EXTUNI)
  9828. {
  9829. tmp_base = TMP3;
  9830. tmp_offset = 0;
  9831. }
  9832. else
  9833. {
  9834. tmp_base = SLJIT_MEM1(SLJIT_SP);
  9835. tmp_offset = POSSESSIVE0;
  9836. }
  9837. /* Handle fixed part first. */
  9838. if (exact > 1)
  9839. {
  9840. SLJIT_ASSERT(early_fail_ptr == 0);
  /* Complete (non-partial) matching, non-UTF, and neither OP_ANYNL nor
     OP_EXTUNI: compare STR_PTR + exact code units against STR_END once, then
     run the counted loop with the last compile_char1_matchingpath() argument
     FALSE. NOTE(review): that argument presumably enables the per-item
     end-of-subject check - confirm. */
  9841. if (common->mode == PCRE2_JIT_COMPLETE
  9842. #ifdef SUPPORT_UNICODE
  9843. && !common->utf
  9844. #endif
  9845. && type != OP_ANYNL && type != OP_EXTUNI)
  9846. {
  9847. OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
  9848. add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
  9849. OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
  9850. label = LABEL();
  9851. compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
  9852. OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  9853. JUMPTO(SLJIT_NOT_ZERO, label);
  9854. }
  9855. else
  9856. {
  /* Same counted loop, with the last argument TRUE for every item. */
  9857. OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
  9858. label = LABEL();
  9859. compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
  9860. OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  9861. JUMPTO(SLJIT_NOT_ZERO, label);
  9862. }
  9863. }
  9864. else if (exact == 1)
  9865. {
  9866. compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
  /* type_fail_range uses the two words at early_fail_ptr:
     TMP1 = word0 - word1 and TMP2 = STR_PTR - word1. Backtrack when
     TMP2 <= TMP1, otherwise store STR_PTR into word1. */
  9867. if (early_fail_type == type_fail_range)
  9868. {
  9869. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
  9870. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw));
  9871. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
  9872. OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
  9873. add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
  9874. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw), STR_PTR, 0);
  9875. }
  9876. }
  /* Variable part, selected by the repeat kind. */
  9877. switch(opcode)
  9878. {
  /* Greedy repeats. */
  9879. case OP_STAR:
  9880. case OP_UPTO:
  9881. SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);
  9882. if (type == OP_ANYNL || type == OP_EXTUNI)
  9883. {
  /* Two stack words are pushed first (STR_PTR and 0); after every
     successful iteration one more word holding STR_PTR is pushed. For
     OP_UPTO the remaining count is kept in the POSSESSIVE0 slot and the
     loop is left when it reaches zero. */
  9884. SLJIT_ASSERT(private_data_ptr == 0);
  9885. SLJIT_ASSERT(early_fail_ptr == 0);
  9886. allocate_stack(common, 2);
  9887. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  9888. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
  9889. if (opcode == OP_UPTO)
  9890. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
  9891. label = LABEL();
  9892. compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
  9893. if (opcode == OP_UPTO)
  9894. {
  9895. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
  9896. OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  9897. jump = JUMP(SLJIT_ZERO);
  9898. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
  9899. }
  9900. /* We cannot use TMP3 because of allocate_stack. */
  9901. allocate_stack(common, 1);
  9902. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  9903. JUMPTO(SLJIT_JUMP, label);
  9904. if (jump != NULL)
  9905. JUMPHERE(jump);
  9906. BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  9907. break;
  9908. }
  9909. #ifdef SUPPORT_UNICODE
  9910. else if (type == OP_ALLANY && !common->invalid_utf)
  9911. #else
  9912. else if (type == OP_ALLANY)
  9913. #endif
  9914. {
  9915. if (opcode == OP_STAR)
  9916. {
  /* OP_ALLANY with OP_STAR: no loop is emitted. offset0 receives STR_END,
     offset1 the starting STR_PTR, and STR_PTR is moved straight to
     STR_END. */
  9917. if (private_data_ptr == 0)
  9918. allocate_stack(common, 2);
  9919. OP1(SLJIT_MOV, base, offset0, STR_END, 0);
  9920. OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
  9921. OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
  9922. process_partial_match(common);
  9923. if (early_fail_ptr != 0)
  9924. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
  9925. BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  9926. break;
  9927. }
  9928. #ifdef SUPPORT_UNICODE
  9929. else if (!common->utf)
  9930. #else
  9931. else
  9932. #endif
  9933. {
  /* OP_ALLANY with OP_UPTO, non-UTF: advance STR_PTR by max code units in one
     step. In complete mode the result is clamped to STR_END with a
     conditional move; otherwise process_partial_match() is emitted for the
     case where STR_PTR went beyond STR_END. */
  9934. if (private_data_ptr == 0)
  9935. allocate_stack(common, 2);
  9936. OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
  9937. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
  9938. if (common->mode == PCRE2_JIT_COMPLETE)
  9939. {
  9940. OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
  9941. CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
  9942. }
  9943. else
  9944. {
  9945. jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
  9946. process_partial_match(common);
  9947. JUMPHERE(jump);
  9948. }
  9949. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  9950. if (early_fail_ptr != 0)
  9951. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
  9952. BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  9953. break;
  9954. }
  9955. }
  /* "charpos" path: considered when the repeated item is not itself a
     literal and the opcode at `end` is OP_CHAR / OP_CHARI. With Unicode
     support and UTF mode on, it is used only when end[1] has no extra
     length. */
  9956. charpos_enabled = FALSE;
  9957. charpos_char = 0;
  9958. charpos_othercasebit = 0;
  9959. if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
  9960. {
  9961. #ifdef SUPPORT_UNICODE
  9962. charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
  9963. #else
  9964. charpos_enabled = TRUE;
  9965. #endif
  /* Caseless literal with an other case: the path is dropped when
     char_get_othercase_bit() returns 0. */
  9966. if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
  9967. {
  9968. charpos_othercasebit = char_get_othercase_bit(common, end + 1);
  9969. if (charpos_othercasebit == 0)
  9970. charpos_enabled = FALSE;
  9971. }
  9972. if (charpos_enabled)
  9973. {
  9974. charpos_char = end[1];
  9975. /* Consume the OP_CHAR opcode. */
  9976. end += 2;
  9977. #if PCRE2_CODE_UNIT_WIDTH == 8
  9978. SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
  9979. #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* When bit 0x100 is set, the low byte of the mask is shifted up by 8. */
  9980. SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
  9981. if ((charpos_othercasebit & 0x100) != 0)
  9982. charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
  9983. #endif
  /* The literal is stored with the case bit set; the generated code ORs the
     same bit into each subject code unit before comparing. */
  9984. if (charpos_othercasebit != 0)
  9985. charpos_char |= charpos_othercasebit;
  9986. BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
  9987. BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
  9988. BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
  9989. }
  9990. }
  9991. if (charpos_enabled)
  9992. {
  /* For OP_UPTO the counter in tmp_base / tmp_offset starts at max + 1. */
  9993. if (opcode == OP_UPTO)
  9994. OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
  9995. /* Search the first instance of charpos_char. */
  /* The initial jump skips the first item match, so the code unit at the
     starting position is tested before anything is consumed. */
  9996. jump = JUMP(SLJIT_JUMP);
  9997. label = LABEL();
  9998. if (opcode == OP_UPTO)
  9999. {
  10000. OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  10001. add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
  10002. }
  10003. compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
  10004. if (early_fail_ptr != 0)
  10005. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
  10006. JUMPHERE(jump);
  10007. detect_partial_match(common, &backtrack->topbacktracks);
  10008. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  10009. if (charpos_othercasebit != 0)
  10010. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
  10011. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
  /* First instance found: both state words start out as this position. */
  10012. if (private_data_ptr == 0)
  10013. allocate_stack(common, 2);
  10014. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  10015. OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
  10016. if (opcode == OP_UPTO)
  10017. {
  10018. OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  10019. add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
  10020. }
  10021. /* Search the last instance of charpos_char. */
  /* offset0 is overwritten each time the literal is seen again. */
  10022. label = LABEL();
  10023. compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
  10024. if (early_fail_ptr != 0)
  10025. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
  10026. detect_partial_match(common, &no_match);
  10027. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  10028. if (charpos_othercasebit != 0)
  10029. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
  10030. if (opcode == OP_STAR)
  10031. {
  10032. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
  10033. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  10034. JUMPTO(SLJIT_JUMP, label);
  10035. }
  10036. else
  10037. {
  10038. jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
  10039. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  10040. JUMPHERE(jump);
  10041. OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  10042. JUMPTO(SLJIT_NOT_ZERO, label);
  10043. }
  /* Exit: continue one code unit after the last recorded position (the
     literal was consumed above by end += 2) and store that back in
     offset0. */
  10044. set_jumps(no_match, LABEL());
  10045. OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
  10046. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  10047. }
  10048. else
  10049. {
  /* Generic greedy loop. offset1 holds the starting position and offset0
     receives the final one. */
  10050. if (private_data_ptr == 0)
  10051. allocate_stack(common, 2);
  10052. OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
  10053. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  /* UTF mode: the position after the last successful item is saved (in TMP3
     when use_tmp is set, else in offset0) and reloaded on exit. */
  10054. use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
  10055. SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);
  10056. if (common->utf)
  10057. OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
  10058. #endif
  10059. if (opcode == OP_UPTO)
  10060. OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
  10061. detect_partial_match(common, &no_match);
  10062. label = LABEL();
  10063. compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
  10064. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  10065. if (common->utf)
  10066. OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
  10067. #endif
  10068. if (opcode == OP_UPTO)
  10069. {
  10070. OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  10071. add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
  10072. }
  10073. detect_partial_match_to(common, label);
  /* The fall-through path adds one code unit so that it can share the
     subtract-by-one used by the no_char1_match exit in the non-UTF branch
     below. */
  10074. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  10075. set_jumps(no_char1_match, LABEL());
  10076. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  10077. if (common->utf)
  10078. {
  10079. set_jumps(no_match, LABEL());
  10080. if (use_tmp)
  10081. {
  10082. OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  10083. OP1(SLJIT_MOV, base, offset0, TMP3, 0);
  10084. }
  10085. else
  10086. OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  10087. }
  10088. else
  10089. #endif
  10090. {
  10091. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  10092. set_jumps(no_match, LABEL());
  10093. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  10094. }
  10095. if (early_fail_ptr != 0)
  10096. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
  10097. }
  10098. BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  10099. break;
  /* Minimizing star: only the current position is recorded here; no item is
     matched on the matching path. */
  10100. case OP_MINSTAR:
  10101. if (private_data_ptr == 0)
  10102. allocate_stack(common, 1);
  10103. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  10104. BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  10105. if (early_fail_ptr != 0)
  10106. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
  10107. break;
  /* Minimizing bounded repeat: records the position and max + 1. */
  10108. case OP_MINUPTO:
  10109. SLJIT_ASSERT(early_fail_ptr == 0);
  10110. if (private_data_ptr == 0)
  10111. allocate_stack(common, 2);
  10112. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  10113. OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
  10114. BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  10115. break;
  /* Optional item: the position is recorded; only the greedy form matches the
     item here. */
  10116. case OP_QUERY:
  10117. case OP_MINQUERY:
  10118. SLJIT_ASSERT(early_fail_ptr == 0);
  10119. if (private_data_ptr == 0)
  10120. allocate_stack(common, 1);
  10121. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  10122. if (opcode == OP_QUERY)
  10123. compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
  10124. BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  10125. break;
  /* Nothing to add: the fixed part above already emitted the repetitions. */
  10126. case OP_EXACT:
  10127. break;
  /* Possessive repeats: none of the cases below calls allocate_stack() or
     writes base / offset0 / offset1. */
  10128. case OP_POSSTAR:
  10129. #if defined SUPPORT_UNICODE
  10130. if (type == OP_ALLANY && !common->invalid_utf)
  10131. #else
  10132. if (type == OP_ALLANY)
  10133. #endif
  10134. {
  /* OP_ALLANY: move STR_PTR directly to STR_END. */
  10135. OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
  10136. process_partial_match(common);
  10137. if (early_fail_ptr != 0)
  10138. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
  10139. break;
  10140. }
  10141. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  10142. if (common->utf)
  10143. {
  /* UTF mode: the position after each successful item is saved in
     tmp_base / tmp_offset and STR_PTR is reloaded from it on exit. */
  10144. OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  10145. detect_partial_match(common, &no_match);
  10146. label = LABEL();
  10147. compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
  10148. OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  10149. detect_partial_match_to(common, label);
  10150. set_jumps(no_match, LABEL());
  10151. OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
  10152. if (early_fail_ptr != 0)
  10153. {
  10154. if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
  10155. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
  10156. else
  10157. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
  10158. }
  10159. break;
  10160. }
  10161. #endif
  /* Otherwise: the same add-one / subtract-one exit arrangement as the
     generic greedy loop above. */
  10162. detect_partial_match(common, &no_match);
  10163. label = LABEL();
  10164. compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
  10165. detect_partial_match_to(common, label);
  10166. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  10167. set_jumps(no_char1_match, LABEL());
  10168. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  10169. set_jumps(no_match, LABEL());
  10170. if (early_fail_ptr != 0)
  10171. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
  10172. break;
  10173. case OP_POSUPTO:
  10174. SLJIT_ASSERT(early_fail_ptr == 0);
  10175. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  10176. if (common->utf)
  10177. {
  /* UTF mode: the last good position is kept in the POSSESSIVE1 slot and the
     remaining count in tmp_base / tmp_offset. */
  10178. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
  10179. OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
  10180. detect_partial_match(common, &no_match);
  10181. label = LABEL();
  10182. compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
  10183. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
  10184. OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  10185. add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
  10186. detect_partial_match_to(common, label);
  10187. set_jumps(no_match, LABEL());
  10188. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
  10189. break;
  10190. }
  10191. #endif
  10192. if (type == OP_ALLANY)
  10193. {
  /* OP_ALLANY: advance by max code units at once, then clamp to STR_END
     (complete mode) or emit process_partial_match() when STR_END was
     passed. */
  10194. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
  10195. if (common->mode == PCRE2_JIT_COMPLETE)
  10196. {
  10197. OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
  10198. CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
  10199. }
  10200. else
  10201. {
  10202. jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
  10203. process_partial_match(common);
  10204. JUMPHERE(jump);
  10205. }
  10206. break;
  10207. }
  /* Counted loop with the remaining count in tmp_base / tmp_offset. */
  10208. OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
  10209. detect_partial_match(common, &no_match);
  10210. label = LABEL();
  10211. compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
  10212. OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  10213. add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
  10214. detect_partial_match_to(common, label);
  10215. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  10216. set_jumps(no_char1_match, LABEL());
  10217. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  10218. set_jumps(no_match, LABEL());
  10219. break;
  /* Possessive optional item: STR_PTR is saved, one match is attempted, the
     saved value is refreshed on success, and STR_PTR is reloaded from it on
     both paths. */
  10220. case OP_POSQUERY:
  10221. SLJIT_ASSERT(early_fail_ptr == 0);
  10222. OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  10223. compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
  10224. OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  10225. set_jumps(no_match, LABEL());
  10226. OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
  10227. break;
  10228. default:
  10229. SLJIT_UNREACHABLE();
  10230. break;
  10231. }
  10232. count_match(common);
  10233. return end;
  10234. }
  10235. static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
  10236. {
  10237. DEFINE_COMPILER;
  10238. backtrack_common *backtrack;
  10239. PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
  10240. if (*cc == OP_FAIL)
  10241. {
  10242. add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
  10243. return cc + 1;
  10244. }
  10245. if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
  10246. add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
  10247. if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
  10248. {
  10249. /* No need to check notempty conditions. */
  10250. if (common->accept_label == NULL)
  10251. add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
  10252. else
  10253. JUMPTO(SLJIT_JUMP, common->accept_label);
  10254. return cc + 1;
  10255. }
  10256. if (common->accept_label == NULL)
  10257. add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
  10258. else
  10259. CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
  10260. if (HAS_VIRTUAL_REGISTERS)
  10261. {
  10262. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  10263. OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
  10264. }
  10265. else
  10266. OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));
  10267. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
  10268. add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));
  10269. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
  10270. if (common->accept_label == NULL)
  10271. add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
  10272. else
  10273. JUMPTO(SLJIT_ZERO, common->accept_label);
  10274. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
  10275. if (common->accept_label == NULL)
  10276. add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
  10277. else
  10278. CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
  10279. add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
  10280. return cc + 1;
  10281. }
  10282. static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
  10283. {
  10284. DEFINE_COMPILER;
  10285. int offset = GET2(cc, 1);
  10286. BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
  10287. /* Data will be discarded anyway... */
  10288. if (common->currententry != NULL)
  10289. return cc + 1 + IMM2_SIZE;
  10290. if (!optimized_cbracket)
  10291. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
  10292. offset <<= 1;
  10293. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  10294. if (!optimized_cbracket)
  10295. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
  10296. return cc + 1 + IMM2_SIZE;
  10297. }
  10298. static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
  10299. {
  10300. DEFINE_COMPILER;
  10301. backtrack_common *backtrack;
  10302. PCRE2_UCHAR opcode = *cc;
  10303. PCRE2_SPTR ccend = cc + 1;
  10304. if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
  10305. opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
  10306. ccend += 2 + cc[1];
  10307. PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
  10308. if (opcode == OP_SKIP)
  10309. {
  10310. allocate_stack(common, 1);
  10311. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  10312. return ccend;
  10313. }
  10314. if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
  10315. {
  10316. if (HAS_VIRTUAL_REGISTERS)
  10317. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  10318. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
  10319. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
  10320. OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
  10321. }
  10322. return ccend;
  10323. }
  /* One-element code array holding OP_THEN_TRAP. The then-trap backtrack
     records created in this file point their common.cc field at it. */
  10324. static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
  10325. static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
  10326. {
  10327. DEFINE_COMPILER;
  10328. backtrack_common *backtrack;
  10329. BOOL needs_control_head;
  10330. int size;
  10331. PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
  10332. common->then_trap = BACKTRACK_AS(then_trap_backtrack);
  10333. BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
  10334. BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
  10335. BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
  10336. size = BACKTRACK_AS(then_trap_backtrack)->framesize;
  10337. size = 3 + (size < 0 ? 0 : size);
  10338. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  10339. allocate_stack(common, size);
  10340. if (size > 3)
  10341. OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
  10342. else
  10343. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
  10344. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
  10345. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
  10346. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
  10347. size = BACKTRACK_AS(then_trap_backtrack)->framesize;
  10348. if (size >= 0)
  10349. init_frame(common, cc, ccend, size - 1, 0);
  10350. }
  10351. static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
  10352. {
  10353. DEFINE_COMPILER;
  10354. backtrack_common *backtrack;
  10355. BOOL has_then_trap = FALSE;
  10356. then_trap_backtrack *save_then_trap = NULL;
  10357. SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
  10358. if (common->has_then && common->then_offsets[cc - common->start] != 0)
  10359. {
  10360. SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
  10361. has_then_trap = TRUE;
  10362. save_then_trap = common->then_trap;
  10363. /* Tail item on backtrack. */
  10364. compile_then_trap_matchingpath(common, cc, ccend, parent);
  10365. }
  10366. while (cc < ccend)
  10367. {
  10368. switch(*cc)
  10369. {
  10370. case OP_SOD:
  10371. case OP_SOM:
  10372. case OP_NOT_WORD_BOUNDARY:
  10373. case OP_WORD_BOUNDARY:
  10374. case OP_EODN:
  10375. case OP_EOD:
  10376. case OP_DOLL:
  10377. case OP_DOLLM:
  10378. case OP_CIRC:
  10379. case OP_CIRCM:
  10380. case OP_REVERSE:
  10381. cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
  10382. break;
  10383. case OP_NOT_DIGIT:
  10384. case OP_DIGIT:
  10385. case OP_NOT_WHITESPACE:
  10386. case OP_WHITESPACE:
  10387. case OP_NOT_WORDCHAR:
  10388. case OP_WORDCHAR:
  10389. case OP_ANY:
  10390. case OP_ALLANY:
  10391. case OP_ANYBYTE:
  10392. case OP_NOTPROP:
  10393. case OP_PROP:
  10394. case OP_ANYNL:
  10395. case OP_NOT_HSPACE:
  10396. case OP_HSPACE:
  10397. case OP_NOT_VSPACE:
  10398. case OP_VSPACE:
  10399. case OP_EXTUNI:
  10400. case OP_NOT:
  10401. case OP_NOTI:
  10402. cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
  10403. break;
  10404. case OP_SET_SOM:
  10405. PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
  10406. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
  10407. allocate_stack(common, 1);
  10408. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
  10409. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  10410. cc++;
  10411. break;
  10412. case OP_CHAR:
  10413. case OP_CHARI:
  10414. if (common->mode == PCRE2_JIT_COMPLETE)
  10415. cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
  10416. else
  10417. cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
  10418. break;
  10419. case OP_STAR:
  10420. case OP_MINSTAR:
  10421. case OP_PLUS:
  10422. case OP_MINPLUS:
  10423. case OP_QUERY:
  10424. case OP_MINQUERY:
  10425. case OP_UPTO:
  10426. case OP_MINUPTO:
  10427. case OP_EXACT:
  10428. case OP_POSSTAR:
  10429. case OP_POSPLUS:
  10430. case OP_POSQUERY:
  10431. case OP_POSUPTO:
  10432. case OP_STARI:
  10433. case OP_MINSTARI:
  10434. case OP_PLUSI:
  10435. case OP_MINPLUSI:
  10436. case OP_QUERYI:
  10437. case OP_MINQUERYI:
  10438. case OP_UPTOI:
  10439. case OP_MINUPTOI:
  10440. case OP_EXACTI:
  10441. case OP_POSSTARI:
  10442. case OP_POSPLUSI:
  10443. case OP_POSQUERYI:
  10444. case OP_POSUPTOI:
  10445. case OP_NOTSTAR:
  10446. case OP_NOTMINSTAR:
  10447. case OP_NOTPLUS:
  10448. case OP_NOTMINPLUS:
  10449. case OP_NOTQUERY:
  10450. case OP_NOTMINQUERY:
  10451. case OP_NOTUPTO:
  10452. case OP_NOTMINUPTO:
  10453. case OP_NOTEXACT:
  10454. case OP_NOTPOSSTAR:
  10455. case OP_NOTPOSPLUS:
  10456. case OP_NOTPOSQUERY:
  10457. case OP_NOTPOSUPTO:
  10458. case OP_NOTSTARI:
  10459. case OP_NOTMINSTARI:
  10460. case OP_NOTPLUSI:
  10461. case OP_NOTMINPLUSI:
  10462. case OP_NOTQUERYI:
  10463. case OP_NOTMINQUERYI:
  10464. case OP_NOTUPTOI:
  10465. case OP_NOTMINUPTOI:
  10466. case OP_NOTEXACTI:
  10467. case OP_NOTPOSSTARI:
  10468. case OP_NOTPOSPLUSI:
  10469. case OP_NOTPOSQUERYI:
  10470. case OP_NOTPOSUPTOI:
  10471. case OP_TYPESTAR:
  10472. case OP_TYPEMINSTAR:
  10473. case OP_TYPEPLUS:
  10474. case OP_TYPEMINPLUS:
  10475. case OP_TYPEQUERY:
  10476. case OP_TYPEMINQUERY:
  10477. case OP_TYPEUPTO:
  10478. case OP_TYPEMINUPTO:
  10479. case OP_TYPEEXACT:
  10480. case OP_TYPEPOSSTAR:
  10481. case OP_TYPEPOSPLUS:
  10482. case OP_TYPEPOSQUERY:
  10483. case OP_TYPEPOSUPTO:
  10484. cc = compile_iterator_matchingpath(common, cc, parent);
  10485. break;
  10486. case OP_CLASS:
  10487. case OP_NCLASS:
  10488. if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
  10489. cc = compile_iterator_matchingpath(common, cc, parent);
  10490. else
  10491. cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
  10492. break;
  10493. #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  10494. case OP_XCLASS:
  10495. if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
  10496. cc = compile_iterator_matchingpath(common, cc, parent);
  10497. else
  10498. cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
  10499. break;
  10500. #endif
  10501. case OP_REF:
  10502. case OP_REFI:
  10503. if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
  10504. cc = compile_ref_iterator_matchingpath(common, cc, parent);
  10505. else
  10506. {
  10507. compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
  10508. cc += 1 + IMM2_SIZE;
  10509. }
  10510. break;
  10511. case OP_DNREF:
  10512. case OP_DNREFI:
  10513. if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
  10514. cc = compile_ref_iterator_matchingpath(common, cc, parent);
  10515. else
  10516. {
  10517. compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
  10518. compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
  10519. cc += 1 + 2 * IMM2_SIZE;
  10520. }
  10521. break;
  10522. case OP_RECURSE:
  10523. cc = compile_recurse_matchingpath(common, cc, parent);
  10524. break;
  10525. case OP_CALLOUT:
  10526. case OP_CALLOUT_STR:
  10527. cc = compile_callout_matchingpath(common, cc, parent);
  10528. break;
  10529. case OP_ASSERT:
  10530. case OP_ASSERT_NOT:
  10531. case OP_ASSERTBACK:
  10532. case OP_ASSERTBACK_NOT:
  10533. PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
  10534. cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
  10535. break;
  10536. case OP_BRAMINZERO:
  10537. PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
  10538. cc = bracketend(cc + 1);
  10539. if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
  10540. {
  10541. allocate_stack(common, 1);
  10542. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  10543. }
  10544. else
  10545. {
  10546. allocate_stack(common, 2);
  10547. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  10548. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
  10549. }
  10550. BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
  10551. count_match(common);
  10552. break;
  10553. case OP_ASSERT_NA:
  10554. case OP_ASSERTBACK_NA:
  10555. case OP_ONCE:
  10556. case OP_SCRIPT_RUN:
  10557. case OP_BRA:
  10558. case OP_CBRA:
  10559. case OP_COND:
  10560. case OP_SBRA:
  10561. case OP_SCBRA:
  10562. case OP_SCOND:
  10563. cc = compile_bracket_matchingpath(common, cc, parent);
  10564. break;
  10565. case OP_BRAZERO:
  10566. if (cc[1] > OP_ASSERTBACK_NOT)
  10567. cc = compile_bracket_matchingpath(common, cc, parent);
  10568. else
  10569. {
  10570. PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
  10571. cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
  10572. }
  10573. break;
  10574. case OP_BRAPOS:
  10575. case OP_CBRAPOS:
  10576. case OP_SBRAPOS:
  10577. case OP_SCBRAPOS:
  10578. case OP_BRAPOSZERO:
  10579. cc = compile_bracketpos_matchingpath(common, cc, parent);
  10580. break;
  10581. case OP_MARK:
  10582. PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
  10583. SLJIT_ASSERT(common->mark_ptr != 0);
  10584. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  10585. allocate_stack(common, common->has_skip_arg ? 5 : 1);
  10586. if (HAS_VIRTUAL_REGISTERS)
  10587. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  10588. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
  10589. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
  10590. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
  10591. OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
  10592. if (common->has_skip_arg)
  10593. {
  10594. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  10595. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
  10596. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
  10597. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
  10598. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
  10599. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
  10600. }
  10601. cc += 1 + 2 + cc[1];
  10602. break;
  10603. case OP_PRUNE:
  10604. case OP_PRUNE_ARG:
  10605. case OP_SKIP:
  10606. case OP_SKIP_ARG:
  10607. case OP_THEN:
  10608. case OP_THEN_ARG:
  10609. case OP_COMMIT:
  10610. case OP_COMMIT_ARG:
  10611. cc = compile_control_verb_matchingpath(common, cc, parent);
  10612. break;
  10613. case OP_FAIL:
  10614. case OP_ACCEPT:
  10615. case OP_ASSERT_ACCEPT:
  10616. cc = compile_fail_accept_matchingpath(common, cc, parent);
  10617. break;
  10618. case OP_CLOSE:
  10619. cc = compile_close_matchingpath(common, cc);
  10620. break;
  10621. case OP_SKIPZERO:
  10622. cc = bracketend(cc + 1);
  10623. break;
  10624. default:
  10625. SLJIT_UNREACHABLE();
  10626. return;
  10627. }
  10628. if (cc == NULL)
  10629. return;
  10630. }
  10631. if (has_then_trap)
  10632. {
  10633. /* Head item on backtrack. */
  10634. PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
  10635. BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
  10636. BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
  10637. common->then_trap = save_then_trap;
  10638. }
  10639. SLJIT_ASSERT(cc == ccend);
  10640. }
  /* The helper macros of the matching-path generator above are dropped here;
  the backtracking-path generators below use the two macros defined next. */
  10641. #undef PUSH_BACKTRACK
  10642. #undef PUSH_BACKTRACK_NOVALUE
  10643. #undef BACKTRACK_AS
  /* Calls compile_backtrackingpath() for the given backtrack item and returns
  from the enclosing function when the compiler reports an error afterwards.
  The expansion refers to "common" and "compiler", so both must be in scope,
  and the bare "return" restricts its use to functions returning void. */
  10644. #define COMPILE_BACKTRACKINGPATH(current) \
  10645. do \
  10646. { \
  10647. compile_backtrackingpath(common, (current)); \
  10648. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
  10649. return; \
  10650. } \
  10651. while (0)
  /* Views the variable named "current" of the enclosing function as a pointer
  to the more specific backtrack structure type given as argument. */
  10652. #define CURRENT_AS(type) ((type *)current)
  10653. static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
  10654. {
  10655. DEFINE_COMPILER;
  10656. PCRE2_SPTR cc = current->cc;
  10657. PCRE2_UCHAR opcode;
  10658. PCRE2_UCHAR type;
  10659. sljit_u32 max = 0, exact;
  10660. struct sljit_label *label = NULL;
  10661. struct sljit_jump *jump = NULL;
  10662. jump_list *jumplist = NULL;
  10663. PCRE2_SPTR end;
  10664. int private_data_ptr = PRIVATE_DATA(cc);
  10665. int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
  10666. int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
  10667. int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
  10668. cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
  10669. switch(opcode)
  10670. {
  10671. case OP_STAR:
  10672. case OP_UPTO:
  10673. if (type == OP_ANYNL || type == OP_EXTUNI)
  10674. {
  10675. SLJIT_ASSERT(private_data_ptr == 0);
  10676. set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
  10677. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  10678. free_stack(common, 1);
  10679. CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  10680. }
  10681. else
  10682. {
  10683. if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
  10684. {
  10685. OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  10686. OP1(SLJIT_MOV, TMP2, 0, base, offset1);
  10687. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  10688. jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
  10689. label = LABEL();
  10690. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  10691. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  10692. if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
  10693. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
  10694. CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  10695. move_back(common, NULL, TRUE);
  10696. CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
  10697. }
  10698. else
  10699. {
  10700. OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  10701. jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
  10702. move_back(common, NULL, TRUE);
  10703. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  10704. JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  10705. }
  10706. JUMPHERE(jump);
  10707. if (private_data_ptr == 0)
  10708. free_stack(common, 2);
  10709. }
  10710. break;
  10711. case OP_MINSTAR:
  10712. OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  10713. compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  10714. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  10715. JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  10716. set_jumps(jumplist, LABEL());
  10717. if (private_data_ptr == 0)
  10718. free_stack(common, 1);
  10719. break;
  10720. case OP_MINUPTO:
  10721. OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  10722. OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  10723. OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  10724. add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
  10725. OP1(SLJIT_MOV, base, offset1, TMP1, 0);
  10726. compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  10727. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  10728. JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  10729. set_jumps(jumplist, LABEL());
  10730. if (private_data_ptr == 0)
  10731. free_stack(common, 2);
  10732. break;
  10733. case OP_QUERY:
  10734. OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  10735. OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  10736. CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  10737. jump = JUMP(SLJIT_JUMP);
  10738. set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
  10739. OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  10740. OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  10741. JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  10742. JUMPHERE(jump);
  10743. if (private_data_ptr == 0)
  10744. free_stack(common, 1);
  10745. break;
  10746. case OP_MINQUERY:
  10747. OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  10748. OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  10749. jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  10750. compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  10751. JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  10752. set_jumps(jumplist, LABEL());
  10753. JUMPHERE(jump);
  10754. if (private_data_ptr == 0)
  10755. free_stack(common, 1);
  10756. break;
  10757. case OP_EXACT:
  10758. case OP_POSSTAR:
  10759. case OP_POSQUERY:
  10760. case OP_POSUPTO:
  10761. break;
  10762. default:
  10763. SLJIT_UNREACHABLE();
  10764. break;
  10765. }
  10766. set_jumps(current->topbacktracks, LABEL());
  10767. }
  10768. static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
  10769. {
  10770. DEFINE_COMPILER;
  10771. PCRE2_SPTR cc = current->cc;
  10772. BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
  10773. PCRE2_UCHAR type;
  10774. type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
  10775. if ((type & 0x1) == 0)
  10776. {
  10777. /* Maximize case. */
  10778. set_jumps(current->topbacktracks, LABEL());
  10779. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  10780. free_stack(common, 1);
  10781. CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
  10782. return;
  10783. }
  10784. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  10785. CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
  10786. set_jumps(current->topbacktracks, LABEL());
  10787. free_stack(common, ref ? 2 : 3);
  10788. }
  10789. static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
  10790. {
  10791. DEFINE_COMPILER;
  10792. recurse_entry *entry;
  10793. if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
  10794. {
  10795. entry = CURRENT_AS(recurse_backtrack)->entry;
  10796. if (entry->backtrack_label == NULL)
  10797. add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
  10798. else
  10799. JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
  10800. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
  10801. }
  10802. else
  10803. compile_backtrackingpath(common, current->top);
  10804. set_jumps(current->topbacktracks, LABEL());
  10805. }
  10806. static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
  10807. {
  10808. DEFINE_COMPILER;
  10809. PCRE2_SPTR cc = current->cc;
  10810. PCRE2_UCHAR bra = OP_BRA;
  10811. struct sljit_jump *brajump = NULL;
  10812. SLJIT_ASSERT(*cc != OP_BRAMINZERO);
  10813. if (*cc == OP_BRAZERO)
  10814. {
  10815. bra = *cc;
  10816. cc++;
  10817. }
  10818. if (bra == OP_BRAZERO)
  10819. {
  10820. SLJIT_ASSERT(current->topbacktracks == NULL);
  10821. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  10822. }
  10823. if (CURRENT_AS(assert_backtrack)->framesize < 0)
  10824. {
  10825. set_jumps(current->topbacktracks, LABEL());
  10826. if (bra == OP_BRAZERO)
  10827. {
  10828. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  10829. CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
  10830. free_stack(common, 1);
  10831. }
  10832. return;
  10833. }
  10834. if (bra == OP_BRAZERO)
  10835. {
  10836. if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
  10837. {
  10838. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  10839. CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
  10840. free_stack(common, 1);
  10841. return;
  10842. }
  10843. free_stack(common, 1);
  10844. brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  10845. }
  10846. if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  10847. {
  10848. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
  10849. add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  10850. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  10851. OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw));
  10852. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0);
  10853. set_jumps(current->topbacktracks, LABEL());
  10854. }
  10855. else
  10856. set_jumps(current->topbacktracks, LABEL());
  10857. if (bra == OP_BRAZERO)
  10858. {
  10859. /* We know there is enough place on the stack. */
  10860. OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  10861. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  10862. JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
  10863. JUMPHERE(brajump);
  10864. }
  10865. }
  /* Generates the backtracking path of a bracketed group. "current" is accessed
  as a bracket_backtrack through CURRENT_AS(). In order, the code below:
    - decodes the bracket: optional BRAZERO/BRAMINZERO prefix, the kind of the
      closing KET opcode and any counted-repeat data attached to it;
    - restores the repeat counter and the saved capture values from the
      backtrack stack;
    - emits the backtracking path of the items recorded in current->top;
    - for each remaining OP_ALT alternative, emits its matching path followed
      by its backtracking path;
    - releases the stack slots of the bracket and, for repeated or optional
      groups, jumps back to the appropriate matching path label.
  The function returns early when the compiler reports an error. */
  10866. static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
  10867. {
  10868. DEFINE_COMPILER;
  10869. int opcode, stacksize, alt_count, alt_max;
  10870. int offset = 0;
  10871. int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
  10872. int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
  10873. PCRE2_SPTR cc = current->cc;
  10874. PCRE2_SPTR ccbegin;
  10875. PCRE2_SPTR ccprev;
  10876. PCRE2_UCHAR bra = OP_BRA;
  10877. PCRE2_UCHAR ket;
  10878. assert_backtrack *assert;
  10879. BOOL has_alternatives;
  10880. BOOL needs_control_head = FALSE;
  10881. struct sljit_jump *brazero = NULL;
  10882. struct sljit_jump *next_alt = NULL;
  10883. struct sljit_jump *once = NULL;
  10884. struct sljit_jump *cond = NULL;
  10885. struct sljit_label *rmin_label = NULL;
  10886. struct sljit_label *exact_label = NULL;
  10887. struct sljit_put_label *put_label = NULL;
  /* An optional BRAZERO/BRAMINZERO prefix is remembered in bra and skipped. */
  10888. if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  10889. {
  10890. bra = *cc;
  10891. cc++;
  10892. }
  10893. opcode = *cc;
  /* ccbegin temporarily addresses the opcode LINK_SIZE + 1 units before the
  position returned by bracketend(); it is read as the closing KET kind. */
  10894. ccbegin = bracketend(cc) - 1 - LINK_SIZE;
  10895. ket = *ccbegin;
  /* A plain OP_KET with private data describes a counted repeat: the counter
  slot, the repeat type and the repeat count. OP_UPTO and OP_MINUPTO repeats
  are then handled like OP_KETRMAX and OP_KETRMIN respectively. */
  10896. if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
  10897. {
  10898. repeat_ptr = PRIVATE_DATA(ccbegin);
  10899. repeat_type = PRIVATE_DATA(ccbegin + 2);
  10900. repeat_count = PRIVATE_DATA(ccbegin + 3);
  10901. SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
  10902. if (repeat_type == OP_UPTO)
  10903. ket = OP_KETRMAX;
  10904. if (repeat_type == OP_MINUPTO)
  10905. ket = OP_KETRMIN;
  10906. }
  /* From here on ccbegin is the bracket opcode itself and cc is the opcode
  reached through the link stored at offset 1 (OP_ALT when further
  alternatives follow). */
  10907. ccbegin = cc;
  10908. cc += GET(cc, 1);
  10909. has_alternatives = *cc == OP_ALT;
  /* For conditional groups has_alternatives means instead: the condition is an
  assertion, or condition-failure jumps were recorded. */
  10910. if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  10911. has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
  /* Capturing brackets: offset is used below as the OVECTOR index of the
  capture pair (offset and offset + 1). */
  10912. if (opcode == OP_CBRA || opcode == OP_SCBRA)
  10913. offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
  /* A repeated OP_COND is treated as OP_SCOND for the rest of the function. */
  10914. if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  10915. opcode = OP_SCOND;
  10916. alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
  10917. /* Decoding the needs_control_head in framesize. */
  10918. if (opcode == OP_ONCE)
  10919. {
  10920. needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
  10921. CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
  10922. }
  /* Counted repeat treated as KETRMAX/KETRMIN: pop the saved counter and
  write it back to the counter slot (plus one for OP_UPTO). */
  10923. if (ket != OP_KET && repeat_type != 0)
  10924. {
  10925. /* TMP1 is used in OP_KETRMIN below. */
  10926. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  10927. free_stack(common, 1);
  10928. if (repeat_type == OP_UPTO)
  10929. OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
  10930. else
  10931. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  10932. }
  /* Entry handling per closing kind. The brazero jump created here is bound
  near the end of the function. */
  10933. if (ket == OP_KETRMAX)
  10934. {
  10935. if (bra == OP_BRAZERO)
  10936. {
  10937. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  10938. free_stack(common, 1);
  10939. brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  10940. }
  10941. }
  10942. else if (ket == OP_KETRMIN)
  10943. {
  10944. if (bra != OP_BRAMINZERO)
  10945. {
  10946. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  10947. if (repeat_type != 0)
  10948. {
  10949. /* TMP1 was set a few lines above. */
  10950. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  10951. /* Drop STR_PTR for non-greedy plus quantifier. */
  10952. if (opcode != OP_ONCE)
  10953. free_stack(common, 1);
  10954. }
  10955. else if (opcode >= OP_SBRA || opcode == OP_ONCE)
  10956. {
  10957. /* Checking zero-length iteration. */
  10958. if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
  10959. CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  10960. else
  10961. {
  10962. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  10963. CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  10964. }
  10965. /* Drop STR_PTR for non-greedy plus quantifier. */
  10966. if (opcode != OP_ONCE)
  10967. free_stack(common, 1);
  10968. }
  10969. else
  10970. JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  10971. }
  /* rmin_label is the target of the KETRMIN loop-back emitted at the end. */
  10972. rmin_label = LABEL();
  10973. if (repeat_type != 0)
  10974. OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  10975. }
  10976. else if (bra == OP_BRAZERO)
  10977. {
  10978. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  10979. free_stack(common, 1);
  10980. brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  10981. }
  10982. else if (repeat_type == OP_EXACT)
  10983. {
  10984. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  10985. exact_label = LABEL();
  10986. }
  /* Capturing bracket: restore the values saved on the backtrack stack (three
  slots including the capture_last value when that is tracked, two slots for
  a non-optimized capture otherwise). */
  10987. if (offset != 0)
  10988. {
  10989. if (common->capture_last_ptr != 0)
  10990. {
  10991. SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
  10992. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  10993. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  10994. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
  10995. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
  10996. free_stack(common, 3);
  10997. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
  10998. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  10999. }
  11000. else if (common->optimized_cbracket[offset >> 1] == 0)
  11001. {
  11002. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  11003. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  11004. free_stack(common, 2);
  11005. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
  11006. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
  11007. }
  11008. }
  /* OP_ONCE: revert the frame when one exists, then jump over everything up to
  the JUMPHERE(once) in the OP_ONCE clean-up near the end. Other groups with
  alternatives pop the value that selects which code to continue with: a
  small index compared against constants for up to three alternatives, or an
  address used by an indirect jump otherwise. */
  11009. if (SLJIT_UNLIKELY(opcode == OP_ONCE))
  11010. {
  11011. if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
  11012. {
  11013. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  11014. add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  11015. OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
  11016. }
  11017. once = JUMP(SLJIT_JUMP);
  11018. }
  11019. else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  11020. {
  11021. if (has_alternatives)
  11022. {
  11023. /* Always exactly one alternative. */
  11024. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  11025. free_stack(common, 1);
  11026. alt_max = 2;
  11027. next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  11028. }
  11029. }
  11030. else if (has_alternatives)
  11031. {
  11032. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  11033. free_stack(common, 1);
  11034. if (alt_max > 3)
  11035. {
  11036. sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
  11037. SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label);
  11038. sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL());
  11039. sljit_emit_op0(compiler, SLJIT_ENDBR);
  11040. }
  11041. else
  11042. next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  11043. }
  /* Backtracking path of the items currently recorded in current->top. */
  11044. COMPILE_BACKTRACKINGPATH(current->top);
  11045. if (current->topbacktracks)
  11046. set_jumps(current->topbacktracks, LABEL());
  11047. if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  11048. {
  11049. /* Conditional block always has at most one alternative. */
  11050. if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
  11051. {
  11052. SLJIT_ASSERT(has_alternatives);
  11053. assert = CURRENT_AS(bracket_backtrack)->u.assert;
  11054. if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
  11055. {
  11056. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
  11057. add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  11058. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  11059. OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
  11060. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
  11061. }
  11062. cond = JUMP(SLJIT_JUMP);
  11063. set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
  11064. }
  11065. else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
  11066. {
  11067. SLJIT_ASSERT(has_alternatives);
  11068. cond = JUMP(SLJIT_JUMP);
  11069. set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
  11070. }
  11071. else
  11072. SLJIT_ASSERT(!has_alternatives);
  11073. }
  /* One iteration per remaining alternative: the backtrack lists of current
  are reset, the matching path of the alternative is generated, the state
  needed by a later backtrack is pushed, and finally the backtracking path
  of that alternative is generated. */
  11074. if (has_alternatives)
  11075. {
  11076. alt_count = 1;
  11077. do
  11078. {
  11079. current->top = NULL;
  11080. current->topbacktracks = NULL;
  11081. current->nextbacktracks = NULL;
  11082. /* Conditional blocks always have an additional alternative, even if it is empty. */
  11083. if (*cc == OP_ALT)
  11084. {
  11085. ccprev = cc + 1 + LINK_SIZE;
  11086. cc += GET(cc, 1);
  /* Reload the subject position saved for the group before trying the
  next alternative (not done for conditional groups). */
  11087. if (opcode != OP_COND && opcode != OP_SCOND)
  11088. {
  11089. if (opcode != OP_ONCE)
  11090. {
  11091. if (private_data_ptr != 0)
  11092. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  11093. else
  11094. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  11095. }
  11096. else
  11097. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
  11098. }
  11099. compile_matchingpath(common, ccprev, cc, current);
  11100. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  11101. return;
  11102. if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
  11103. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  11104. if (opcode == OP_SCRIPT_RUN)
  11105. match_script_run_common(common, private_data_ptr, current);
  11106. }
  11107. /* Instructions after the current alternative is successfully matched. */
  11108. /* There is a similar code in compile_bracket_matchingpath. */
  11109. if (opcode == OP_ONCE)
  11110. match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
  /* First pass: count the stack slots to allocate. */
  11111. stacksize = 0;
  11112. if (repeat_type == OP_MINUPTO)
  11113. {
  11114. /* We need to preserve the counter. TMP2 will be used below. */
  11115. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  11116. stacksize++;
  11117. }
  11118. if (ket != OP_KET || bra != OP_BRA)
  11119. stacksize++;
  11120. if (offset != 0)
  11121. {
  11122. if (common->capture_last_ptr != 0)
  11123. stacksize++;
  11124. if (common->optimized_cbracket[offset >> 1] == 0)
  11125. stacksize += 2;
  11126. }
  11127. if (opcode != OP_ONCE)
  11128. stacksize++;
  11129. if (stacksize > 0)
  11130. allocate_stack(common, stacksize);
  /* Second pass: fill the allocated slots in the same order. */
  11131. stacksize = 0;
  11132. if (repeat_type == OP_MINUPTO)
  11133. {
  11134. /* TMP2 was set above. */
  11135. OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
  11136. stacksize++;
  11137. }
  11138. if (ket != OP_KET || bra != OP_BRA)
  11139. {
  11140. if (ket != OP_KET)
  11141. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  11142. else
  11143. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  11144. stacksize++;
  11145. }
  11146. if (offset != 0)
  11147. stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
  /* Record which alternative matched: its index, or (more than three
  alternatives) a label address filled in by sljit_set_put_label below. */
  11148. if (opcode != OP_ONCE)
  11149. {
  11150. if (alt_max <= 3)
  11151. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
  11152. else
  11153. put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
  11154. }
  11155. if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
  11156. {
  11157. /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
  11158. SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
  11159. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  11160. }
  11161. JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
  /* The backtracking code of this alternative starts here: bind the pending
  selector jump (and chain the next comparison), or resolve the put label. */
  11162. if (opcode != OP_ONCE)
  11163. {
  11164. if (alt_max <= 3)
  11165. {
  11166. JUMPHERE(next_alt);
  11167. alt_count++;
  11168. if (alt_count < alt_max)
  11169. {
  11170. SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
  11171. next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
  11172. }
  11173. }
  11174. else
  11175. {
  11176. sljit_set_put_label(put_label, LABEL());
  11177. sljit_emit_op0(compiler, SLJIT_ENDBR);
  11178. }
  11179. }
  11180. COMPILE_BACKTRACKINGPATH(current->top);
  11181. if (current->topbacktracks)
  11182. set_jumps(current->topbacktracks, LABEL());
  11183. SLJIT_ASSERT(!current->nextbacktracks);
  11184. }
  11185. while (*cc == OP_ALT);
  /* Conditional group: a negative assertion condition with a frame is
  reverted here, then the jump that skipped the alternatives is bound. */
  11186. if (cond != NULL)
  11187. {
  11188. SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
  11189. assert = CURRENT_AS(bracket_backtrack)->u.assert;
  11190. if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
  11191. {
  11192. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
  11193. add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  11194. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  11195. OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
  11196. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
  11197. }
  11198. JUMPHERE(cond);
  11199. }
  11200. /* Free the STR_PTR. */
  11201. if (private_data_ptr == 0)
  11202. free_stack(common, 1);
  11203. }
  /* All alternatives are exhausted: restore whatever the matching path saved
  when the group was entered and release the corresponding stack slots. */
  11204. if (offset != 0)
  11205. {
  11206. /* Using both tmp register is better for instruction scheduling. */
  11207. if (common->optimized_cbracket[offset >> 1] != 0)
  11208. {
  11209. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  11210. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  11211. free_stack(common, 2);
  11212. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
  11213. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
  11214. }
  11215. else
  11216. {
  11217. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  11218. free_stack(common, 1);
  11219. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
  11220. }
  11221. }
  11222. else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
  11223. {
  11224. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
  11225. free_stack(common, 1);
  11226. }
  11227. else if (opcode == OP_ONCE)
  11228. {
  11229. cc = ccbegin + GET(ccbegin, 1);
  11230. stacksize = needs_control_head ? 1 : 0;
  11231. if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
  11232. {
  11233. /* Reset head and drop saved frame. */
  11234. stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
  11235. }
  11236. else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
  11237. {
  11238. /* The STR_PTR must be released. */
  11239. stacksize++;
  11240. }
  11241. if (stacksize > 0)
  11242. free_stack(common, stacksize);
  11243. JUMPHERE(once);
  11244. /* Restore previous private_data_ptr */
  11245. if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
  11246. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
  11247. else if (ket == OP_KETRMIN)
  11248. {
  11249. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  11250. /* See the comment below. */
  11251. free_stack(common, 2);
  11252. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
  11253. }
  11254. }
  /* Loop-back for repeated or optional groups: exact repeats advance the
  counter and restart at exact_label while it does not exceed repeat_count;
  KETRMAX and KETRMIN re-enter the matching path while the popped value is
  non-zero; a plain BRAZERO continues at zero_matchingpath. */
  11255. if (repeat_type == OP_EXACT)
  11256. {
  11257. OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  11258. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  11259. CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
  11260. }
  11261. else if (ket == OP_KETRMAX)
  11262. {
  11263. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  11264. if (bra != OP_BRAZERO)
  11265. free_stack(common, 1);
  11266. CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  11267. if (bra == OP_BRAZERO)
  11268. {
  11269. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  11270. JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
  11271. JUMPHERE(brazero);
  11272. free_stack(common, 1);
  11273. }
  11274. }
  11275. else if (ket == OP_KETRMIN)
  11276. {
  11277. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  11278. /* OP_ONCE removes everything in case of a backtrack, so we don't
  11279. need to explicitly release the STR_PTR. The extra release would
  11280. affect badly the free_stack(2) above. */
  11281. if (opcode != OP_ONCE)
  11282. free_stack(common, 1);
  11283. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
  11284. if (opcode == OP_ONCE)
  11285. free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
  11286. else if (bra == OP_BRAMINZERO)
  11287. free_stack(common, 1);
  11288. }
  11289. else if (bra == OP_BRAZERO)
  11290. {
  11291. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  11292. JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
  11293. JUMPHERE(brazero);
  11294. }
  11295. }
  11296. static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
  11297. {
  11298. DEFINE_COMPILER;
  11299. int offset;
  11300. struct sljit_jump *jump;
  11301. if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
  11302. {
  11303. if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
  11304. {
  11305. offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
  11306. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  11307. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  11308. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
  11309. if (common->capture_last_ptr != 0)
  11310. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
  11311. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
  11312. if (common->capture_last_ptr != 0)
  11313. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
  11314. }
  11315. set_jumps(current->topbacktracks, LABEL());
  11316. free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
  11317. return;
  11318. }
  11319. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
  11320. add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  11321. OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
  11322. if (current->topbacktracks)
  11323. {
  11324. jump = JUMP(SLJIT_JUMP);
  11325. set_jumps(current->topbacktracks, LABEL());
  11326. /* Drop the stack frame. */
  11327. free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
  11328. JUMPHERE(jump);
  11329. }
  11330. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
  11331. }
  11332. static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
  11333. {
  11334. assert_backtrack backtrack;
  11335. current->top = NULL;
  11336. current->topbacktracks = NULL;
  11337. current->nextbacktracks = NULL;
  11338. if (current->cc[1] > OP_ASSERTBACK_NOT)
  11339. {
  11340. /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
  11341. compile_bracket_matchingpath(common, current->cc, current);
  11342. compile_bracket_backtrackingpath(common, current->top);
  11343. }
  11344. else
  11345. {
  11346. memset(&backtrack, 0, sizeof(backtrack));
  11347. backtrack.common.cc = current->cc;
  11348. backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
  11349. /* Manual call of compile_assert_matchingpath. */
  11350. compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
  11351. }
  11352. SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
  11353. }
  11354. static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
  11355. {
  11356. DEFINE_COMPILER;
  11357. PCRE2_UCHAR opcode = *current->cc;
  11358. struct sljit_label *loop;
  11359. struct sljit_jump *jump;
  11360. if (opcode == OP_THEN || opcode == OP_THEN_ARG)
  11361. {
  11362. if (common->then_trap != NULL)
  11363. {
  11364. SLJIT_ASSERT(common->control_head_ptr != 0);
  11365. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  11366. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
  11367. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
  11368. jump = JUMP(SLJIT_JUMP);
  11369. loop = LABEL();
  11370. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  11371. JUMPHERE(jump);
  11372. CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
  11373. CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
  11374. add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
  11375. return;
  11376. }
  11377. else if (!common->local_quit_available && common->in_positive_assertion)
  11378. {
  11379. add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
  11380. return;
  11381. }
  11382. }
  11383. if (common->local_quit_available)
  11384. {
  11385. /* Abort match with a fail. */
  11386. if (common->quit_label == NULL)
  11387. add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
  11388. else
  11389. JUMPTO(SLJIT_JUMP, common->quit_label);
  11390. return;
  11391. }
  11392. if (opcode == OP_SKIP_ARG)
  11393. {
  11394. SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
  11395. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  11396. OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
  11397. sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
  11398. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
  11399. add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
  11400. return;
  11401. }
  11402. if (opcode == OP_SKIP)
  11403. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  11404. else
  11405. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
  11406. add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
  11407. }
  11408. static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
  11409. {
  11410. DEFINE_COMPILER;
  11411. struct sljit_jump *jump;
  11412. int size;
  11413. if (CURRENT_AS(then_trap_backtrack)->then_trap)
  11414. {
  11415. common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
  11416. return;
  11417. }
  11418. size = CURRENT_AS(then_trap_backtrack)->framesize;
  11419. size = 3 + (size < 0 ? 0 : size);
  11420. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
  11421. free_stack(common, size);
  11422. jump = JUMP(SLJIT_JUMP);
  11423. set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
  11424. /* STACK_TOP is set by THEN. */
  11425. if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
  11426. {
  11427. add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  11428. OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
  11429. }
  11430. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  11431. free_stack(common, 3);
  11432. JUMPHERE(jump);
  11433. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
  11434. }
  11435. static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
  11436. {
  11437. DEFINE_COMPILER;
  11438. then_trap_backtrack *save_then_trap = common->then_trap;
  11439. while (current)
  11440. {
  11441. if (current->nextbacktracks != NULL)
  11442. set_jumps(current->nextbacktracks, LABEL());
  11443. switch(*current->cc)
  11444. {
  11445. case OP_SET_SOM:
  11446. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  11447. free_stack(common, 1);
  11448. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
  11449. break;
  11450. case OP_STAR:
  11451. case OP_MINSTAR:
  11452. case OP_PLUS:
  11453. case OP_MINPLUS:
  11454. case OP_QUERY:
  11455. case OP_MINQUERY:
  11456. case OP_UPTO:
  11457. case OP_MINUPTO:
  11458. case OP_EXACT:
  11459. case OP_POSSTAR:
  11460. case OP_POSPLUS:
  11461. case OP_POSQUERY:
  11462. case OP_POSUPTO:
  11463. case OP_STARI:
  11464. case OP_MINSTARI:
  11465. case OP_PLUSI:
  11466. case OP_MINPLUSI:
  11467. case OP_QUERYI:
  11468. case OP_MINQUERYI:
  11469. case OP_UPTOI:
  11470. case OP_MINUPTOI:
  11471. case OP_EXACTI:
  11472. case OP_POSSTARI:
  11473. case OP_POSPLUSI:
  11474. case OP_POSQUERYI:
  11475. case OP_POSUPTOI:
  11476. case OP_NOTSTAR:
  11477. case OP_NOTMINSTAR:
  11478. case OP_NOTPLUS:
  11479. case OP_NOTMINPLUS:
  11480. case OP_NOTQUERY:
  11481. case OP_NOTMINQUERY:
  11482. case OP_NOTUPTO:
  11483. case OP_NOTMINUPTO:
  11484. case OP_NOTEXACT:
  11485. case OP_NOTPOSSTAR:
  11486. case OP_NOTPOSPLUS:
  11487. case OP_NOTPOSQUERY:
  11488. case OP_NOTPOSUPTO:
  11489. case OP_NOTSTARI:
  11490. case OP_NOTMINSTARI:
  11491. case OP_NOTPLUSI:
  11492. case OP_NOTMINPLUSI:
  11493. case OP_NOTQUERYI:
  11494. case OP_NOTMINQUERYI:
  11495. case OP_NOTUPTOI:
  11496. case OP_NOTMINUPTOI:
  11497. case OP_NOTEXACTI:
  11498. case OP_NOTPOSSTARI:
  11499. case OP_NOTPOSPLUSI:
  11500. case OP_NOTPOSQUERYI:
  11501. case OP_NOTPOSUPTOI:
  11502. case OP_TYPESTAR:
  11503. case OP_TYPEMINSTAR:
  11504. case OP_TYPEPLUS:
  11505. case OP_TYPEMINPLUS:
  11506. case OP_TYPEQUERY:
  11507. case OP_TYPEMINQUERY:
  11508. case OP_TYPEUPTO:
  11509. case OP_TYPEMINUPTO:
  11510. case OP_TYPEEXACT:
  11511. case OP_TYPEPOSSTAR:
  11512. case OP_TYPEPOSPLUS:
  11513. case OP_TYPEPOSQUERY:
  11514. case OP_TYPEPOSUPTO:
  11515. case OP_CLASS:
  11516. case OP_NCLASS:
  11517. #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  11518. case OP_XCLASS:
  11519. #endif
  11520. compile_iterator_backtrackingpath(common, current);
  11521. break;
  11522. case OP_REF:
  11523. case OP_REFI:
  11524. case OP_DNREF:
  11525. case OP_DNREFI:
  11526. compile_ref_iterator_backtrackingpath(common, current);
  11527. break;
  11528. case OP_RECURSE:
  11529. compile_recurse_backtrackingpath(common, current);
  11530. break;
  11531. case OP_ASSERT:
  11532. case OP_ASSERT_NOT:
  11533. case OP_ASSERTBACK:
  11534. case OP_ASSERTBACK_NOT:
  11535. compile_assert_backtrackingpath(common, current);
  11536. break;
  11537. case OP_ASSERT_NA:
  11538. case OP_ASSERTBACK_NA:
  11539. case OP_ONCE:
  11540. case OP_SCRIPT_RUN:
  11541. case OP_BRA:
  11542. case OP_CBRA:
  11543. case OP_COND:
  11544. case OP_SBRA:
  11545. case OP_SCBRA:
  11546. case OP_SCOND:
  11547. compile_bracket_backtrackingpath(common, current);
  11548. break;
  11549. case OP_BRAZERO:
  11550. if (current->cc[1] > OP_ASSERTBACK_NOT)
  11551. compile_bracket_backtrackingpath(common, current);
  11552. else
  11553. compile_assert_backtrackingpath(common, current);
  11554. break;
  11555. case OP_BRAPOS:
  11556. case OP_CBRAPOS:
  11557. case OP_SBRAPOS:
  11558. case OP_SCBRAPOS:
  11559. case OP_BRAPOSZERO:
  11560. compile_bracketpos_backtrackingpath(common, current);
  11561. break;
  11562. case OP_BRAMINZERO:
  11563. compile_braminzero_backtrackingpath(common, current);
  11564. break;
  11565. case OP_MARK:
  11566. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
  11567. if (common->has_skip_arg)
  11568. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  11569. free_stack(common, common->has_skip_arg ? 5 : 1);
  11570. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
  11571. if (common->has_skip_arg)
  11572. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
  11573. break;
  11574. case OP_THEN:
  11575. case OP_THEN_ARG:
  11576. case OP_PRUNE:
  11577. case OP_PRUNE_ARG:
  11578. case OP_SKIP:
  11579. case OP_SKIP_ARG:
  11580. compile_control_verb_backtrackingpath(common, current);
  11581. break;
  11582. case OP_COMMIT:
  11583. case OP_COMMIT_ARG:
  11584. if (!common->local_quit_available)
  11585. OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  11586. if (common->quit_label == NULL)
  11587. add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
  11588. else
  11589. JUMPTO(SLJIT_JUMP, common->quit_label);
  11590. break;
  11591. case OP_CALLOUT:
  11592. case OP_CALLOUT_STR:
  11593. case OP_FAIL:
  11594. case OP_ACCEPT:
  11595. case OP_ASSERT_ACCEPT:
  11596. set_jumps(current->topbacktracks, LABEL());
  11597. break;
  11598. case OP_THEN_TRAP:
  11599. /* A virtual opcode for then traps. */
  11600. compile_then_trap_backtrackingpath(common, current);
  11601. break;
  11602. default:
  11603. SLJIT_UNREACHABLE();
  11604. break;
  11605. }
  11606. current = current->prev;
  11607. }
  11608. common->then_trap = save_then_trap;
  11609. }
  11610. static SLJIT_INLINE void compile_recurse(compiler_common *common)
  11611. {
  11612. DEFINE_COMPILER;
  11613. PCRE2_SPTR cc = common->start + common->currententry->start;
  11614. PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
  11615. PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
  11616. BOOL needs_control_head;
  11617. BOOL has_quit;
  11618. BOOL has_accept;
  11619. int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept);
  11620. int alt_count, alt_max, local_size;
  11621. backtrack_common altbacktrack;
  11622. jump_list *match = NULL;
  11623. struct sljit_jump *next_alt = NULL;
  11624. struct sljit_jump *accept_exit = NULL;
  11625. struct sljit_label *quit;
  11626. struct sljit_put_label *put_label = NULL;
  11627. /* Recurse captures then. */
  11628. common->then_trap = NULL;
  11629. SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
  11630. alt_max = no_alternatives(cc);
  11631. alt_count = 0;
  11632. /* Matching path. */
  11633. SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
  11634. common->currententry->entry_label = LABEL();
  11635. set_jumps(common->currententry->entry_calls, common->currententry->entry_label);
  11636. sljit_emit_fast_enter(compiler, TMP2, 0);
  11637. count_match(common);
  11638. local_size = (alt_max > 1) ? 2 : 1;
  11639. /* (Reversed) stack layout:
  11640. [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */
  11641. allocate_stack(common, private_data_size + local_size);
  11642. /* Save return address. */
  11643. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
  11644. copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit);
  11645. /* This variable is saved and restored all time when we enter or exit from a recursive context. */
  11646. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
  11647. if (needs_control_head)
  11648. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
  11649. if (alt_max > 1)
  11650. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  11651. memset(&altbacktrack, 0, sizeof(backtrack_common));
  11652. common->quit_label = NULL;
  11653. common->accept_label = NULL;
  11654. common->quit = NULL;
  11655. common->accept = NULL;
  11656. altbacktrack.cc = ccbegin;
  11657. cc += GET(cc, 1);
  11658. while (1)
  11659. {
  11660. altbacktrack.top = NULL;
  11661. altbacktrack.topbacktracks = NULL;
  11662. if (altbacktrack.cc != ccbegin)
  11663. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  11664. compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
  11665. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  11666. return;
  11667. allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1);
  11668. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  11669. if (alt_max > 1 || has_accept)
  11670. {
  11671. if (alt_max > 3)
  11672. put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
  11673. else
  11674. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
  11675. }
  11676. add_jump(compiler, &match, JUMP(SLJIT_JUMP));
  11677. if (alt_count == 0)
  11678. {
  11679. /* Backtracking path entry. */
  11680. SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
  11681. common->currententry->backtrack_label = LABEL();
  11682. set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
  11683. sljit_emit_fast_enter(compiler, TMP1, 0);
  11684. if (has_accept)
  11685. accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
  11686. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  11687. /* Save return address. */
  11688. OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
  11689. copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
  11690. if (alt_max > 1)
  11691. {
  11692. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  11693. free_stack(common, 2);
  11694. if (alt_max > 3)
  11695. {
  11696. sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
  11697. sljit_set_put_label(put_label, LABEL());
  11698. sljit_emit_op0(compiler, SLJIT_ENDBR);
  11699. }
  11700. else
  11701. next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  11702. }
  11703. else
  11704. free_stack(common, has_accept ? 2 : 1);
  11705. }
  11706. else if (alt_max > 3)
  11707. {
  11708. sljit_set_put_label(put_label, LABEL());
  11709. sljit_emit_op0(compiler, SLJIT_ENDBR);
  11710. }
  11711. else
  11712. {
  11713. JUMPHERE(next_alt);
  11714. if (alt_count + 1 < alt_max)
  11715. {
  11716. SLJIT_ASSERT(alt_count == 1 && alt_max == 3);
  11717. next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
  11718. }
  11719. }
  11720. alt_count++;
  11721. compile_backtrackingpath(common, altbacktrack.top);
  11722. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  11723. return;
  11724. set_jumps(altbacktrack.topbacktracks, LABEL());
  11725. if (*cc != OP_ALT)
  11726. break;
  11727. altbacktrack.cc = cc + 1 + LINK_SIZE;
  11728. cc += GET(cc, 1);
  11729. }
  11730. /* No alternative is matched. */
  11731. quit = LABEL();
  11732. copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit);
  11733. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
  11734. free_stack(common, private_data_size + local_size);
  11735. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  11736. OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
  11737. if (common->quit != NULL)
  11738. {
  11739. SLJIT_ASSERT(has_quit);
  11740. set_jumps(common->quit, LABEL());
  11741. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  11742. copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit);
  11743. JUMPTO(SLJIT_JUMP, quit);
  11744. }
  11745. if (has_accept)
  11746. {
  11747. JUMPHERE(accept_exit);
  11748. free_stack(common, 2);
  11749. /* Save return address. */
  11750. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
  11751. copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit);
  11752. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
  11753. free_stack(common, private_data_size + local_size);
  11754. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  11755. OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
  11756. }
  11757. if (common->accept != NULL)
  11758. {
  11759. SLJIT_ASSERT(has_accept);
  11760. set_jumps(common->accept, LABEL());
  11761. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  11762. OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
  11763. allocate_stack(common, 2);
  11764. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
  11765. }
  11766. set_jumps(match, LABEL());
  11767. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  11768. copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
  11769. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
  11770. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
  11771. OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
  11772. }
  11773. #undef COMPILE_BACKTRACKINGPATH
  11774. #undef CURRENT_AS
  11775. #define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \
  11776. (PCRE2_JIT_INVALID_UTF)
  11777. static int jit_compile(pcre2_code *code, sljit_u32 mode)
  11778. {
  11779. pcre2_real_code *re = (pcre2_real_code *)code;
  11780. struct sljit_compiler *compiler;
  11781. backtrack_common rootbacktrack;
  11782. compiler_common common_data;
  11783. compiler_common *common = &common_data;
  11784. const sljit_u8 *tables = re->tables;
  11785. void *allocator_data = &re->memctl;
  11786. int private_data_size;
  11787. PCRE2_SPTR ccend;
  11788. executable_functions *functions;
  11789. void *executable_func;
  11790. sljit_uw executable_size;
  11791. sljit_uw total_length;
  11792. struct sljit_label *mainloop_label = NULL;
  11793. struct sljit_label *continue_match_label;
  11794. struct sljit_label *empty_match_found_label = NULL;
  11795. struct sljit_label *empty_match_backtrack_label = NULL;
  11796. struct sljit_label *reset_match_label;
  11797. struct sljit_label *quit_label;
  11798. struct sljit_jump *jump;
  11799. struct sljit_jump *minlength_check_failed = NULL;
  11800. struct sljit_jump *empty_match = NULL;
  11801. struct sljit_jump *end_anchor_failed = NULL;
  11802. jump_list *reqcu_not_found = NULL;
  11803. SLJIT_ASSERT(tables);
  11804. #if HAS_VIRTUAL_REGISTERS == 1
  11805. SLJIT_ASSERT(sljit_get_register_index(TMP3) < 0 && sljit_get_register_index(ARGUMENTS) < 0 && sljit_get_register_index(RETURN_ADDR) < 0);
  11806. #elif HAS_VIRTUAL_REGISTERS == 0
  11807. SLJIT_ASSERT(sljit_get_register_index(TMP3) >= 0 && sljit_get_register_index(ARGUMENTS) >= 0 && sljit_get_register_index(RETURN_ADDR) >= 0);
  11808. #else
  11809. #error "Invalid value for HAS_VIRTUAL_REGISTERS"
  11810. #endif
  11811. memset(&rootbacktrack, 0, sizeof(backtrack_common));
  11812. memset(common, 0, sizeof(compiler_common));
  11813. common->re = re;
  11814. common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
  11815. rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
  11816. #ifdef SUPPORT_UNICODE
  11817. common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
  11818. #endif /* SUPPORT_UNICODE */
  11819. mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
  11820. common->start = rootbacktrack.cc;
  11821. common->read_only_data_head = NULL;
  11822. common->fcc = tables + fcc_offset;
  11823. common->lcc = (sljit_sw)(tables + lcc_offset);
  11824. common->mode = mode;
  11825. common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
  11826. common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
  11827. common->nltype = NLTYPE_FIXED;
  11828. switch(re->newline_convention)
  11829. {
  11830. case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
  11831. case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
  11832. case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
  11833. case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
  11834. case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
  11835. case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
  11836. default: return PCRE2_ERROR_INTERNAL;
  11837. }
  11838. common->nlmax = READ_CHAR_MAX;
  11839. common->nlmin = 0;
  11840. if (re->bsr_convention == PCRE2_BSR_UNICODE)
  11841. common->bsr_nltype = NLTYPE_ANY;
  11842. else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
  11843. common->bsr_nltype = NLTYPE_ANYCRLF;
  11844. else
  11845. {
  11846. #ifdef BSR_ANYCRLF
  11847. common->bsr_nltype = NLTYPE_ANYCRLF;
  11848. #else
  11849. common->bsr_nltype = NLTYPE_ANY;
  11850. #endif
  11851. }
  11852. common->bsr_nlmax = READ_CHAR_MAX;
  11853. common->bsr_nlmin = 0;
  11854. common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
  11855. common->ctypes = (sljit_sw)(tables + ctypes_offset);
  11856. common->name_count = re->name_count;
  11857. common->name_entry_size = re->name_entry_size;
  11858. common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
  11859. common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
  11860. #ifdef SUPPORT_UNICODE
  11861. /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
  11862. common->utf = (re->overall_options & PCRE2_UTF) != 0;
  11863. common->ucp = (re->overall_options & PCRE2_UCP) != 0;
  11864. if (common->utf)
  11865. {
  11866. if (common->nltype == NLTYPE_ANY)
  11867. common->nlmax = 0x2029;
  11868. else if (common->nltype == NLTYPE_ANYCRLF)
  11869. common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
  11870. else
  11871. {
  11872. /* We only care about the first newline character. */
  11873. common->nlmax = common->newline & 0xff;
  11874. }
  11875. if (common->nltype == NLTYPE_FIXED)
  11876. common->nlmin = common->newline & 0xff;
  11877. else
  11878. common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
  11879. if (common->bsr_nltype == NLTYPE_ANY)
  11880. common->bsr_nlmax = 0x2029;
  11881. else
  11882. common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
  11883. common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
  11884. }
  11885. else
  11886. common->invalid_utf = FALSE;
  11887. #endif /* SUPPORT_UNICODE */
  11888. ccend = bracketend(common->start);
  11889. /* Calculate the local space size on the stack. */
  11890. common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
  11891. common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
  11892. if (!common->optimized_cbracket)
  11893. return PCRE2_ERROR_NOMEMORY;
  11894. #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
  11895. memset(common->optimized_cbracket, 0, re->top_bracket + 1);
  11896. #else
  11897. memset(common->optimized_cbracket, 1, re->top_bracket + 1);
  11898. #endif
  11899. SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
  11900. #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
  11901. common->capture_last_ptr = common->ovector_start;
  11902. common->ovector_start += sizeof(sljit_sw);
  11903. #endif
  11904. if (!check_opcode_types(common, common->start, ccend))
  11905. {
  11906. SLJIT_FREE(common->optimized_cbracket, allocator_data);
  11907. return PCRE2_ERROR_NOMEMORY;
  11908. }
  11909. /* Checking flags and updating ovector_start. */
  11910. if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
  11911. {
  11912. common->req_char_ptr = common->ovector_start;
  11913. common->ovector_start += sizeof(sljit_sw);
  11914. }
  11915. if (mode != PCRE2_JIT_COMPLETE)
  11916. {
  11917. common->start_used_ptr = common->ovector_start;
  11918. common->ovector_start += sizeof(sljit_sw);
  11919. if (mode == PCRE2_JIT_PARTIAL_SOFT)
  11920. {
  11921. common->hit_start = common->ovector_start;
  11922. common->ovector_start += sizeof(sljit_sw);
  11923. }
  11924. }
  11925. if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
  11926. {
  11927. common->match_end_ptr = common->ovector_start;
  11928. common->ovector_start += sizeof(sljit_sw);
  11929. }
  11930. #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
  11931. common->control_head_ptr = 1;
  11932. #endif
  11933. if (common->control_head_ptr != 0)
  11934. {
  11935. common->control_head_ptr = common->ovector_start;
  11936. common->ovector_start += sizeof(sljit_sw);
  11937. }
  11938. if (common->has_set_som)
  11939. {
  11940. /* Saving the real start pointer is necessary. */
  11941. common->start_ptr = common->ovector_start;
  11942. common->ovector_start += sizeof(sljit_sw);
  11943. }
  11944. /* Aligning ovector to even number of sljit words. */
  11945. if ((common->ovector_start & sizeof(sljit_sw)) != 0)
  11946. common->ovector_start += sizeof(sljit_sw);
  11947. if (common->start_ptr == 0)
  11948. common->start_ptr = OVECTOR(0);
  11949. /* Capturing brackets cannot be optimized if callouts are allowed. */
  11950. if (common->capture_last_ptr != 0)
  11951. memset(common->optimized_cbracket, 0, re->top_bracket + 1);
  11952. SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
  11953. common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
  11954. total_length = ccend - common->start;
  11955. common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
  11956. if (!common->private_data_ptrs)
  11957. {
  11958. SLJIT_FREE(common->optimized_cbracket, allocator_data);
  11959. return PCRE2_ERROR_NOMEMORY;
  11960. }
  11961. memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
  11962. private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
  11963. if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
  11964. detect_early_fail(common, common->start, &private_data_size, 0, 0, TRUE);
  11965. set_private_data_ptrs(common, &private_data_size, ccend);
  11966. SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
  11967. if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
  11968. {
  11969. SLJIT_FREE(common->private_data_ptrs, allocator_data);
  11970. SLJIT_FREE(common->optimized_cbracket, allocator_data);
  11971. return PCRE2_ERROR_NOMEMORY;
  11972. }
  11973. if (common->has_then)
  11974. {
  11975. common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
  11976. memset(common->then_offsets, 0, total_length);
  11977. set_then_offsets(common, common->start, NULL);
  11978. }
  11979. compiler = sljit_create_compiler(allocator_data, NULL);
  11980. if (!compiler)
  11981. {
  11982. SLJIT_FREE(common->optimized_cbracket, allocator_data);
  11983. SLJIT_FREE(common->private_data_ptrs, allocator_data);
  11984. return PCRE2_ERROR_NOMEMORY;
  11985. }
  11986. common->compiler = compiler;
  11987. /* Main pcre_jit_exec entry. */
  11988. sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size);
  11989. /* Register init. */
  11990. reset_ovector(common, (re->top_bracket + 1) * 2);
  11991. if (common->req_char_ptr != 0)
  11992. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
  11993. OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
  11994. OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
  11995. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  11996. OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
  11997. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
  11998. OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
  11999. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
  12000. OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
  12001. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  12002. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
  12003. if (common->early_fail_start_ptr < common->early_fail_end_ptr)
  12004. reset_early_fail(common);
  12005. if (mode == PCRE2_JIT_PARTIAL_SOFT)
  12006. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
  12007. if (common->mark_ptr != 0)
  12008. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
  12009. if (common->control_head_ptr != 0)
  12010. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
  12011. /* Main part of the matching */
  12012. if ((re->overall_options & PCRE2_ANCHORED) == 0)
  12013. {
  12014. mainloop_label = mainloop_entry(common);
  12015. continue_match_label = LABEL();
  12016. /* Forward search if possible. */
  12017. if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
  12018. {
  12019. if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
  12020. ;
  12021. else if ((re->flags & PCRE2_FIRSTSET) != 0)
  12022. fast_forward_first_char(common);
  12023. else if ((re->flags & PCRE2_STARTLINE) != 0)
  12024. fast_forward_newline(common);
  12025. else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
  12026. fast_forward_start_bits(common);
  12027. }
  12028. }
  12029. else
  12030. continue_match_label = LABEL();
  12031. if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
  12032. {
  12033. OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  12034. OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
  12035. minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
  12036. }
  12037. if (common->req_char_ptr != 0)
  12038. reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
  12039. /* Store the current STR_PTR in OVECTOR(0). */
  12040. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
  12041. /* Copy the limit of allowed recursions. */
  12042. OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
  12043. if (common->capture_last_ptr != 0)
  12044. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
  12045. if (common->fast_forward_bc_ptr != NULL)
  12046. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
  12047. if (common->start_ptr != OVECTOR(0))
  12048. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
  12049. /* Copy the beginning of the string. */
  12050. if (mode == PCRE2_JIT_PARTIAL_SOFT)
  12051. {
  12052. jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
  12053. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  12054. JUMPHERE(jump);
  12055. }
  12056. else if (mode == PCRE2_JIT_PARTIAL_HARD)
  12057. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  12058. compile_matchingpath(common, common->start, ccend, &rootbacktrack);
  12059. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  12060. {
  12061. sljit_free_compiler(compiler);
  12062. SLJIT_FREE(common->optimized_cbracket, allocator_data);
  12063. SLJIT_FREE(common->private_data_ptrs, allocator_data);
  12064. PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
  12065. return PCRE2_ERROR_NOMEMORY;
  12066. }
  12067. if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
  12068. end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
  12069. if (common->might_be_empty)
  12070. {
  12071. empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
  12072. empty_match_found_label = LABEL();
  12073. }
  12074. common->accept_label = LABEL();
  12075. if (common->accept != NULL)
  12076. set_jumps(common->accept, common->accept_label);
  12077. /* This means we have a match. Update the ovector. */
  12078. copy_ovector(common, re->top_bracket + 1);
  12079. common->quit_label = common->abort_label = LABEL();
  12080. if (common->quit != NULL)
  12081. set_jumps(common->quit, common->quit_label);
  12082. if (common->abort != NULL)
  12083. set_jumps(common->abort, common->abort_label);
  12084. if (minlength_check_failed != NULL)
  12085. SET_LABEL(minlength_check_failed, common->abort_label);
  12086. sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
  12087. sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
  12088. if (common->failed_match != NULL)
  12089. {
  12090. SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
  12091. set_jumps(common->failed_match, LABEL());
  12092. OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  12093. JUMPTO(SLJIT_JUMP, common->abort_label);
  12094. }
  12095. if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
  12096. JUMPHERE(end_anchor_failed);
  12097. if (mode != PCRE2_JIT_COMPLETE)
  12098. {
  12099. common->partialmatchlabel = LABEL();
  12100. set_jumps(common->partialmatch, common->partialmatchlabel);
  12101. return_with_partial_match(common, common->quit_label);
  12102. }
  12103. if (common->might_be_empty)
  12104. empty_match_backtrack_label = LABEL();
  12105. compile_backtrackingpath(common, rootbacktrack.top);
  12106. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  12107. {
  12108. sljit_free_compiler(compiler);
  12109. SLJIT_FREE(common->optimized_cbracket, allocator_data);
  12110. SLJIT_FREE(common->private_data_ptrs, allocator_data);
  12111. PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
  12112. return PCRE2_ERROR_NOMEMORY;
  12113. }
  12114. SLJIT_ASSERT(rootbacktrack.prev == NULL);
  12115. reset_match_label = LABEL();
  12116. if (mode == PCRE2_JIT_PARTIAL_SOFT)
  12117. {
  12118. /* Update hit_start only in the first time. */
  12119. jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  12120. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  12121. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  12122. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
  12123. JUMPHERE(jump);
  12124. }
  12125. /* Check we have remaining characters. */
  12126. if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
  12127. {
  12128. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  12129. }
  12130. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
  12131. (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
  12132. if ((re->overall_options & PCRE2_ANCHORED) == 0)
  12133. {
  12134. if (common->ff_newline_shortcut != NULL)
  12135. {
  12136. /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
  12137. if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
  12138. {
  12139. if (common->match_end_ptr != 0)
  12140. {
  12141. OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  12142. OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
  12143. CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
  12144. OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  12145. }
  12146. else
  12147. CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
  12148. }
  12149. }
  12150. else
  12151. CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
  12152. }
  12153. /* No more remaining characters. */
  12154. if (reqcu_not_found != NULL)
  12155. set_jumps(reqcu_not_found, LABEL());
  12156. if (mode == PCRE2_JIT_PARTIAL_SOFT)
  12157. CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
  12158. OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  12159. JUMPTO(SLJIT_JUMP, common->quit_label);
  12160. flush_stubs(common);
  12161. if (common->might_be_empty)
  12162. {
  12163. JUMPHERE(empty_match);
  12164. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  12165. OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
  12166. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
  12167. JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
  12168. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
  12169. JUMPTO(SLJIT_ZERO, empty_match_found_label);
  12170. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  12171. CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
  12172. JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
  12173. }
  12174. common->fast_forward_bc_ptr = NULL;
  12175. common->early_fail_start_ptr = 0;
  12176. common->early_fail_end_ptr = 0;
  12177. common->currententry = common->entries;
  12178. common->local_quit_available = TRUE;
  12179. quit_label = common->quit_label;
  12180. while (common->currententry != NULL)
  12181. {
  12182. /* Might add new entries. */
  12183. compile_recurse(common);
  12184. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  12185. {
  12186. sljit_free_compiler(compiler);
  12187. SLJIT_FREE(common->optimized_cbracket, allocator_data);
  12188. SLJIT_FREE(common->private_data_ptrs, allocator_data);
  12189. PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
  12190. return PCRE2_ERROR_NOMEMORY;
  12191. }
  12192. flush_stubs(common);
  12193. common->currententry = common->currententry->next;
  12194. }
  12195. common->local_quit_available = FALSE;
  12196. common->quit_label = quit_label;
  12197. /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
  12198. /* This is a (really) rare case. */
  12199. set_jumps(common->stackalloc, LABEL());
  12200. /* RETURN_ADDR is not a saved register. */
  12201. sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  12202. SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
  12203. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
  12204. OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
  12205. OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
  12206. OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
  12207. OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
  12208. sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
  12209. jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
  12210. OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
  12211. OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
  12212. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  12213. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
  12214. OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
  12215. /* Allocation failed. */
  12216. JUMPHERE(jump);
  12217. /* We break the return address cache here, but this is a really rare case. */
  12218. OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
  12219. JUMPTO(SLJIT_JUMP, common->quit_label);
  12220. /* Call limit reached. */
  12221. set_jumps(common->calllimit, LABEL());
  12222. OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
  12223. JUMPTO(SLJIT_JUMP, common->quit_label);
  12224. if (common->revertframes != NULL)
  12225. {
  12226. set_jumps(common->revertframes, LABEL());
  12227. do_revertframes(common);
  12228. }
  12229. if (common->wordboundary != NULL)
  12230. {
  12231. set_jumps(common->wordboundary, LABEL());
  12232. check_wordboundary(common);
  12233. }
  12234. if (common->anynewline != NULL)
  12235. {
  12236. set_jumps(common->anynewline, LABEL());
  12237. check_anynewline(common);
  12238. }
  12239. if (common->hspace != NULL)
  12240. {
  12241. set_jumps(common->hspace, LABEL());
  12242. check_hspace(common);
  12243. }
  12244. if (common->vspace != NULL)
  12245. {
  12246. set_jumps(common->vspace, LABEL());
  12247. check_vspace(common);
  12248. }
  12249. if (common->casefulcmp != NULL)
  12250. {
  12251. set_jumps(common->casefulcmp, LABEL());
  12252. do_casefulcmp(common);
  12253. }
  12254. if (common->caselesscmp != NULL)
  12255. {
  12256. set_jumps(common->caselesscmp, LABEL());
  12257. do_caselesscmp(common);
  12258. }
  12259. if (common->reset_match != NULL)
  12260. {
  12261. set_jumps(common->reset_match, LABEL());
  12262. do_reset_match(common, (re->top_bracket + 1) * 2);
  12263. CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
  12264. OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
  12265. JUMPTO(SLJIT_JUMP, reset_match_label);
  12266. }
  12267. #ifdef SUPPORT_UNICODE
  12268. #if PCRE2_CODE_UNIT_WIDTH == 8
  12269. if (common->utfreadchar != NULL)
  12270. {
  12271. set_jumps(common->utfreadchar, LABEL());
  12272. do_utfreadchar(common);
  12273. }
  12274. if (common->utfreadtype8 != NULL)
  12275. {
  12276. set_jumps(common->utfreadtype8, LABEL());
  12277. do_utfreadtype8(common);
  12278. }
  12279. if (common->utfpeakcharback != NULL)
  12280. {
  12281. set_jumps(common->utfpeakcharback, LABEL());
  12282. do_utfpeakcharback(common);
  12283. }
  12284. #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  12285. #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
  12286. if (common->utfreadchar_invalid != NULL)
  12287. {
  12288. set_jumps(common->utfreadchar_invalid, LABEL());
  12289. do_utfreadchar_invalid(common);
  12290. }
  12291. if (common->utfreadnewline_invalid != NULL)
  12292. {
  12293. set_jumps(common->utfreadnewline_invalid, LABEL());
  12294. do_utfreadnewline_invalid(common);
  12295. }
  12296. if (common->utfmoveback_invalid)
  12297. {
  12298. set_jumps(common->utfmoveback_invalid, LABEL());
  12299. do_utfmoveback_invalid(common);
  12300. }
  12301. if (common->utfpeakcharback_invalid)
  12302. {
  12303. set_jumps(common->utfpeakcharback_invalid, LABEL());
  12304. do_utfpeakcharback_invalid(common);
  12305. }
  12306. #endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
  12307. if (common->getucd != NULL)
  12308. {
  12309. set_jumps(common->getucd, LABEL());
  12310. do_getucd(common);
  12311. }
  12312. if (common->getucdtype != NULL)
  12313. {
  12314. set_jumps(common->getucdtype, LABEL());
  12315. do_getucdtype(common);
  12316. }
  12317. #endif /* SUPPORT_UNICODE */
  12318. SLJIT_FREE(common->optimized_cbracket, allocator_data);
  12319. SLJIT_FREE(common->private_data_ptrs, allocator_data);
  12320. executable_func = sljit_generate_code(compiler);
  12321. executable_size = sljit_get_generated_code_size(compiler);
  12322. sljit_free_compiler(compiler);
  12323. if (executable_func == NULL)
  12324. {
  12325. PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
  12326. return PCRE2_ERROR_NOMEMORY;
  12327. }
  12328. /* Reuse the function descriptor if possible. */
  12329. if (re->executable_jit != NULL)
  12330. functions = (executable_functions *)re->executable_jit;
  12331. else
  12332. {
  12333. functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
  12334. if (functions == NULL)
  12335. {
  12336. /* This case is highly unlikely since we just recently
  12337. freed a lot of memory. Not impossible though. */
  12338. sljit_free_code(executable_func, NULL);
  12339. PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
  12340. return PCRE2_ERROR_NOMEMORY;
  12341. }
  12342. memset(functions, 0, sizeof(executable_functions));
  12343. functions->top_bracket = re->top_bracket + 1;
  12344. functions->limit_match = re->limit_match;
  12345. re->executable_jit = functions;
  12346. }
  12347. /* Turn mode into an index. */
  12348. if (mode == PCRE2_JIT_COMPLETE)
  12349. mode = 0;
  12350. else
  12351. mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
  12352. SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
  12353. functions->executable_funcs[mode] = executable_func;
  12354. functions->read_only_data_heads[mode] = common->read_only_data_head;
  12355. functions->executable_sizes[mode] = executable_size;
  12356. return 0;
  12357. }
  12358. #endif
/*************************************************
*        JIT compile a Regular Expression        *
*************************************************/

/* This function uses JIT to convert a previously-compiled pattern into machine
code.

Arguments:
  code          a compiled pattern
  options       JIT option bits

Returns:        0: success or (*NOJIT) was used
               <0: an error code
*/
  12370. #define PUBLIC_JIT_COMPILE_OPTIONS \
  12371. (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
  12372. PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
  12373. pcre2_jit_compile(pcre2_code *code, uint32_t options)
  12374. {
  12375. pcre2_real_code *re = (pcre2_real_code *)code;
  12376. #ifdef SUPPORT_JIT
  12377. executable_functions *functions;
  12378. static int executable_allocator_is_working = 0;
  12379. #endif
  12380. if (code == NULL)
  12381. return PCRE2_ERROR_NULL;
  12382. if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
  12383. return PCRE2_ERROR_JIT_BADOPTION;
  12384. /* Support for invalid UTF was first introduced in JIT, with the option
  12385. PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
  12386. compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
  12387. preferred feature, with the earlier option deprecated. However, for backward
  12388. compatibility, if the earlier option is set, it forces the new option so that
  12389. if JIT matching falls back to the interpreter, there is still support for
  12390. invalid UTF. However, if this function has already been successfully called
  12391. without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
  12392. non-invalid-supporting JIT code was compiled), give an error.
  12393. If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
  12394. actions are needed:
  12395. 1. Remove the definition from pcre2.h.in and from the list in
  12396. PUBLIC_JIT_COMPILE_OPTIONS above.
  12397. 2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
  12398. 3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
  12399. 4. Delete the following short block of code. The setting of "re" and
  12400. "functions" can be moved into the JIT-only block below, but if that is
  12401. done, (void)re and (void)functions will be needed in the non-JIT case, to
  12402. avoid compiler warnings.
  12403. */
  12404. #ifdef SUPPORT_JIT
  12405. functions = (executable_functions *)re->executable_jit;
  12406. #endif
  12407. if ((options & PCRE2_JIT_INVALID_UTF) != 0)
  12408. {
  12409. if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
  12410. {
  12411. #ifdef SUPPORT_JIT
  12412. if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
  12413. #endif
  12414. re->overall_options |= PCRE2_MATCH_INVALID_UTF;
  12415. }
  12416. }
  12417. /* The above tests are run with and without JIT support. This means that
  12418. PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
  12419. interpreter support) even in the absence of JIT. But now, if there is no JIT
  12420. support, give an error return. */
  12421. #ifndef SUPPORT_JIT
  12422. return PCRE2_ERROR_JIT_BADOPTION;
  12423. #else /* SUPPORT_JIT */
  12424. /* There is JIT support. Do the necessary. */
  12425. if ((re->flags & PCRE2_NOJIT) != 0) return 0;
  12426. if (executable_allocator_is_working == 0)
  12427. {
  12428. /* Checks whether the executable allocator is working. This check
  12429. might run multiple times in multi-threaded environments, but the
  12430. result should not be affected by it. */
  12431. void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
  12432. executable_allocator_is_working = -1;
  12433. if (ptr != NULL)
  12434. {
  12435. SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
  12436. executable_allocator_is_working = 1;
  12437. }
  12438. }
  12439. if (executable_allocator_is_working < 0)
  12440. return PCRE2_ERROR_NOMEMORY;
  12441. if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
  12442. options |= PCRE2_JIT_INVALID_UTF;
  12443. if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
  12444. || functions->executable_funcs[0] == NULL)) {
  12445. uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
  12446. int result = jit_compile(code, options & ~excluded_options);
  12447. if (result != 0)
  12448. return result;
  12449. }
  12450. if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
  12451. || functions->executable_funcs[1] == NULL)) {
  12452. uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
  12453. int result = jit_compile(code, options & ~excluded_options);
  12454. if (result != 0)
  12455. return result;
  12456. }
  12457. if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
  12458. || functions->executable_funcs[2] == NULL)) {
  12459. uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
  12460. int result = jit_compile(code, options & ~excluded_options);
  12461. if (result != 0)
  12462. return result;
  12463. }
  12464. return 0;
  12465. #endif /* SUPPORT_JIT */
  12466. }
  12467. /* JIT compiler uses an all-in-one approach. This improves security,
  12468. since the code generator functions are not exported. */
  12469. #define INCLUDED_FROM_PCRE2_JIT_COMPILE
  12470. #include "pcre2_jit_match.c"
  12471. #include "pcre2_jit_misc.c"
  12472. /* End of pcre2_jit_compile.c */