pcretest.c 170 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773
  1. /*************************************************
  2. * PCRE testing program *
  3. *************************************************/
  4. /* This program was hacked up as a tester for PCRE. I really should have
  5. written it more tidily in the first place. Will I ever learn? It has grown and
  6. been extended and consequently is now rather, er, *very* untidy in places. The
  7. addition of 16-bit support has made it even worse. :-(
  8. -----------------------------------------------------------------------------
  9. Redistribution and use in source and binary forms, with or without
  10. modification, are permitted provided that the following conditions are met:
  11. * Redistributions of source code must retain the above copyright notice,
  12. this list of conditions and the following disclaimer.
  13. * Redistributions in binary form must reproduce the above copyright
  14. notice, this list of conditions and the following disclaimer in the
  15. documentation and/or other materials provided with the distribution.
  16. * Neither the name of the University of Cambridge nor the names of its
  17. contributors may be used to endorse or promote products derived from
  18. this software without specific prior written permission.
  19. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  20. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  23. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  24. CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  25. SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  26. INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  27. CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  28. ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  29. POSSIBILITY OF SUCH DAMAGE.
  30. -----------------------------------------------------------------------------
  31. */
  32. /* This program now supports the testing of all of the 8-bit, 16-bit, and
  33. 32-bit PCRE libraries in a single program. This is different from the modules
  34. such as pcre_compile.c in the library itself, which are compiled separately for
  35. each mode. If two modes are enabled, for example, pcre_compile.c is compiled
  36. twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
  37. make use of any of the macros from pcre_internal.h that depend on
  38. COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
  39. SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
  40. supported library functions. */
  41. #ifdef HAVE_CONFIG_H
  42. #include "config.h"
  43. #endif
  44. #include <ctype.h>
  45. #include <stdio.h>
  46. #include <string.h>
  47. #include <stdlib.h>
  48. #include <time.h>
  49. #include <locale.h>
  50. #include <errno.h>
  51. /* Both libreadline and libedit are optionally supported. The user-supplied
  52. original patch uses readline/readline.h for libedit, but in at least one system
  53. it is installed as editline/readline.h, so the configuration code now looks for
  54. that first, falling back to readline/readline.h. */
  55. #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
  56. #ifdef HAVE_UNISTD_H
  57. #include <unistd.h>
  58. #endif
  59. #if defined(SUPPORT_LIBREADLINE)
  60. #include <readline/readline.h>
  61. #include <readline/history.h>
  62. #else
  63. #if defined(HAVE_EDITLINE_READLINE_H)
  64. #include <editline/readline.h>
  65. #else
  66. #include <readline/readline.h>
  67. #endif
  68. #endif
  69. #endif
  70. /* A number of things vary for Windows builds. Originally, pcretest opened its
  71. input and output without "b"; then I was told that "b" was needed in some
  72. environments, so it was added for release 5.0 to both the input and output. (It
  73. makes no difference on Unix-like systems.) Later I was told that it is wrong
  74. for the input on Windows. I've now abstracted the modes into two macros that
  75. are set here, to make it easier to fiddle with them, and removed "b" from the
  76. input mode under Windows. */
  77. #if defined(_WIN32) || defined(WIN32)
  78. #include <io.h> /* For _setmode() */
  79. #include <fcntl.h> /* For _O_BINARY */
  80. #define INPUT_MODE "r"
  81. #define OUTPUT_MODE "wb"
  82. #ifndef isatty
  83. #define isatty _isatty /* This is what Windows calls them, I'm told, */
  84. #endif /* though in some environments they seem to */
  85. /* be already defined, hence the #ifndefs. */
  86. #ifndef fileno
  87. #define fileno _fileno
  88. #endif
  89. /* A user sent this fix for Borland Builder 5 under Windows. */
  90. #ifdef __BORLANDC__
  91. #define _setmode(handle, mode) setmode(handle, mode)
  92. #endif
  93. /* Not Windows */
  94. #else
  95. #include <sys/time.h> /* These two includes are needed */
  96. #include <sys/resource.h> /* for setrlimit(). */
  97. #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
  98. #define INPUT_MODE "r"
  99. #define OUTPUT_MODE "w"
  100. #else
  101. #define INPUT_MODE "rb"
  102. #define OUTPUT_MODE "wb"
  103. #endif
  104. #endif
  105. #ifdef __VMS
  106. #include <ssdef.h>
  107. void vms_setsymbol( char *, char *, int );
  108. #endif
  109. #define PRIV(name) name
  110. /* We have to include pcre_internal.h because we need the internal info for
  111. displaying the results of pcre_study() and we also need to know about the
  112. internal macros, structures, and other internal data values; pcretest has
  113. "inside information" compared to a program that strictly follows the PCRE API.
  114. Although pcre_internal.h does itself include pcre.h, we explicitly include it
  115. here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
  116. appropriately for an application, not for building PCRE. */
  117. #include "pcre.h"
  118. #include "pcre_internal.h"
  119. /* The pcre_printint() function, which prints the internal form of a compiled
  120. regex, is held in a separate file so that (a) it can be compiled in either
  121. 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
  122. when that is compiled in debug mode. */
  123. #ifdef SUPPORT_PCRE8
  124. void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
  125. #endif
  126. #ifdef SUPPORT_PCRE16
  127. void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
  128. #endif
  129. #ifdef SUPPORT_PCRE32
  130. void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
  131. #endif
  132. /* We need access to some of the data tables that PCRE uses. So as not to have
  133. to keep two copies, we include the source files here, changing the names of the
  134. external symbols to prevent clashes. */
  135. #define PCRE_INCLUDED
  136. #include "pcre_tables.c"
  137. #include "pcre_ucd.c"
  138. /* The definition of the macro PRINTABLE, which determines whether to print an
  139. output character as-is or as a hex value when showing compiled patterns, is
  140. the same as in the printint.src file. We use it here in cases when the locale
  141. has not been explicitly changed, so as to get consistent output from systems
  142. that differ in their output from isprint() even in the "C" locale. */
  /* PRINTABLE(c) is a fixed, locale-independent range test: 32..126 normally,
     64..254 when EBCDIC is defined. PRINTOK(c) uses isprint() (restricted to
     values below 256) once locale_set is non-zero, and otherwise falls back to
     PRINTABLE(c). Note that c may be evaluated more than once. */
  #ifndef EBCDIC
  #define PRINTABLE(c) (32 <= (c) && (c) <= 126)
  #else
  #define PRINTABLE(c) (64 <= (c) && (c) <= 254)
  #endif
  #define PRINTOK(c) (!locale_set? PRINTABLE(c) : (((c) <= 255) && isprint(c)))
  149. /* Posix support is disabled in 16 or 32 bit only mode. */
  150. #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
  151. #define NOPOSIX
  152. #endif
  153. /* It is possible to compile this test program without including support for
  154. testing the POSIX interface, though this is not available via the standard
  155. Makefile. */
  156. #if !defined NOPOSIX
  157. #include "pcreposix.h"
  158. #endif
  159. /* It is also possible, originally for the benefit of a version that was
  160. imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
  161. NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
  162. automatically cut out the UTF support if PCRE is built without it. */
  163. #ifndef SUPPORT_UTF
  164. #ifndef NOUTF
  165. #define NOUTF
  166. #endif
  167. #endif
  168. /* To make the code a bit tidier for 8/16/32-bit support, we define macros
  169. for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
  170. only from one place and is handled differently). I couldn't dream up any way of
  171. using a single macro to do this in a generic way, because of the many different
  172. argument requirements. We know that at least one of SUPPORT_PCRE8,
  173. SUPPORT_PCRE16, and SUPPORT_PCRE32 must be set. First define macros for each
  174. individual mode; then use these in the definitions of generic macros.
  175. **** Special note about the PCHARSxxx macros: the address of the string to be
  176. printed is always given as two arguments: a base address followed by an offset.
  177. The base address is cast to the correct data size for 8 or 16 bit data; the
  178. offset is in units of this size. If the string were given as base+offset in one
  179. argument, the casting might be incorrectly applied. */
  180. #ifdef SUPPORT_PCRE8
  /* 8-bit string output helpers. The base address p is cast to pcre_uint8 *
     and the offset (counted in 8-bit units) is then added; the resulting
     address is passed to pchars() together with len and f. PCHARS8 stores the
     value returned by pchars() in lv, PCHARSV8 discards it.
     The offset argument is parenthesized so that an expression whose operator
     binds less tightly than "+" (for example "cond ? a : b") is added as a
     whole instead of being split by the pointer addition. */
  #define PCHARS8(lv, p, offset, len, f) \
    lv = pchars((pcre_uint8 *)(p) + (offset), len, f)
  #define PCHARSV8(p, offset, len, f) \
    (void)pchars((pcre_uint8 *)(p) + (offset), len, f)
  /* 8-bit capture-name reader: calls read_capture_name8(p, cn8, re) and stores
     the result back in p. The cn16 and cn32 arguments are accepted but are not
     used by this variant. */
  #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
    p = read_capture_name8((p), (cn8), (re))
  /* Length, as an int, of the NUL-terminated 8-bit string at p. The argument is
     parenthesized before the cast so that an expression such as "base + n" is
     converted as a whole rather than having only "base" cast to char *. */
  #define STRLEN8(p) ((int)strlen((char *)(p)))
  /* Assigns callout to pcre_callout. */
  #define SET_PCRE_CALLOUT8(callout) \
    pcre_callout = (callout)
  /* Assigns stack_guard to pcre_stack_guard. */
  #define SET_PCRE_STACK_GUARD8(stack_guard) \
    pcre_stack_guard = (stack_guard)
  /* Forwards its three arguments unchanged to pcre_assign_jit_stack(). */
  #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
    pcre_assign_jit_stack((extra), (callback), (userdata))
  194. #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  195. re = pcre_compile((char *)pat, options, error, erroffset, tables)
  196. #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
  197. namesptr, cbuffer, size) \
  198. rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
  199. (char *)namesptr, cbuffer, size)
  200. #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  201. rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
  202. #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
  203. offsets, size_offsets, workspace, size_workspace) \
  204. count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
  205. offsets, size_offsets, workspace, size_workspace)
  206. #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
  207. offsets, size_offsets) \
  208. count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
  209. offsets, size_offsets)
  210. #define PCRE_FREE_STUDY8(extra) \
  211. pcre_free_study(extra)
  212. #define PCRE_FREE_SUBSTRING8(substring) \
  213. pcre_free_substring(substring)
  214. #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
  215. pcre_free_substring_list(listptr)
  216. #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
  217. getnamesptr, subsptr) \
  218. rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
  219. (char *)getnamesptr, subsptr)
  220. #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
  221. n = pcre_get_stringnumber(re, (char *)ptr)
  222. #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  223. rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
  224. #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  225. rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
  226. #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
  227. rc = pcre_pattern_to_host_byte_order(re, extra, tables)
  228. #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  229. pcre_printint(re, outfile, debug_lengths)
  230. #define PCRE_STUDY8(extra, re, options, error) \
  231. extra = pcre_study(re, options, error)
  232. #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
  233. pcre_jit_stack_alloc(startsize, maxsize)
  234. #define PCRE_JIT_STACK_FREE8(stack) \
  235. pcre_jit_stack_free(stack)
  236. #define pcre8_maketables pcre_maketables
  237. #endif /* SUPPORT_PCRE8 */
  238. /* -----------------------------------------------------------*/
  239. #ifdef SUPPORT_PCRE16
  240. #define PCHARS16(lv, p, offset, len, f) \
  241. lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
  242. #define PCHARSV16(p, offset, len, f) \
  243. (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
  244. #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
  245. p = read_capture_name16(p, cn16, re)
  246. #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
  247. #define SET_PCRE_CALLOUT16(callout) \
  248. pcre16_callout = (int (*)(pcre16_callout_block *))callout
  249. #define SET_PCRE_STACK_GUARD16(stack_guard) \
  250. pcre16_stack_guard = (int (*)(void))stack_guard
  251. #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
  252. pcre16_assign_jit_stack((pcre16_extra *)extra, \
  253. (pcre16_jit_callback)callback, userdata)
  254. #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  255. re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
  256. tables)
  257. #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
  258. namesptr, cbuffer, size) \
  259. rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
  260. count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
  261. #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  262. rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
  263. (PCRE_UCHAR16 *)cbuffer, size/2)
  264. #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
  265. offsets, size_offsets, workspace, size_workspace) \
  266. count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
  267. (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
  268. workspace, size_workspace)
  269. #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
  270. offsets, size_offsets) \
  271. count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
  272. len, start_offset, options, offsets, size_offsets)
  273. #define PCRE_FREE_STUDY16(extra) \
  274. pcre16_free_study((pcre16_extra *)extra)
  275. #define PCRE_FREE_SUBSTRING16(substring) \
  276. pcre16_free_substring((PCRE_SPTR16)substring)
  277. #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  278. pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
  279. #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
  280. getnamesptr, subsptr) \
  281. rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
  282. count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
  283. #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
  284. n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
  285. #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  286. rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
  287. (PCRE_SPTR16 *)(void*)subsptr)
  288. #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  289. rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
  290. (PCRE_SPTR16 **)(void*)listptr)
  291. #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
  292. rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
  293. tables)
  294. #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  295. pcre16_printint(re, outfile, debug_lengths)
  296. #define PCRE_STUDY16(extra, re, options, error) \
  297. extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
  298. #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
  299. (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
  300. #define PCRE_JIT_STACK_FREE16(stack) \
  301. pcre16_jit_stack_free((pcre16_jit_stack *)stack)
  302. #endif /* SUPPORT_PCRE16 */
  303. /* -----------------------------------------------------------*/
  304. #ifdef SUPPORT_PCRE32
  305. #define PCHARS32(lv, p, offset, len, f) \
  306. lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
  307. #define PCHARSV32(p, offset, len, f) \
  308. (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
  309. #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
  310. p = read_capture_name32(p, cn32, re)
  311. #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
  312. #define SET_PCRE_CALLOUT32(callout) \
  313. pcre32_callout = (int (*)(pcre32_callout_block *))callout
  314. #define SET_PCRE_STACK_GUARD32(stack_guard) \
  315. pcre32_stack_guard = (int (*)(void))stack_guard
  316. #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
  317. pcre32_assign_jit_stack((pcre32_extra *)extra, \
  318. (pcre32_jit_callback)callback, userdata)
  319. #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
  320. re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
  321. tables)
  322. #define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
  323. namesptr, cbuffer, size) \
  324. rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
  325. count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, size/4)
  326. #define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  327. rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
  328. (PCRE_UCHAR32 *)cbuffer, size/4)
  329. #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
  330. offsets, size_offsets, workspace, size_workspace) \
  331. count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
  332. (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
  333. workspace, size_workspace)
  334. #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
  335. offsets, size_offsets) \
  336. count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
  337. len, start_offset, options, offsets, size_offsets)
  338. #define PCRE_FREE_STUDY32(extra) \
  339. pcre32_free_study((pcre32_extra *)extra)
  340. #define PCRE_FREE_SUBSTRING32(substring) \
  341. pcre32_free_substring((PCRE_SPTR32)substring)
  342. #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
  343. pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
  344. #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
  345. getnamesptr, subsptr) \
  346. rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
  347. count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
  348. #define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
  349. n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)
  350. #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
  351. rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
  352. (PCRE_SPTR32 *)(void*)subsptr)
  353. #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
  354. rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
  355. (PCRE_SPTR32 **)(void*)listptr)
  356. #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
  357. rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
  358. tables)
  359. #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
  360. pcre32_printint(re, outfile, debug_lengths)
  361. #define PCRE_STUDY32(extra, re, options, error) \
  362. extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
  363. #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
  364. (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
  365. #define PCRE_JIT_STACK_FREE32(stack) \
  366. pcre32_jit_stack_free((pcre32_jit_stack *)stack)
  367. #endif /* SUPPORT_PCRE32 */
  368. /* ----- More than one mode is supported; a runtime test is needed, except for
  369. pcre_config(), and the JIT stack functions, when it doesn't matter which
  370. available version is called. ----- */
  371. enum {
  372. PCRE8_MODE,
  373. PCRE16_MODE,
  374. PCRE32_MODE
  375. };
  376. #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
  377. defined (SUPPORT_PCRE32)) >= 2
  378. #define CHAR_SIZE (1 << pcre_mode)
  379. /* There doesn't seem to be an easy way of writing these macros that can cope
  380. with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
  381. cases separately. */
  382. /* ----- All three modes supported ----- */
  383. #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
  384. #define PCHARS(lv, p, offset, len, f) \
  385. if (pcre_mode == PCRE32_MODE) \
  386. PCHARS32(lv, p, offset, len, f); \
  387. else if (pcre_mode == PCRE16_MODE) \
  388. PCHARS16(lv, p, offset, len, f); \
  389. else \
  390. PCHARS8(lv, p, offset, len, f)
  391. #define PCHARSV(p, offset, len, f) \
  392. if (pcre_mode == PCRE32_MODE) \
  393. PCHARSV32(p, offset, len, f); \
  394. else if (pcre_mode == PCRE16_MODE) \
  395. PCHARSV16(p, offset, len, f); \
  396. else \
  397. PCHARSV8(p, offset, len, f)
  398. #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
  399. if (pcre_mode == PCRE32_MODE) \
  400. READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
  401. else if (pcre_mode == PCRE16_MODE) \
  402. READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
  403. else \
  404. READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
  405. #define SET_PCRE_CALLOUT(callout) \
  406. if (pcre_mode == PCRE32_MODE) \
  407. SET_PCRE_CALLOUT32(callout); \
  408. else if (pcre_mode == PCRE16_MODE) \
  409. SET_PCRE_CALLOUT16(callout); \
  410. else \
  411. SET_PCRE_CALLOUT8(callout)
  412. #define SET_PCRE_STACK_GUARD(stack_guard) \
  413. if (pcre_mode == PCRE32_MODE) \
  414. SET_PCRE_STACK_GUARD32(stack_guard); \
  415. else if (pcre_mode == PCRE16_MODE) \
  416. SET_PCRE_STACK_GUARD16(stack_guard); \
  417. else \
  418. SET_PCRE_STACK_GUARD8(stack_guard)
  419. #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
  420. #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  421. if (pcre_mode == PCRE32_MODE) \
  422. PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
  423. else if (pcre_mode == PCRE16_MODE) \
  424. PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
  425. else \
  426. PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
  427. #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  428. if (pcre_mode == PCRE32_MODE) \
  429. PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
  430. else if (pcre_mode == PCRE16_MODE) \
  431. PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
  432. else \
  433. PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
  434. #define PCRE_CONFIG pcre_config
  435. #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
  436. namesptr, cbuffer, size) \
  437. if (pcre_mode == PCRE32_MODE) \
  438. PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
  439. namesptr, cbuffer, size); \
  440. else if (pcre_mode == PCRE16_MODE) \
  441. PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
  442. namesptr, cbuffer, size); \
  443. else \
  444. PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
  445. namesptr, cbuffer, size)
  446. #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  447. if (pcre_mode == PCRE32_MODE) \
  448. PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
  449. else if (pcre_mode == PCRE16_MODE) \
  450. PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
  451. else \
  452. PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
  453. #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
  454. offsets, size_offsets, workspace, size_workspace) \
  455. if (pcre_mode == PCRE32_MODE) \
  456. PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
  457. offsets, size_offsets, workspace, size_workspace); \
  458. else if (pcre_mode == PCRE16_MODE) \
  459. PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
  460. offsets, size_offsets, workspace, size_workspace); \
  461. else \
  462. PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
  463. offsets, size_offsets, workspace, size_workspace)
  464. #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
  465. offsets, size_offsets) \
  466. if (pcre_mode == PCRE32_MODE) \
  467. PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
  468. offsets, size_offsets); \
  469. else if (pcre_mode == PCRE16_MODE) \
  470. PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
  471. offsets, size_offsets); \
  472. else \
  473. PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
  474. offsets, size_offsets)
  475. #define PCRE_FREE_STUDY(extra) \
  476. if (pcre_mode == PCRE32_MODE) \
  477. PCRE_FREE_STUDY32(extra); \
  478. else if (pcre_mode == PCRE16_MODE) \
  479. PCRE_FREE_STUDY16(extra); \
  480. else \
  481. PCRE_FREE_STUDY8(extra)
  482. #define PCRE_FREE_SUBSTRING(substring) \
  483. if (pcre_mode == PCRE32_MODE) \
  484. PCRE_FREE_SUBSTRING32(substring); \
  485. else if (pcre_mode == PCRE16_MODE) \
  486. PCRE_FREE_SUBSTRING16(substring); \
  487. else \
  488. PCRE_FREE_SUBSTRING8(substring)
  489. #define PCRE_FREE_SUBSTRING_LIST(listptr) \
  490. if (pcre_mode == PCRE32_MODE) \
  491. PCRE_FREE_SUBSTRING_LIST32(listptr); \
  492. else if (pcre_mode == PCRE16_MODE) \
  493. PCRE_FREE_SUBSTRING_LIST16(listptr); \
  494. else \
  495. PCRE_FREE_SUBSTRING_LIST8(listptr)
  496. #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
  497. getnamesptr, subsptr) \
  498. if (pcre_mode == PCRE32_MODE) \
  499. PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
  500. getnamesptr, subsptr); \
  501. else if (pcre_mode == PCRE16_MODE) \
  502. PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
  503. getnamesptr, subsptr); \
  504. else \
  505. PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
  506. getnamesptr, subsptr)
  507. #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  508. if (pcre_mode == PCRE32_MODE) \
  509. PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
  510. else if (pcre_mode == PCRE16_MODE) \
  511. PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
  512. else \
  513. PCRE_GET_STRINGNUMBER8(n, rc, ptr)
  514. #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  515. if (pcre_mode == PCRE32_MODE) \
  516. PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
  517. else if (pcre_mode == PCRE16_MODE) \
  518. PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
  519. else \
  520. PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
  521. #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  522. if (pcre_mode == PCRE32_MODE) \
  523. PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
  524. else if (pcre_mode == PCRE16_MODE) \
  525. PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
  526. else \
  527. PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
  528. #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  529. (pcre_mode == PCRE32_MODE ? \
  530. PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
  531. : pcre_mode == PCRE16_MODE ? \
  532. PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
  533. : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
  534. #define PCRE_JIT_STACK_FREE(stack) \
  535. if (pcre_mode == PCRE32_MODE) \
  536. PCRE_JIT_STACK_FREE32(stack); \
  537. else if (pcre_mode == PCRE16_MODE) \
  538. PCRE_JIT_STACK_FREE16(stack); \
  539. else \
  540. PCRE_JIT_STACK_FREE8(stack)
  541. #define PCRE_MAKETABLES \
  542. (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
  543. #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  544. if (pcre_mode == PCRE32_MODE) \
  545. PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
  546. else if (pcre_mode == PCRE16_MODE) \
  547. PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
  548. else \
  549. PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
  550. #define PCRE_PRINTINT(re, outfile, debug_lengths) \
  551. if (pcre_mode == PCRE32_MODE) \
  552. PCRE_PRINTINT32(re, outfile, debug_lengths); \
  553. else if (pcre_mode == PCRE16_MODE) \
  554. PCRE_PRINTINT16(re, outfile, debug_lengths); \
  555. else \
  556. PCRE_PRINTINT8(re, outfile, debug_lengths)
  557. #define PCRE_STUDY(extra, re, options, error) \
  558. if (pcre_mode == PCRE32_MODE) \
  559. PCRE_STUDY32(extra, re, options, error); \
  560. else if (pcre_mode == PCRE16_MODE) \
  561. PCRE_STUDY16(extra, re, options, error); \
  562. else \
  563. PCRE_STUDY8(extra, re, options, error)
  564. /* ----- Two out of three modes are supported ----- */
  565. #else
  566. /* We can use some macro trickery to make a single set of definitions work in
  567. the three different cases. */
  568. /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
  569. #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
  570. #define BITONE 32
  571. #define BITTWO 16
  572. /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
  573. #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
  574. #define BITONE 32
  575. #define BITTWO 8
  576. /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
  577. #else
  578. #define BITONE 16
  579. #define BITTWO 8
  580. #endif
  581. #define glue(a,b) a##b
  582. #define G(a,b) glue(a,b)
  583. /* ----- Common macros for two-mode cases ----- */
  584. #define PCHARS(lv, p, offset, len, f) \
  585. if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
  586. G(PCHARS,BITONE)(lv, p, offset, len, f); \
  587. else \
  588. G(PCHARS,BITTWO)(lv, p, offset, len, f)
  589. #define PCHARSV(p, offset, len, f) \
  590. if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
  591. G(PCHARSV,BITONE)(p, offset, len, f); \
  592. else \
  593. G(PCHARSV,BITTWO)(p, offset, len, f)
  594. #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
  595. if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
  596. G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
  597. else \
  598. G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
  599. #define SET_PCRE_CALLOUT(callout) \
  600. if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
  601. G(SET_PCRE_CALLOUT,BITONE)(callout); \
  602. else \
  603. G(SET_PCRE_CALLOUT,BITTWO)(callout)
  604. #define SET_PCRE_STACK_GUARD(stack_guard) \
  605. if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
  606. G(SET_PCRE_STACK_GUARD,BITONE)(stack_guard); \
  607. else \
  608. G(SET_PCRE_STACK_GUARD,BITTWO)(stack_guard)
  609. #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
  610. G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
  611. #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  612. if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
  613. G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
  614. else \
  615. G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
  616. #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  617. if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
  618. G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
  619. else \
  620. G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
  621. #define PCRE_CONFIG G(G(pcre,BITONE),_config)
  622. #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
  623. namesptr, cbuffer, size) \
  624. if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
  625. G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
  626. namesptr, cbuffer, size); \
  627. else \
  628. G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
  629. namesptr, cbuffer, size)
  630. #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  631. if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
  632. G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
  633. else \
  634. G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
  635. #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
  636. offsets, size_offsets, workspace, size_workspace) \
  637. if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
  638. G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
  639. offsets, size_offsets, workspace, size_workspace); \
  640. else \
  641. G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
  642. offsets, size_offsets, workspace, size_workspace)
  643. #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
  644. offsets, size_offsets) \
  645. if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
  646. G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
  647. offsets, size_offsets); \
  648. else \
  649. G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
  650. offsets, size_offsets)
  651. #define PCRE_FREE_STUDY(extra) \
  652. if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
  653. G(PCRE_FREE_STUDY,BITONE)(extra); \
  654. else \
  655. G(PCRE_FREE_STUDY,BITTWO)(extra)
  656. #define PCRE_FREE_SUBSTRING(substring) \
  657. if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
  658. G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
  659. else \
  660. G(PCRE_FREE_SUBSTRING,BITTWO)(substring)
  661. #define PCRE_FREE_SUBSTRING_LIST(listptr) \
  662. if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
  663. G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
  664. else \
  665. G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr)
  666. #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
  667. getnamesptr, subsptr) \
  668. if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
  669. G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
  670. getnamesptr, subsptr); \
  671. else \
  672. G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
  673. getnamesptr, subsptr)
  674. #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  675. if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
  676. G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
  677. else \
  678. G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr)
  679. #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  680. if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
  681. G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
  682. else \
  683. G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr)
  684. #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  685. if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
  686. G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
  687. else \
  688. G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr)
  689. #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  690. (pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
  691. G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
  692. : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize)
  693. #define PCRE_JIT_STACK_FREE(stack) \
  694. if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
  695. G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
  696. else \
  697. G(PCRE_JIT_STACK_FREE,BITTWO)(stack)
  698. #define PCRE_MAKETABLES \
  699. (pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
  700. G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)()
  701. #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  702. if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
  703. G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
  704. else \
  705. G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables)
  706. #define PCRE_PRINTINT(re, outfile, debug_lengths) \
  707. if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
  708. G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
  709. else \
  710. G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths)
  711. #define PCRE_STUDY(extra, re, options, error) \
  712. if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
  713. G(PCRE_STUDY,BITONE)(extra, re, options, error); \
  714. else \
  715. G(PCRE_STUDY,BITTWO)(extra, re, options, error)
  716. #endif /* Two out of three modes */
  717. /* ----- End of cases where more than one mode is supported ----- */
  718. /* ----- Only 8-bit mode is supported ----- */
  719. #elif defined SUPPORT_PCRE8
  720. #define CHAR_SIZE 1
  721. #define PCHARS PCHARS8
  722. #define PCHARSV PCHARSV8
  723. #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
  724. #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
  725. #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD8
  726. #define STRLEN STRLEN8
  727. #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
  728. #define PCRE_COMPILE PCRE_COMPILE8
  729. #define PCRE_CONFIG pcre_config
  730. #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
  731. #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
  732. #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
  733. #define PCRE_EXEC PCRE_EXEC8
  734. #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
  735. #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
  736. #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
  737. #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
  738. #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
  739. #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
  740. #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
  741. #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
  742. #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
  743. #define PCRE_MAKETABLES pcre_maketables()
  744. #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
  745. #define PCRE_PRINTINT PCRE_PRINTINT8
  746. #define PCRE_STUDY PCRE_STUDY8
  747. /* ----- Only 16-bit mode is supported ----- */
  748. #elif defined SUPPORT_PCRE16
  749. #define CHAR_SIZE 2
  750. #define PCHARS PCHARS16
  751. #define PCHARSV PCHARSV16
  752. #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
  753. #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
  754. #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD16
  755. #define STRLEN STRLEN16
  756. #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
  757. #define PCRE_COMPILE PCRE_COMPILE16
  758. #define PCRE_CONFIG pcre16_config
  759. #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
  760. #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
  761. #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
  762. #define PCRE_EXEC PCRE_EXEC16
  763. #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
  764. #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
  765. #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
  766. #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
  767. #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
  768. #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
  769. #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
  770. #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
  771. #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
  772. #define PCRE_MAKETABLES pcre16_maketables()
  773. #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
  774. #define PCRE_PRINTINT PCRE_PRINTINT16
  775. #define PCRE_STUDY PCRE_STUDY16
  776. /* ----- Only 32-bit mode is supported ----- */
  777. #elif defined SUPPORT_PCRE32
  778. #define CHAR_SIZE 4
  779. #define PCHARS PCHARS32
  780. #define PCHARSV PCHARSV32
  781. #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
  782. #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
  783. #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD32
  784. #define STRLEN STRLEN32
  785. #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
  786. #define PCRE_COMPILE PCRE_COMPILE32
  787. #define PCRE_CONFIG pcre32_config
  788. #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
  789. #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
  790. #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
  791. #define PCRE_EXEC PCRE_EXEC32
  792. #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
  793. #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
  794. #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
  795. #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
  796. #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
  797. #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
  798. #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
  799. #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
  800. #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
  801. #define PCRE_MAKETABLES pcre32_maketables()
  802. #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
  803. #define PCRE_PRINTINT PCRE_PRINTINT32
  804. #define PCRE_STUDY PCRE_STUDY32
  805. #endif
  806. /* ----- End of mode-specific function call macros ----- */
  807. /* Other parameters */
  808. #ifndef CLOCKS_PER_SEC
  809. #ifdef CLK_TCK
  810. #define CLOCKS_PER_SEC CLK_TCK
  811. #else
  812. #define CLOCKS_PER_SEC 100
  813. #endif
  814. #endif
  815. #if !defined NODFA
  816. #define DFA_WS_DIMENSION 1000
  817. #endif
  818. /* This is the default loop count for timing. */
  819. #define LOOPREPEAT 500000
  820. /* Static variables */
  821. static FILE *outfile;
  822. static int log_store = 0;
  823. static int callout_count;
  824. static int callout_extra;
  825. static int callout_fail_count;
  826. static int callout_fail_id;
  827. static int debug_lengths;
  828. static int first_callout;
  829. static int jit_was_used; /* set to TRUE by jit_callback() */
  830. static int locale_set = 0;
  831. static int show_malloc;
  832. static int stack_guard_return;
  833. static int use_utf; /* read by pchar() and pchars() when formatting output */
  834. static const unsigned char *last_callout_mark = NULL;
  835. /* The buffers grow automatically if very long input lines are encountered. */
  /* extend_inputline() doubles buffer_size and replaces both buffer and pbuffer
  with larger copies, so pointers into either one may become stale after a call
  to that function. */
  836. static int buffer_size = 50000;
  837. static pcre_uint8 *buffer = NULL;
  838. static pcre_uint8 *pbuffer = NULL;
  839. /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
  840. #ifdef COMPILE_PCRE16
  841. #error COMPILE_PCRE16 must not be set when compiling pcretest.c
  842. #endif
  843. #ifdef COMPILE_PCRE32
  844. #error COMPILE_PCRE32 must not be set when compiling pcretest.c
  845. #endif
  846. /* We need buffers for building 16/32-bit strings, and the tables of operator
  847. lengths that are used for 16/32-bit compiling, in order to swap bytes in a
  848. pattern for saving/reloading testing. Luckily, the data for these tables is
  849. defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
  850. are used in the tables) are adjusted appropriately for the 16/32-bit world.
  851. LINK_SIZE is also used later in this program. */
  /* 16-bit section: IMM2_SIZE becomes 1; a LINK_SIZE of 2 becomes 1 and a
  LINK_SIZE of 3 or 4 becomes 2; any other value stops the build. OP_lengths16 is
  expanded from OP_LENGTHS while these values are in force. Note that when
  SUPPORT_PCRE32 is defined as well, the 32-bit section below redefines both
  macros again, so the values set here do not survive past that point. */
  852. #ifdef SUPPORT_PCRE16
  853. #undef IMM2_SIZE
  854. #define IMM2_SIZE 1
  855. #if LINK_SIZE == 2
  856. #undef LINK_SIZE
  857. #define LINK_SIZE 1
  858. #elif LINK_SIZE == 3 || LINK_SIZE == 4
  859. #undef LINK_SIZE
  860. #define LINK_SIZE 2
  861. #else
  862. #error LINK_SIZE must be either 2, 3, or 4
  863. #endif
  /* buffer16 is allocated and grown by to16(); buffer16_size is its size in
  bytes (it is the value handed to malloc() there). */
  864. static int buffer16_size = 0;
  865. static pcre_uint16 *buffer16 = NULL;
  866. static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
  867. #endif /* SUPPORT_PCRE16 */
  /* 32-bit section: IMM2_SIZE and LINK_SIZE are both forced to 1 before
  OP_lengths32 is expanded from OP_LENGTHS. */
  868. #ifdef SUPPORT_PCRE32
  869. #undef IMM2_SIZE
  870. #define IMM2_SIZE 1
  871. #undef LINK_SIZE
  872. #define LINK_SIZE 1
  /* buffer32 is allocated and grown by to32(); buffer32_size is its size in
  bytes (it is the value handed to malloc() there). */
  873. static int buffer32_size = 0;
  874. static pcre_uint32 *buffer32 = NULL;
  875. static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
  876. #endif /* SUPPORT_PCRE32 */
  877. /* If we have 8-bit support, default to it; if there is also 16-or 32-bit
  878. support, it can be changed by an option. If there is no 8-bit support, there
  879. must be 16-or 32-bit support, so default it to 1. */
  /* The chain below picks the narrowest supported width as the initial mode:
  PCRE8_MODE first, then PCRE16_MODE, then PCRE32_MODE. If none of the three
  SUPPORT_PCREnn macros is defined, pcre_mode is not declared at all. */
  880. #if defined SUPPORT_PCRE8
  881. static int pcre_mode = PCRE8_MODE;
  882. #elif defined SUPPORT_PCRE16
  883. static int pcre_mode = PCRE16_MODE;
  884. #elif defined SUPPORT_PCRE32
  885. static int pcre_mode = PCRE32_MODE;
  886. #endif
  887. /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
  /* The seven entries enumerate every non-empty combination of the three JIT
  study options. Reading the 1-based position as a 3-bit mask, bit 0 selects
  PCRE_STUDY_JIT_COMPILE, bit 1 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE and bit 2
  PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE.
  NOTE(review): presumably indexed with (n - '1') - confirm at the point of use. */
  888. static int jit_study_bits[] =
  889. {
  890. PCRE_STUDY_JIT_COMPILE,
  891. PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
  892. PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
  893. PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
  894. PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
  895. PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
  896. PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
  897. PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
  898. };
  /* All three JIT study option bits combined into one mask. */
  899. #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
  900. PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
  901. /* Textual explanations for runtime error codes */
  /* A NULL entry means that no message is stored for that position; the comment
  beside each NULL gives the reason.
  NOTE(review): presumably indexed by the negated error code, so that entry 1
  corresponds to NOMATCH - confirm at the point of use. */
  902. static const char *errtexts[] = {
  903. NULL, /* 0 is no error */
  904. NULL, /* NOMATCH is handled specially */
  905. "NULL argument passed",
  906. "bad option value",
  907. "magic number missing",
  908. "unknown opcode - pattern overwritten?",
  909. "no more memory",
  910. NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
  911. "match limit exceeded",
  912. "callout error code",
  913. NULL, /* BADUTF8/16 is handled specially */
  914. NULL, /* BADUTF8/16 offset is handled specially */
  915. NULL, /* PARTIAL is handled specially */
  916. "not used - internal error",
  917. "internal error - pattern overwritten?",
  918. "bad count value",
  919. "item unsupported for DFA matching",
  920. "backreference condition or recursion test not supported for DFA matching",
  921. "match limit not supported for DFA matching",
  922. "workspace size exceeded in DFA matching",
  923. "too much recursion for DFA matching",
  924. "recursion limit exceeded",
  925. "not used - internal error",
  926. "invalid combination of newline options",
  927. "bad offset value",
  928. NULL, /* SHORTUTF8/16 is handled specially */
  929. "nested recursion at the same subject position",
  930. "JIT stack limit reached",
  931. "pattern compiled in wrong mode: 8-bit/16-bit error",
  932. "pattern compiled with other endianness",
  933. "invalid data in workspace for DFA restart",
  934. "bad JIT option",
  935. "bad length"
  936. };
  937. /*************************************************
  938. * Alternate character tables *
  939. *************************************************/
  940. /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
  941. using the default tables of the library. However, the T option can be used to
  942. select alternate sets of tables, for different kinds of testing. Note also that
  943. the L (locale) option also adjusts the tables. */
  944. /* This is the set of tables distributed as default with PCRE. It recognizes
  945. only ASCII characters. */
  /* Layout of the array, in order: a 256-byte lower casing table, a 256-byte
  case flipping table, 320 bytes of character class bit maps (ten maps of 32
  bytes each), and a 256-byte character type table - 1088 bytes in total. */
  946. static const pcre_uint8 tables0[] = {
  947. /* This table is a lower casing table. */
  948. 0, 1, 2, 3, 4, 5, 6, 7,
  949. 8, 9, 10, 11, 12, 13, 14, 15,
  950. 16, 17, 18, 19, 20, 21, 22, 23,
  951. 24, 25, 26, 27, 28, 29, 30, 31,
  952. 32, 33, 34, 35, 36, 37, 38, 39,
  953. 40, 41, 42, 43, 44, 45, 46, 47,
  954. 48, 49, 50, 51, 52, 53, 54, 55,
  955. 56, 57, 58, 59, 60, 61, 62, 63,
  956. 64, 97, 98, 99,100,101,102,103,
  957. 104,105,106,107,108,109,110,111,
  958. 112,113,114,115,116,117,118,119,
  959. 120,121,122, 91, 92, 93, 94, 95,
  960. 96, 97, 98, 99,100,101,102,103,
  961. 104,105,106,107,108,109,110,111,
  962. 112,113,114,115,116,117,118,119,
  963. 120,121,122,123,124,125,126,127,
  964. 128,129,130,131,132,133,134,135,
  965. 136,137,138,139,140,141,142,143,
  966. 144,145,146,147,148,149,150,151,
  967. 152,153,154,155,156,157,158,159,
  968. 160,161,162,163,164,165,166,167,
  969. 168,169,170,171,172,173,174,175,
  970. 176,177,178,179,180,181,182,183,
  971. 184,185,186,187,188,189,190,191,
  972. 192,193,194,195,196,197,198,199,
  973. 200,201,202,203,204,205,206,207,
  974. 208,209,210,211,212,213,214,215,
  975. 216,217,218,219,220,221,222,223,
  976. 224,225,226,227,228,229,230,231,
  977. 232,233,234,235,236,237,238,239,
  978. 240,241,242,243,244,245,246,247,
  979. 248,249,250,251,252,253,254,255,
  980. /* This table is a case flipping table. */
  981. 0, 1, 2, 3, 4, 5, 6, 7,
  982. 8, 9, 10, 11, 12, 13, 14, 15,
  983. 16, 17, 18, 19, 20, 21, 22, 23,
  984. 24, 25, 26, 27, 28, 29, 30, 31,
  985. 32, 33, 34, 35, 36, 37, 38, 39,
  986. 40, 41, 42, 43, 44, 45, 46, 47,
  987. 48, 49, 50, 51, 52, 53, 54, 55,
  988. 56, 57, 58, 59, 60, 61, 62, 63,
  989. 64, 97, 98, 99,100,101,102,103,
  990. 104,105,106,107,108,109,110,111,
  991. 112,113,114,115,116,117,118,119,
  992. 120,121,122, 91, 92, 93, 94, 95,
  993. 96, 65, 66, 67, 68, 69, 70, 71,
  994. 72, 73, 74, 75, 76, 77, 78, 79,
  995. 80, 81, 82, 83, 84, 85, 86, 87,
  996. 88, 89, 90,123,124,125,126,127,
  997. 128,129,130,131,132,133,134,135,
  998. 136,137,138,139,140,141,142,143,
  999. 144,145,146,147,148,149,150,151,
  1000. 152,153,154,155,156,157,158,159,
  1001. 160,161,162,163,164,165,166,167,
  1002. 168,169,170,171,172,173,174,175,
  1003. 176,177,178,179,180,181,182,183,
  1004. 184,185,186,187,188,189,190,191,
  1005. 192,193,194,195,196,197,198,199,
  1006. 200,201,202,203,204,205,206,207,
  1007. 208,209,210,211,212,213,214,215,
  1008. 216,217,218,219,220,221,222,223,
  1009. 224,225,226,227,228,229,230,231,
  1010. 232,233,234,235,236,237,238,239,
  1011. 240,241,242,243,244,245,246,247,
  1012. 248,249,250,251,252,253,254,255,
  1013. /* This table contains bit maps for various character classes. Each map is 32
  1014. bytes long and the bits run from the least significant end of each byte. The
  1015. classes that have their own maps are: space, xdigit, digit, upper, lower, word,
  1016. graph, print, punct, and cntrl. Other classes are built from combinations. */
  1017. 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
  1018. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1019. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1020. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1021. 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  1022. 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  1023. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1024. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1025. 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  1026. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1027. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1028. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1029. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1030. 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  1031. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1032. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1033. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1034. 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  1035. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1036. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1037. 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  1038. 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  1039. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1040. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1041. 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
  1042. 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  1043. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1044. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1045. 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
  1046. 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  1047. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1048. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1049. 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
  1050. 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  1051. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1052. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1053. 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
  1054. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  1055. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1056. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  1057. /* This table identifies various classes of character by individual bits:
  1058. 0x01 white space character
  1059. 0x02 letter
  1060. 0x04 decimal digit
  1061. 0x08 hexadecimal digit
  1062. 0x10 alphanumeric or '_'
  1063. 0x80 regular expression metacharacter or binary zero
  1064. */
  1065. 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
  1066. 0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
  1067. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
  1068. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
  1069. 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
  1070. 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
  1071. 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
  1072. 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
  1073. 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
  1074. 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
  1075. 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
  1076. 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
  1077. 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
  1078. 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
  1079. 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
  1080. 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
  1081. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  1082. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  1083. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  1084. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  1085. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  1086. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  1087. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  1088. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  1089. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  1090. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  1091. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  1092. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  1093. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  1094. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  1095. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  1096. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
  1097. /* This is a set of tables that came originally from a Windows user. It seems
  1098. to be at least an approximation of ISO 8859. In particular, there are
  1099. characters greater than 128 that are marked as spaces, letters, etc. */
  /* The array has the same four-part shape as tables0: 256 + 256 + 320 + 256
  bytes. The section markers below were added by matching the data against that
  layout. */
  1100. static const pcre_uint8 tables1[] = {
  /* First 256 bytes: lower casing table. As well as A-Z, codes 192-222 map to
  224-254, except that 215 maps to itself. */
  1101. 0,1,2,3,4,5,6,7,
  1102. 8,9,10,11,12,13,14,15,
  1103. 16,17,18,19,20,21,22,23,
  1104. 24,25,26,27,28,29,30,31,
  1105. 32,33,34,35,36,37,38,39,
  1106. 40,41,42,43,44,45,46,47,
  1107. 48,49,50,51,52,53,54,55,
  1108. 56,57,58,59,60,61,62,63,
  1109. 64,97,98,99,100,101,102,103,
  1110. 104,105,106,107,108,109,110,111,
  1111. 112,113,114,115,116,117,118,119,
  1112. 120,121,122,91,92,93,94,95,
  1113. 96,97,98,99,100,101,102,103,
  1114. 104,105,106,107,108,109,110,111,
  1115. 112,113,114,115,116,117,118,119,
  1116. 120,121,122,123,124,125,126,127,
  1117. 128,129,130,131,132,133,134,135,
  1118. 136,137,138,139,140,141,142,143,
  1119. 144,145,146,147,148,149,150,151,
  1120. 152,153,154,155,156,157,158,159,
  1121. 160,161,162,163,164,165,166,167,
  1122. 168,169,170,171,172,173,174,175,
  1123. 176,177,178,179,180,181,182,183,
  1124. 184,185,186,187,188,189,190,191,
  1125. 224,225,226,227,228,229,230,231,
  1126. 232,233,234,235,236,237,238,239,
  1127. 240,241,242,243,244,245,246,215,
  1128. 248,249,250,251,252,253,254,223,
  1129. 224,225,226,227,228,229,230,231,
  1130. 232,233,234,235,236,237,238,239,
  1131. 240,241,242,243,244,245,246,247,
  1132. 248,249,250,251,252,253,254,255,
  /* Next 256 bytes: case flipping table. */
  1133. 0,1,2,3,4,5,6,7,
  1134. 8,9,10,11,12,13,14,15,
  1135. 16,17,18,19,20,21,22,23,
  1136. 24,25,26,27,28,29,30,31,
  1137. 32,33,34,35,36,37,38,39,
  1138. 40,41,42,43,44,45,46,47,
  1139. 48,49,50,51,52,53,54,55,
  1140. 56,57,58,59,60,61,62,63,
  1141. 64,97,98,99,100,101,102,103,
  1142. 104,105,106,107,108,109,110,111,
  1143. 112,113,114,115,116,117,118,119,
  1144. 120,121,122,91,92,93,94,95,
  1145. 96,65,66,67,68,69,70,71,
  1146. 72,73,74,75,76,77,78,79,
  1147. 80,81,82,83,84,85,86,87,
  1148. 88,89,90,123,124,125,126,127,
  1149. 128,129,130,131,132,133,134,135,
  1150. 136,137,138,139,140,141,142,143,
  1151. 144,145,146,147,148,149,150,151,
  1152. 152,153,154,155,156,157,158,159,
  1153. 160,161,162,163,164,165,166,167,
  1154. 168,169,170,171,172,173,174,175,
  1155. 176,177,178,179,180,181,182,183,
  1156. 184,185,186,187,188,189,190,191,
  1157. 224,225,226,227,228,229,230,231,
  1158. 232,233,234,235,236,237,238,239,
  1159. 240,241,242,243,244,245,246,215,
  1160. 248,249,250,251,252,253,254,223,
  1161. 192,193,194,195,196,197,198,199,
  1162. 200,201,202,203,204,205,206,207,
  1163. 208,209,210,211,212,213,214,247,
  1164. 216,217,218,219,220,221,222,255,
  /* Next 320 bytes: ten 32-byte character class bit maps.
  NOTE(review): presumably in the same class order as tables0 (space, xdigit,
  digit, upper, lower, word, graph, print, punct, cntrl) - confirm. */
  1165. 0,62,0,0,1,0,0,0,
  1166. 0,0,0,0,0,0,0,0,
  1167. 32,0,0,0,1,0,0,0,
  1168. 0,0,0,0,0,0,0,0,
  1169. 0,0,0,0,0,0,255,3,
  1170. 126,0,0,0,126,0,0,0,
  1171. 0,0,0,0,0,0,0,0,
  1172. 0,0,0,0,0,0,0,0,
  1173. 0,0,0,0,0,0,255,3,
  1174. 0,0,0,0,0,0,0,0,
  1175. 0,0,0,0,0,0,12,2,
  1176. 0,0,0,0,0,0,0,0,
  1177. 0,0,0,0,0,0,0,0,
  1178. 254,255,255,7,0,0,0,0,
  1179. 0,0,0,0,0,0,0,0,
  1180. 255,255,127,127,0,0,0,0,
  1181. 0,0,0,0,0,0,0,0,
  1182. 0,0,0,0,254,255,255,7,
  1183. 0,0,0,0,0,4,32,4,
  1184. 0,0,0,128,255,255,127,255,
  1185. 0,0,0,0,0,0,255,3,
  1186. 254,255,255,135,254,255,255,7,
  1187. 0,0,0,0,0,4,44,6,
  1188. 255,255,127,255,255,255,127,255,
  1189. 0,0,0,0,254,255,255,255,
  1190. 255,255,255,255,255,255,255,127,
  1191. 0,0,0,0,254,255,255,255,
  1192. 255,255,255,255,255,255,255,255,
  1193. 0,2,0,0,255,255,255,255,
  1194. 255,255,255,255,255,255,255,127,
  1195. 0,0,0,0,255,255,255,255,
  1196. 255,255,255,255,255,255,255,255,
  1197. 0,0,0,0,254,255,0,252,
  1198. 1,0,0,248,1,0,0,120,
  1199. 0,0,0,0,254,255,255,255,
  1200. 0,0,128,0,0,0,128,0,
  1201. 255,255,255,255,0,0,0,0,
  1202. 0,0,0,0,0,0,0,128,
  1203. 255,255,255,255,0,0,0,0,
  1204. 0,0,0,0,0,0,0,0,
  /* Final 256 bytes: character type table, one flag byte per character code.
  NOTE(review): the values appear to use the same flag bits as tables0
  (1 = space, 2 = letter, 4 = digit, 8 = hex digit, 16 = word, 128 = meta) -
  confirm. */
  1205. 128,0,0,0,0,0,0,0,
  1206. 0,1,1,0,1,1,0,0,
  1207. 0,0,0,0,0,0,0,0,
  1208. 0,0,0,0,0,0,0,0,
  1209. 1,0,0,0,128,0,0,0,
  1210. 128,128,128,128,0,0,128,0,
  1211. 28,28,28,28,28,28,28,28,
  1212. 28,28,0,0,0,0,0,128,
  1213. 0,26,26,26,26,26,26,18,
  1214. 18,18,18,18,18,18,18,18,
  1215. 18,18,18,18,18,18,18,18,
  1216. 18,18,18,128,128,0,128,16,
  1217. 0,26,26,26,26,26,26,18,
  1218. 18,18,18,18,18,18,18,18,
  1219. 18,18,18,18,18,18,18,18,
  1220. 18,18,18,128,128,0,0,0,
  1221. 0,0,0,0,0,1,0,0,
  1222. 0,0,0,0,0,0,0,0,
  1223. 0,0,0,0,0,0,0,0,
  1224. 0,0,0,0,0,0,0,0,
  1225. 1,0,0,0,0,0,0,0,
  1226. 0,0,18,0,0,0,0,0,
  1227. 0,0,20,20,0,18,0,0,
  1228. 0,20,18,0,0,0,0,0,
  1229. 18,18,18,18,18,18,18,18,
  1230. 18,18,18,18,18,18,18,18,
  1231. 18,18,18,18,18,18,18,0,
  1232. 18,18,18,18,18,18,18,18,
  1233. 18,18,18,18,18,18,18,18,
  1234. 18,18,18,18,18,18,18,18,
  1235. 18,18,18,18,18,18,18,0,
  1236. 18,18,18,18,18,18,18,18
  1237. };
  #ifndef HAVE_STRERROR
  /*************************************************
  *     Fallback strerror() for old C libraries    *
  *************************************************/

  /* A few elderly systems (SunOS4 is one) ship a C library without strerror(),
  but they do export the message table sys_errlist[] and its length sys_nerr.
  That is enough to synthesise the function.

  Argument:   n    an error number
  Returns:    sys_errlist[n] when 0 <= n < sys_nerr, otherwise the fixed
              string "unknown error number"
  */

  extern int sys_nerr;
  extern char *sys_errlist[];

  char *
  strerror(int n)
  {
    const int in_range = (n >= 0 && n < sys_nerr);
    return in_range ? sys_errlist[n] : "unknown error number";
  }
  #endif /* HAVE_STRERROR */
  1254. /*************************************************
  1255. * Print newline configuration *
  1256. *************************************************/
  1257. /*
  1258. Arguments:
  1259. rc the return code from PCRE_CONFIG_NEWLINE
  1260. isc TRUE if called from "-C newline"
  1261. Returns: nothing
  1262. */
  1263. static void
  1264. print_newline_config(int rc, BOOL isc)
  1265. {
  1266. const char *s = NULL;
  1267. if (!isc) printf(" Newline sequence is ");
  1268. switch(rc)
  1269. {
  1270. case CHAR_CR: s = "CR"; break;
  1271. case CHAR_LF: s = "LF"; break;
  1272. case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
  1273. case -1: s = "ANY"; break;
  1274. case -2: s = "ANYCRLF"; break;
  1275. default:
  1276. printf("a non-standard value: 0x%04x\n", rc);
  1277. return;
  1278. }
  1279. printf("%s\n", s);
  1280. }
  1281. /*************************************************
  1282. * JIT memory callback *
  1283. *************************************************/
  1284. static pcre_jit_stack* jit_callback(void *arg)
  1285. {
  1286. jit_was_used = TRUE;
  1287. return (pcre_jit_stack *)arg;
  1288. }
  1289. #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
  1290. /*************************************************
  1291. * Convert UTF-8 string to value *
  1292. *************************************************/
  1293. /* This function takes one or more bytes that represents a UTF-8 character,
  1294. and returns the value of the character.
  1295. Argument:
  1296. utf8bytes a pointer to the byte vector
  1297. vptr a pointer to an int to receive the value
  1298. Returns: > 0 => the number of bytes consumed
  1299. -6 to 0 => malformed UTF-8 character at offset = (-return)
  1300. */
  1301. static int
  1302. utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
  1303. {
  1304. pcre_uint32 c = *utf8bytes++;
  1305. pcre_uint32 d = c;
  1306. int i, j, s;
  1307. for (i = -1; i < 6; i++) /* i is number of additional bytes */
  1308. {
  1309. if ((d & 0x80) == 0) break;
  1310. d <<= 1;
  1311. }
  1312. if (i == -1) { *vptr = c; return 1; } /* ascii character */
  1313. if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
  1314. /* i now has a value in the range 1-5 */
  1315. s = 6*i;
  1316. d = (c & utf8_table3[i]) << s;
  1317. for (j = 0; j < i; j++)
  1318. {
  1319. c = *utf8bytes++;
  1320. if ((c & 0xc0) != 0x80) return -(j+1);
  1321. s -= 6;
  1322. d |= (c & 0x3f) << s;
  1323. }
  1324. /* Check that encoding was the correct unique one */
  1325. for (j = 0; j < utf8_table1_size; j++)
  1326. if (d <= (pcre_uint32)utf8_table1[j]) break;
  1327. if (j != i) return -(i+1);
  1328. /* Valid value */
  1329. *vptr = d;
  1330. return i+1;
  1331. }
  1332. #endif /* NOUTF || SUPPORT_PCRE16 */
  #if defined SUPPORT_PCRE8 && !defined NOUTF
  /*************************************************
  *       Convert character value to UTF-8         *
  *************************************************/

  /* Encodes a character value in the range 0 - 0x7fffffff as a UTF-8 sequence
  of 1 to 6 bytes. Nothing is written for a value outside that range.

  Arguments:
    cvalue     the character value
    utf8bytes  pointer to buffer for result - at least 6 bytes long

  Returns:     number of bytes placed in the buffer
               OR -1 if cvalue is greater than 0x7fffffff
  */

  static int
  ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
  {
    int extra = 0;        /* number of bytes after the first */
    pcre_uint8 *out;

    if (cvalue > 0x7fffffffu) return -1;

    while (extra < utf8_table1_size &&
           cvalue > (pcre_uint32)utf8_table1[extra])
      extra++;

    /* Fill in the trailing bytes from the last one backwards, six bits of the
    value at a time; whatever remains goes into the first byte. */
    for (out = utf8bytes + extra; out > utf8bytes; out--)
      {
      *out = 0x80 | (cvalue & 0x3f);
      cvalue >>= 6;
      }
    *out = utf8_table2[extra] | cvalue;

    return extra + 1;
  }
  #endif
  #ifdef SUPPORT_PCRE16
  /*************************************************
  *         Convert a string to 16-bit             *
  *************************************************/

  /* The result is always left in buffer16, which is replaced by a larger block
  when it holds fewer than 2*len + 2 bytes. That is always enough: every input
  byte yields at most one 16-bit unit, except that a character above 0xffff takes
  four input bytes and yields two units, and one more unit holds the terminating
  zero.

  Note that this function does not object to surrogate values. This is
  deliberate; it makes it possible to construct UTF-16 strings that are invalid,
  for the purpose of testing that they are correctly faulted.

  Patterns to be converted are either plain ASCII or UTF-8; data lines are always
  in UTF-8 so that values greater than 255 can be handled. Consequently the input
  is copied byte for byte only when it is a non-UTF pattern; in every other case
  it is decoded as UTF-8.

  Arguments:
    data       TRUE if converting a data line; FALSE for a regex
    p          points to a byte string
    utf        true if UTF-8 (to be converted to UTF-16)
    len        number of bytes in the string (excluding trailing zero)

  Returns:     number of 16-bit data items used (excluding trailing zero)
               OR -1 if a UTF-8 string is malformed
               OR -2 if a value > 0x10ffff is encountered
               OR -3 if a value > 0xffff is encountered when not in UTF mode
  */

  static int
  to16(int data, pcre_uint8 *p, int utf, int len)
  {
    const int needed = 2*len + 2;
    pcre_uint16 *out;

    if (buffer16_size < needed)
      {
      if (buffer16 != NULL) free(buffer16);
      buffer16_size = needed;
      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
      if (buffer16 == NULL)
        {
        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
        exit(1);
        }
      }

    out = buffer16;

    if (utf || data)
      {
      pcre_uint32 c = 0;
      int remaining = len;

      while (remaining > 0)
        {
        int chlen = utf82ord(p, &c);
        if (chlen <= 0) return -1;
        if (c > 0x10ffff) return -2;
        p += chlen;
        remaining -= chlen;

        if (c >= 0x10000)
          {
          /* Needs a surrogate pair, which exists only in UTF mode */
          if (!utf) return -3;
          c -= 0x10000;
          *out++ = 0xD800 | (c >> 10);
          *out++ = 0xDC00 | (c & 0x3ff);
          }
        else *out++ = c;
        }
      }
    else
      {
      int k;
      for (k = len; k > 0; k--) *out++ = *p++;
      }

    *out = 0;
    return out - buffer16;
  }
  #endif
  1429. #ifdef SUPPORT_PCRE32
  1430. /*************************************************
  1431. * Convert a string to 32-bit *
  1432. *************************************************/
  1433. /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
  1434. 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
  1435. times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
  1436. in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
  1437. result is always left in buffer32.
  1438. Note that this function does not object to surrogate values. This is
  1439. deliberate; it makes it possible to construct UTF-32 strings that are invalid,
  1440. for the purpose of testing that they are correctly faulted.
  1441. Patterns to be converted are either plain ASCII or UTF-8; data lines are always
  1442. in UTF-8 so that values greater than 255 can be handled.
  1443. Arguments:
  1444. data TRUE if converting a data line; FALSE for a regex
  1445. p points to a byte string
  1446. utf true if UTF-8 (to be converted to UTF-32)
  1447. len number of bytes in the string (excluding trailing zero)
  1448. Returns: number of 32-bit data items used (excluding trailing zero)
  1449. OR -1 if a UTF-8 string is malformed
  1450. OR -2 if a value > 0x10ffff is encountered
  1451. OR -3 if an ill-formed value is encountered (i.e. a surrogate)
  1452. */
  1453. static int
  1454. to32(int data, pcre_uint8 *p, int utf, int len)
  1455. {
  1456. pcre_uint32 *pp;
  1457. if (buffer32_size < 4*len + 4)
  1458. {
  1459. if (buffer32 != NULL) free(buffer32);
  1460. buffer32_size = 4*len + 4;
  1461. buffer32 = (pcre_uint32 *)malloc(buffer32_size);
  1462. if (buffer32 == NULL)
  1463. {
  1464. fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
  1465. exit(1);
  1466. }
  1467. }
  1468. pp = buffer32;
  1469. if (!utf && !data)
  1470. {
  1471. while (len-- > 0) *pp++ = *p++;
  1472. }
  1473. else
  1474. {
  1475. pcre_uint32 c = 0;
  1476. while (len > 0)
  1477. {
  1478. int chlen = utf82ord(p, &c);
  1479. if (chlen <= 0) return -1;
  1480. if (utf)
  1481. {
  1482. if (c > 0x10ffff) return -2;
  1483. if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
  1484. }
  1485. p += chlen;
  1486. len -= chlen;
  1487. *pp++ = c;
  1488. }
  1489. }
  1490. *pp = 0;
  1491. return pp - buffer32;
  1492. }
  /* Check that a 32-bit character string is valid UTF-32. A string is rejected
  if any element is greater than 0x10ffff or lies in the surrogate range
  0xd800 - 0xdfff.

  Arguments:
    string       points to the string
    length       length of string, or -1 if the string is zero-terminated

  Returns:       TRUE  if the string is a valid UTF-32 string
                 FALSE otherwise
  */

  #ifdef NEVER   /* Not used */
  #ifdef SUPPORT_UTF
  static BOOL
  valid_utf32(pcre_uint32 *string, int length)
  {
    register pcre_uint32 *p;
    register pcre_uint32 c;

    /* A negative length means the string is zero-terminated, so measure it
    first. Without this step the checking loop below would never run for such a
    call, and every zero-terminated string would be reported as valid. */
    if (length < 0)
      {
      for (p = string; *p != 0; p++) { /* just counting */ }
      length = (int)(p - string);
      }

    for (p = string; length-- > 0; p++)
      {
      c = *p;
      if (c > 0x10ffffu) return FALSE;                 /* Too big */
      if ((c & 0xfffff800u) == 0xd800u) return FALSE;  /* Surrogate */
      }

    return TRUE;
  }
  #endif /* SUPPORT_UTF */
  #endif /* NEVER */
  1517. #endif /* SUPPORT_PCRE32 */
  1518. /*************************************************
  1519. * Read or extend an input line *
  1520. *************************************************/
  1521. /* Input lines are read into buffer, but both patterns and data lines can be
  1522. continued over multiple input lines. In addition, if the buffer fills up, we
  1523. want to automatically expand it so as to be able to handle extremely large
  1524. lines that are needed for certain stress tests. When the input buffer is
  1525. expanded, the other two buffers must also be expanded likewise, and the
  1526. contents of pbuffer, which are a copy of the input for callouts, must be
  1527. preserved (for when expansion happens for a data line). This is not the most
  1528. optimal way of handling this, but hey, this is just a test program!
  1529. Arguments:
  1530. f the file to read
  1531. start where in buffer to start (this *must* be within buffer)
  1532. prompt for stdin or readline()
  1533. Returns: pointer to the start of new data
  1534. could be a copy of start, or could be moved
  1535. NULL if no data read and EOF reached
  1536. */
  1537. static pcre_uint8 *
  1538. extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
  1539. {
  1540. pcre_uint8 *here = start;
  1541. for (;;)
  1542. {
  1543. size_t rlen = (size_t)(buffer_size - (here - buffer));
  1544. if (rlen > 1000)
  1545. {
  1546. int dlen;
  1547. /* If libreadline or libedit support is required, use readline() to read a
  1548. line if the input is a terminal. Note that readline() removes the trailing
  1549. newline, so we must put it back again, to be compatible with fgets(). */
  1550. #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
  1551. if (isatty(fileno(f)))
  1552. {
  1553. size_t len;
  1554. char *s = readline(prompt);
  1555. if (s == NULL) return (here == start)? NULL : start;
  1556. len = strlen(s);
  1557. if (len > 0) add_history(s);
  1558. if (len > rlen - 1) len = rlen - 1;
  1559. memcpy(here, s, len);
  1560. here[len] = '\n';
  1561. here[len+1] = 0;
  1562. free(s);
  1563. }
  1564. else
  1565. #endif
  1566. /* Read the next line by normal means, prompting if the file is stdin. */
  1567. {
  1568. if (f == stdin) printf("%s", prompt);
  1569. if (fgets((char *)here, rlen, f) == NULL)
  1570. return (here == start)? NULL : start;
  1571. }
  1572. dlen = (int)strlen((char *)here);
  1573. if (dlen > 0 && here[dlen - 1] == '\n') return start;
  1574. here += dlen;
  1575. }
  1576. else
  1577. {
  1578. int new_buffer_size = 2*buffer_size;
  1579. pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
  1580. pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
  1581. if (new_buffer == NULL || new_pbuffer == NULL)
  1582. {
  1583. fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
  1584. exit(1);
  1585. }
  1586. memcpy(new_buffer, buffer, buffer_size);
  1587. memcpy(new_pbuffer, pbuffer, buffer_size);
  1588. buffer_size = new_buffer_size;
  1589. start = new_buffer + (start - buffer);
  1590. here = new_buffer + (here - buffer);
  1591. free(buffer);
  1592. free(pbuffer);
  1593. buffer = new_buffer;
  1594. pbuffer = new_pbuffer;
  1595. }
  1596. }
  1597. /* Control never gets here */
  1598. }
  1599. /*************************************************
  1600. * Read number from string *
  1601. *************************************************/
  1602. /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
  1603. around with conditional compilation, just do the job by hand. It is only used
  1604. for unpicking arguments, so just keep it simple.
  1605. Arguments:
  1606. str string to be converted
  1607. endptr where to put the end pointer
  1608. Returns: the unsigned long
  1609. */
  1610. static int
  1611. get_value(pcre_uint8 *str, pcre_uint8 **endptr)
  1612. {
  1613. int result = 0;
  1614. while(*str != 0 && isspace(*str)) str++;
  1615. while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
  1616. *endptr = str;
  1617. return(result);
  1618. }
  1619. /*************************************************
  1620. * Print one character *
  1621. *************************************************/
  1622. /* Print a single character either literally, or as a hex escape. */
  1623. static int pchar(pcre_uint32 c, FILE *f)
  1624. {
  1625. int n = 0;
  1626. char tempbuffer[16];
  1627. if (PRINTOK(c))
  1628. {
  1629. if (f != NULL) fprintf(f, "%c", c);
  1630. return 1;
  1631. }
  1632. if (c < 0x100)
  1633. {
  1634. if (use_utf)
  1635. {
  1636. if (f != NULL) fprintf(f, "\\x{%02x}", c);
  1637. return 6;
  1638. }
  1639. else
  1640. {
  1641. if (f != NULL) fprintf(f, "\\x%02x", c);
  1642. return 4;
  1643. }
  1644. }
  1645. if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
  1646. else n = sprintf(tempbuffer, "\\x{%02x}", c);
  1647. return n >= 0 ? n : 0;
  1648. }
  1649. #ifdef SUPPORT_PCRE8
  1650. /*************************************************
  1651. * Print 8-bit character string *
  1652. *************************************************/
  1653. /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
  1654. If handed a NULL file, just counts chars without printing. */
  1655. static int pchars(pcre_uint8 *p, int length, FILE *f)
  1656. {
  1657. pcre_uint32 c = 0;
  1658. int yield = 0;
  1659. if (length < 0)
  1660. length = strlen((char *)p);
  1661. while (length-- > 0)
  1662. {
  1663. #if !defined NOUTF
  1664. if (use_utf)
  1665. {
  1666. int rc = utf82ord(p, &c);
  1667. if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
  1668. {
  1669. length -= rc - 1;
  1670. p += rc;
  1671. yield += pchar(c, f);
  1672. continue;
  1673. }
  1674. }
  1675. #endif
  1676. c = *p++;
  1677. yield += pchar(c, f);
  1678. }
  1679. return yield;
  1680. }
  1681. #endif
  1682. #ifdef SUPPORT_PCRE16
  1683. /*************************************************
  1684. * Find length of 0-terminated 16-bit string *
  1685. *************************************************/
  1686. static int strlen16(PCRE_SPTR16 p)
  1687. {
  1688. PCRE_SPTR16 pp = p;
  1689. while (*pp != 0) pp++;
  1690. return (int)(pp - p);
  1691. }
  1692. #endif /* SUPPORT_PCRE16 */
  1693. #ifdef SUPPORT_PCRE32
  1694. /*************************************************
  1695. * Find length of 0-terminated 32-bit string *
  1696. *************************************************/
  1697. static int strlen32(PCRE_SPTR32 p)
  1698. {
  1699. PCRE_SPTR32 pp = p;
  1700. while (*pp != 0) pp++;
  1701. return (int)(pp - p);
  1702. }
  1703. #endif /* SUPPORT_PCRE32 */
  1704. #ifdef SUPPORT_PCRE16
  1705. /*************************************************
  1706. * Print 16-bit character string *
  1707. *************************************************/
  1708. /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
  1709. If handed a NULL file, just counts chars without printing. */
  1710. static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
  1711. {
  1712. int yield = 0;
  1713. if (length < 0)
  1714. length = strlen16(p);
  1715. while (length-- > 0)
  1716. {
  1717. pcre_uint32 c = *p++ & 0xffff;
  1718. #if !defined NOUTF
  1719. if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
  1720. {
  1721. int d = *p & 0xffff;
  1722. if (d >= 0xDC00 && d <= 0xDFFF)
  1723. {
  1724. c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
  1725. length--;
  1726. p++;
  1727. }
  1728. }
  1729. #endif
  1730. yield += pchar(c, f);
  1731. }
  1732. return yield;
  1733. }
  1734. #endif /* SUPPORT_PCRE16 */
  1735. #ifdef SUPPORT_PCRE32
  1736. /*************************************************
  1737. * Print 32-bit character string *
  1738. *************************************************/
  1739. /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
  1740. If handed a NULL file, just counts chars without printing. */
  1741. static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
  1742. {
  1743. int yield = 0;
  1744. (void)(utf); /* Avoid compiler warning */
  1745. if (length < 0)
  1746. length = strlen32(p);
  1747. while (length-- > 0)
  1748. {
  1749. pcre_uint32 c = *p++;
  1750. yield += pchar(c, f);
  1751. }
  1752. return yield;
  1753. }
  1754. #endif /* SUPPORT_PCRE32 */
  1755. #ifdef SUPPORT_PCRE8
  1756. /*************************************************
  1757. * Read a capture name (8-bit) and check it *
  1758. *************************************************/
  1759. static pcre_uint8 *
  1760. read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
  1761. {
  1762. pcre_uint8 *npp = *pp;
  1763. while (isalnum(*p)) *npp++ = *p++;
  1764. *npp++ = 0;
  1765. *npp = 0;
  1766. if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
  1767. {
  1768. fprintf(outfile, "no parentheses with name \"");
  1769. PCHARSV(*pp, 0, -1, outfile);
  1770. fprintf(outfile, "\"\n");
  1771. }
  1772. *pp = npp;
  1773. return p;
  1774. }
  1775. #endif /* SUPPORT_PCRE8 */
  1776. #ifdef SUPPORT_PCRE16
  1777. /*************************************************
  1778. * Read a capture name (16-bit) and check it *
  1779. *************************************************/
  1780. /* Note that the text being read is 8-bit. */
  1781. static pcre_uint8 *
  1782. read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
  1783. {
  1784. pcre_uint16 *npp = *pp;
  1785. while (isalnum(*p)) *npp++ = *p++;
  1786. *npp++ = 0;
  1787. *npp = 0;
  1788. if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
  1789. {
  1790. fprintf(outfile, "no parentheses with name \"");
  1791. PCHARSV(*pp, 0, -1, outfile);
  1792. fprintf(outfile, "\"\n");
  1793. }
  1794. *pp = npp;
  1795. return p;
  1796. }
  1797. #endif /* SUPPORT_PCRE16 */
  1798. #ifdef SUPPORT_PCRE32
  1799. /*************************************************
  1800. * Read a capture name (32-bit) and check it *
  1801. *************************************************/
  1802. /* Note that the text being read is 8-bit. */
  1803. static pcre_uint8 *
  1804. read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
  1805. {
  1806. pcre_uint32 *npp = *pp;
  1807. while (isalnum(*p)) *npp++ = *p++;
  1808. *npp++ = 0;
  1809. *npp = 0;
  1810. if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
  1811. {
  1812. fprintf(outfile, "no parentheses with name \"");
  1813. PCHARSV(*pp, 0, -1, outfile);
  1814. fprintf(outfile, "\"\n");
  1815. }
  1816. *pp = npp;
  1817. return p;
  1818. }
  1819. #endif /* SUPPORT_PCRE32 */
  1820. /*************************************************
  1821. * Stack guard function *
  1822. *************************************************/
  1823. /* Called from PCRE when set in pcre_stack_guard. We give an error (non-zero)
  1824. return when a count overflows. */
  1825. static int stack_guard(void)
  1826. {
  1827. return stack_guard_return;
  1828. }
  1829. /*************************************************
  1830. * Callout function *
  1831. *************************************************/
  1832. /* Called from PCRE as a result of the (?C) item. We print out where we are in
  1833. the match. Yield zero unless more callouts than the fail count, or the callout
  1834. data is not zero. */
  1835. static int callout(pcre_callout_block *cb)
  1836. {
  1837. FILE *f = (first_callout | callout_extra)? outfile : NULL;
  1838. int i, current_position, pre_start, post_start, subject_length;
  1839. if (callout_extra)
  1840. {
  1841. fprintf(f, "Callout %d: last capture = %d\n",
  1842. cb->callout_number, cb->capture_last);
  1843. if (cb->offset_vector != NULL)
  1844. {
  1845. for (i = 0; i < cb->capture_top * 2; i += 2)
  1846. {
  1847. if (cb->offset_vector[i] < 0)
  1848. fprintf(f, "%2d: <unset>\n", i/2);
  1849. else
  1850. {
  1851. fprintf(f, "%2d: ", i/2);
  1852. PCHARSV(cb->subject, cb->offset_vector[i],
  1853. cb->offset_vector[i+1] - cb->offset_vector[i], f);
  1854. fprintf(f, "\n");
  1855. }
  1856. }
  1857. }
  1858. }
  1859. /* Re-print the subject in canonical form, the first time or if giving full
  1860. datails. On subsequent calls in the same match, we use pchars just to find the
  1861. printed lengths of the substrings. */
  1862. if (f != NULL) fprintf(f, "--->");
  1863. /* If a lookbehind is involved, the current position may be earlier than the
  1864. match start. If so, use the match start instead. */
  1865. current_position = (cb->current_position >= cb->start_match)?
  1866. cb->current_position : cb->start_match;
  1867. PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
  1868. PCHARS(post_start, cb->subject, cb->start_match,
  1869. current_position - cb->start_match, f);
  1870. PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
  1871. PCHARSV(cb->subject, current_position, cb->subject_length - current_position, f);
  1872. if (f != NULL) fprintf(f, "\n");
  1873. /* Always print appropriate indicators, with callout number if not already
  1874. shown. For automatic callouts, show the pattern offset. */
  1875. if (cb->callout_number == 255)
  1876. {
  1877. fprintf(outfile, "%+3d ", cb->pattern_position);
  1878. if (cb->pattern_position > 99) fprintf(outfile, "\n ");
  1879. }
  1880. else
  1881. {
  1882. if (callout_extra) fprintf(outfile, " ");
  1883. else fprintf(outfile, "%3d ", cb->callout_number);
  1884. }
  1885. for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
  1886. fprintf(outfile, "^");
  1887. if (post_start > 0)
  1888. {
  1889. for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
  1890. fprintf(outfile, "^");
  1891. }
  1892. for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
  1893. fprintf(outfile, " ");
  1894. fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
  1895. pbuffer + cb->pattern_position);
  1896. fprintf(outfile, "\n");
  1897. first_callout = 0;
  1898. if (cb->mark != last_callout_mark)
  1899. {
  1900. if (cb->mark == NULL)
  1901. fprintf(outfile, "Latest Mark: <unset>\n");
  1902. else
  1903. {
  1904. fprintf(outfile, "Latest Mark: ");
  1905. PCHARSV(cb->mark, 0, -1, outfile);
  1906. putc('\n', outfile);
  1907. }
  1908. last_callout_mark = cb->mark;
  1909. }
  1910. if (cb->callout_data != NULL)
  1911. {
  1912. int callout_data = *((int *)(cb->callout_data));
  1913. if (callout_data != 0)
  1914. {
  1915. fprintf(outfile, "Callout data = %d\n", callout_data);
  1916. return callout_data;
  1917. }
  1918. }
  1919. return (cb->callout_number != callout_fail_id)? 0 :
  1920. (++callout_count >= callout_fail_count)? 1 : 0;
  1921. }
  1922. /*************************************************
  1923. * Local malloc functions *
  1924. *************************************************/
  1925. /* Alternative malloc function, to test functionality and save the size of a
  1926. compiled re, which is the first store request that pcre_compile() makes. The
  1927. show_malloc variable is set only during matching. */
  1928. static void *new_malloc(size_t size)
  1929. {
  1930. void *block = malloc(size);
  1931. if (show_malloc)
  1932. fprintf(outfile, "malloc %3d %p\n", (int)size, block);
  1933. return block;
  1934. }
  1935. static void new_free(void *block)
  1936. {
  1937. if (show_malloc)
  1938. fprintf(outfile, "free %p\n", block);
  1939. free(block);
  1940. }
  1941. /* For recursion malloc/free, to test stacking calls */
  1942. static void *stack_malloc(size_t size)
  1943. {
  1944. void *block = malloc(size);
  1945. if (show_malloc)
  1946. fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
  1947. return block;
  1948. }
  1949. static void stack_free(void *block)
  1950. {
  1951. if (show_malloc)
  1952. fprintf(outfile, "stack_free %p\n", block);
  1953. free(block);
  1954. }
  1955. /*************************************************
  1956. * Call pcre_fullinfo() *
  1957. *************************************************/
  1958. /* Get one piece of information from the pcre_fullinfo() function. When only
  1959. one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
  1960. value, but the code is defensive.
  1961. Arguments:
  1962. re compiled regex
  1963. study study data
  1964. option PCRE_INFO_xxx option
  1965. ptr where to put the data
  1966. Returns: 0 when OK, < 0 on error
  1967. */
  1968. static int
  1969. new_info(pcre *re, pcre_extra *study, int option, void *ptr)
  1970. {
  1971. int rc;
  1972. if (pcre_mode == PCRE32_MODE)
  1973. #ifdef SUPPORT_PCRE32
  1974. rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
  1975. #else
  1976. rc = PCRE_ERROR_BADMODE;
  1977. #endif
  1978. else if (pcre_mode == PCRE16_MODE)
  1979. #ifdef SUPPORT_PCRE16
  1980. rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
  1981. #else
  1982. rc = PCRE_ERROR_BADMODE;
  1983. #endif
  1984. else
  1985. #ifdef SUPPORT_PCRE8
  1986. rc = pcre_fullinfo(re, study, option, ptr);
  1987. #else
  1988. rc = PCRE_ERROR_BADMODE;
  1989. #endif
  1990. if (rc < 0 && rc != PCRE_ERROR_UNSET)
  1991. {
  1992. fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
  1993. pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
  1994. if (rc == PCRE_ERROR_BADMODE)
  1995. fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
  1996. "%d-bit mode\n", 8 * CHAR_SIZE,
  1997. 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
  1998. }
  1999. return rc;
  2000. }
  2001. /*************************************************
  2002. * Swap byte functions *
  2003. *************************************************/
  2004. /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
  2005. value, respectively.
  2006. Arguments:
  2007. value any number
  2008. Returns: the byte swapped value
  2009. */
  2010. static pcre_uint32
  2011. swap_uint32(pcre_uint32 value)
  2012. {
  2013. return ((value & 0x000000ff) << 24) |
  2014. ((value & 0x0000ff00) << 8) |
  2015. ((value & 0x00ff0000) >> 8) |
  2016. (value >> 24);
  2017. }
  2018. static pcre_uint16
  2019. swap_uint16(pcre_uint16 value)
  2020. {
  2021. return (value >> 8) | (value << 8);
  2022. }
  2023. /*************************************************
  2024. * Flip bytes in a compiled pattern *
  2025. *************************************************/
  2026. /* This function is called if the 'F' option was present on a pattern that is
  2027. to be written to a file. We flip the bytes of all the integer fields in the
  2028. regex data block and the study block. In 16-bit mode this also flips relevant
  2029. bytes in the pattern itself. This is to make it possible to test PCRE's
  2030. ability to reload byte-flipped patterns, e.g. those compiled on a different
  2031. architecture. */
  2032. #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
  2033. static void
  2034. regexflip8_or_16(pcre *ere, pcre_extra *extra)
  2035. {
  2036. real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
  2037. #ifdef SUPPORT_PCRE16
  2038. int op;
  2039. pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
  2040. int length = re->name_count * re->name_entry_size;
  2041. #ifdef SUPPORT_UTF
  2042. BOOL utf = (re->options & PCRE_UTF16) != 0;
  2043. BOOL utf16_char = FALSE;
  2044. #endif /* SUPPORT_UTF */
  2045. #endif /* SUPPORT_PCRE16 */
  2046. /* Always flip the bytes in the main data block and study blocks. */
  2047. re->magic_number = REVERSED_MAGIC_NUMBER;
  2048. re->size = swap_uint32(re->size);
  2049. re->options = swap_uint32(re->options);
  2050. re->flags = swap_uint32(re->flags);
  2051. re->limit_match = swap_uint32(re->limit_match);
  2052. re->limit_recursion = swap_uint32(re->limit_recursion);
  2053. re->first_char = swap_uint16(re->first_char);
  2054. re->req_char = swap_uint16(re->req_char);
  2055. re->max_lookbehind = swap_uint16(re->max_lookbehind);
  2056. re->top_bracket = swap_uint16(re->top_bracket);
  2057. re->top_backref = swap_uint16(re->top_backref);
  2058. re->name_table_offset = swap_uint16(re->name_table_offset);
  2059. re->name_entry_size = swap_uint16(re->name_entry_size);
  2060. re->name_count = swap_uint16(re->name_count);
  2061. re->ref_count = swap_uint16(re->ref_count);
  2062. if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0)
  2063. {
  2064. pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
  2065. rsd->size = swap_uint32(rsd->size);
  2066. rsd->flags = swap_uint32(rsd->flags);
  2067. rsd->minlength = swap_uint32(rsd->minlength);
  2068. }
  2069. /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
  2070. in the name table, if present, and then in the pattern itself. */
  2071. #ifdef SUPPORT_PCRE16
  2072. if (pcre_mode != PCRE16_MODE) return;
  2073. while(TRUE)
  2074. {
  2075. /* Swap previous characters. */
  2076. while (length-- > 0)
  2077. {
  2078. *ptr = swap_uint16(*ptr);
  2079. ptr++;
  2080. }
  2081. #ifdef SUPPORT_UTF
  2082. if (utf16_char)
  2083. {
  2084. if ((ptr[-1] & 0xfc00) == 0xd800)
  2085. {
  2086. /* We know that there is only one extra character in UTF-16. */
  2087. *ptr = swap_uint16(*ptr);
  2088. ptr++;
  2089. }
  2090. }
  2091. utf16_char = FALSE;
  2092. #endif /* SUPPORT_UTF */
  2093. /* Get next opcode. */
  2094. length = 0;
  2095. op = *ptr;
  2096. *ptr++ = swap_uint16(op);
  2097. switch (op)
  2098. {
  2099. case OP_END:
  2100. return;
  2101. #ifdef SUPPORT_UTF
  2102. case OP_CHAR:
  2103. case OP_CHARI:
  2104. case OP_NOT:
  2105. case OP_NOTI:
  2106. case OP_STAR:
  2107. case OP_MINSTAR:
  2108. case OP_PLUS:
  2109. case OP_MINPLUS:
  2110. case OP_QUERY:
  2111. case OP_MINQUERY:
  2112. case OP_UPTO:
  2113. case OP_MINUPTO:
  2114. case OP_EXACT:
  2115. case OP_POSSTAR:
  2116. case OP_POSPLUS:
  2117. case OP_POSQUERY:
  2118. case OP_POSUPTO:
  2119. case OP_STARI:
  2120. case OP_MINSTARI:
  2121. case OP_PLUSI:
  2122. case OP_MINPLUSI:
  2123. case OP_QUERYI:
  2124. case OP_MINQUERYI:
  2125. case OP_UPTOI:
  2126. case OP_MINUPTOI:
  2127. case OP_EXACTI:
  2128. case OP_POSSTARI:
  2129. case OP_POSPLUSI:
  2130. case OP_POSQUERYI:
  2131. case OP_POSUPTOI:
  2132. case OP_NOTSTAR:
  2133. case OP_NOTMINSTAR:
  2134. case OP_NOTPLUS:
  2135. case OP_NOTMINPLUS:
  2136. case OP_NOTQUERY:
  2137. case OP_NOTMINQUERY:
  2138. case OP_NOTUPTO:
  2139. case OP_NOTMINUPTO:
  2140. case OP_NOTEXACT:
  2141. case OP_NOTPOSSTAR:
  2142. case OP_NOTPOSPLUS:
  2143. case OP_NOTPOSQUERY:
  2144. case OP_NOTPOSUPTO:
  2145. case OP_NOTSTARI:
  2146. case OP_NOTMINSTARI:
  2147. case OP_NOTPLUSI:
  2148. case OP_NOTMINPLUSI:
  2149. case OP_NOTQUERYI:
  2150. case OP_NOTMINQUERYI:
  2151. case OP_NOTUPTOI:
  2152. case OP_NOTMINUPTOI:
  2153. case OP_NOTEXACTI:
  2154. case OP_NOTPOSSTARI:
  2155. case OP_NOTPOSPLUSI:
  2156. case OP_NOTPOSQUERYI:
  2157. case OP_NOTPOSUPTOI:
  2158. if (utf) utf16_char = TRUE;
  2159. #endif
  2160. /* Fall through. */
  2161. default:
  2162. length = OP_lengths16[op] - 1;
  2163. break;
  2164. case OP_CLASS:
  2165. case OP_NCLASS:
  2166. /* Skip the character bit map. */
  2167. ptr += 32/sizeof(pcre_uint16);
  2168. length = 0;
  2169. break;
  2170. case OP_XCLASS:
  2171. /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
  2172. if (LINK_SIZE > 1)
  2173. length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
  2174. - (1 + LINK_SIZE + 1));
  2175. else
  2176. length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
  2177. /* Reverse the size of the XCLASS instance. */
  2178. *ptr = swap_uint16(*ptr);
  2179. ptr++;
  2180. if (LINK_SIZE > 1)
  2181. {
  2182. *ptr = swap_uint16(*ptr);
  2183. ptr++;
  2184. }
  2185. op = *ptr;
  2186. *ptr = swap_uint16(op);
  2187. ptr++;
  2188. if ((op & XCL_MAP) != 0)
  2189. {
  2190. /* Skip the character bit map. */
  2191. ptr += 32/sizeof(pcre_uint16);
  2192. length -= 32/sizeof(pcre_uint16);
  2193. }
  2194. break;
  2195. }
  2196. }
  2197. /* Control should never reach here in 16 bit mode. */
  2198. #endif /* SUPPORT_PCRE16 */
  2199. }
  2200. #endif /* SUPPORT_PCRE[8|16] */
  2201. #if defined SUPPORT_PCRE32
  2202. static void
  2203. regexflip_32(pcre *ere, pcre_extra *extra)
  2204. {
  2205. real_pcre32 *re = (real_pcre32 *)ere;
  2206. int op;
  2207. pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
  2208. int length = re->name_count * re->name_entry_size;
  2209. /* Always flip the bytes in the main data block and study blocks. */
  2210. re->magic_number = REVERSED_MAGIC_NUMBER;
  2211. re->size = swap_uint32(re->size);
  2212. re->options = swap_uint32(re->options);
  2213. re->flags = swap_uint32(re->flags);
  2214. re->limit_match = swap_uint32(re->limit_match);
  2215. re->limit_recursion = swap_uint32(re->limit_recursion);
  2216. re->first_char = swap_uint32(re->first_char);
  2217. re->req_char = swap_uint32(re->req_char);
  2218. re->max_lookbehind = swap_uint16(re->max_lookbehind);
  2219. re->top_bracket = swap_uint16(re->top_bracket);
  2220. re->top_backref = swap_uint16(re->top_backref);
  2221. re->name_table_offset = swap_uint16(re->name_table_offset);
  2222. re->name_entry_size = swap_uint16(re->name_entry_size);
  2223. re->name_count = swap_uint16(re->name_count);
  2224. re->ref_count = swap_uint16(re->ref_count);
  2225. if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0)
  2226. {
  2227. pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
  2228. rsd->size = swap_uint32(rsd->size);
  2229. rsd->flags = swap_uint32(rsd->flags);
  2230. rsd->minlength = swap_uint32(rsd->minlength);
  2231. }
  2232. /* In 32-bit mode we must swap bytes in the name table, if present, and then in
  2233. the pattern itself. */
  2234. while(TRUE)
  2235. {
  2236. /* Swap previous characters. */
  2237. while (length-- > 0)
  2238. {
  2239. *ptr = swap_uint32(*ptr);
  2240. ptr++;
  2241. }
  2242. /* Get next opcode. */
  2243. length = 0;
  2244. op = *ptr;
  2245. *ptr++ = swap_uint32(op);
  2246. switch (op)
  2247. {
  2248. case OP_END:
  2249. return;
  2250. default:
  2251. length = OP_lengths32[op] - 1;
  2252. break;
  2253. case OP_CLASS:
  2254. case OP_NCLASS:
  2255. /* Skip the character bit map. */
  2256. ptr += 32/sizeof(pcre_uint32);
  2257. length = 0;
  2258. break;
  2259. case OP_XCLASS:
  2260. /* LINK_SIZE can only be 1 in 32-bit mode. */
  2261. length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
  2262. /* Reverse the size of the XCLASS instance. */
  2263. *ptr = swap_uint32(*ptr);
  2264. ptr++;
  2265. op = *ptr;
  2266. *ptr = swap_uint32(op);
  2267. ptr++;
  2268. if ((op & XCL_MAP) != 0)
  2269. {
  2270. /* Skip the character bit map. */
  2271. ptr += 32/sizeof(pcre_uint32);
  2272. length -= 32/sizeof(pcre_uint32);
  2273. }
  2274. break;
  2275. }
  2276. }
  2277. /* Control should never reach here in 32 bit mode. */
  2278. }
  2279. #endif /* SUPPORT_PCRE32 */
  2280. static void
  2281. regexflip(pcre *ere, pcre_extra *extra)
  2282. {
  2283. #if defined SUPPORT_PCRE32
  2284. if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
  2285. regexflip_32(ere, extra);
  2286. #endif
  2287. #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
  2288. if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
  2289. regexflip8_or_16(ere, extra);
  2290. #endif
  2291. }
  2292. /*************************************************
  2293. * Check match or recursion limit *
  2294. *************************************************/
  2295. static int
  2296. check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
  2297. int start_offset, int options, int *use_offsets, int use_size_offsets,
  2298. int flag, unsigned long int *limit, int errnumber, const char *msg)
  2299. {
  2300. int count;
  2301. int min = 0;
  2302. int mid = 64;
  2303. int max = -1;
  2304. extra->flags |= flag;
  2305. for (;;)
  2306. {
  2307. *limit = mid;
  2308. PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
  2309. use_offsets, use_size_offsets);
  2310. if (count == errnumber)
  2311. {
  2312. /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
  2313. min = mid;
  2314. mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
  2315. }
  2316. else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
  2317. count == PCRE_ERROR_PARTIAL)
  2318. {
  2319. if (mid == min + 1)
  2320. {
  2321. fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
  2322. break;
  2323. }
  2324. /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
  2325. max = mid;
  2326. mid = (min + mid)/2;
  2327. }
  2328. else break; /* Some other error */
  2329. }
  2330. extra->flags &= ~flag;
  2331. return count;
  2332. }
  2333. /*************************************************
  2334. * Case-independent strncmp() function *
  2335. *************************************************/
  2336. /*
  2337. Arguments:
  2338. s first string
  2339. t second string
  2340. n number of characters to compare
  2341. Returns: < 0, = 0, or > 0, according to the comparison
  2342. */
  2343. static int
  2344. strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
  2345. {
  2346. while (n--)
  2347. {
  2348. int c = tolower(*s++) - tolower(*t++);
  2349. if (c) return c;
  2350. }
  2351. return 0;
  2352. }
  2353. /*************************************************
  2354. * Check multicharacter option *
  2355. *************************************************/
  2356. /* This is used both at compile and run-time to check for <xxx> escapes. Print
  2357. a message and return 0 if there is no match.
  2358. Arguments:
  2359. p points after the leading '<'
  2360. f file for error message
  2361. nl TRUE to check only for newline settings
  2362. stype "modifier" or "escape sequence"
  2363. Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
  2364. */
  2365. static int
  2366. check_mc_option(pcre_uint8 *p, FILE *f, BOOL nl, const char *stype)
  2367. {
  2368. if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
  2369. if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
  2370. if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
  2371. if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
  2372. if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
  2373. if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
  2374. if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
  2375. if (!nl)
  2376. {
  2377. if (strncmpic(p, (pcre_uint8 *)"JS>", 3) == 0) return PCRE_JAVASCRIPT_COMPAT;
  2378. }
  2379. fprintf(f, "Unknown %s at: <%s\n", stype, p);
  2380. return 0;
  2381. }
  /*************************************************
  *                  Usage function                *
  *************************************************/

  /* Write the command line summary to stdout. The lines for -16, -32, -dfa and
  -p, and the remark about readline(), depend on how the program was built. */

  static void
  usage(void)
  {
    fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n", stdout);
    fputs("Input and output default to stdin and stdout.\n", stdout);
  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
    fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
  #else
    fputs("This version of pcretest is not linked with readline().\n", stdout);
  #endif
    fputs("\nOptions:\n", stdout);
  #ifdef SUPPORT_PCRE16
    fputs(" -16 use the 16-bit library\n", stdout);
  #endif
  #ifdef SUPPORT_PCRE32
    fputs(" -32 use the 32-bit library\n", stdout);
  #endif
    fputs(" -b show compiled code\n", stdout);
    fputs(" -C show PCRE compile-time options and exit\n", stdout);
    fputs(" -C arg show a specific compile-time option and exit\n", stdout);
    fputs(" with its value if numeric (else 0). The arg can be:\n", stdout);
    fputs(" linksize internal link size [2, 3, 4]\n", stdout);
    fputs(" pcre8 8 bit library support enabled [0, 1]\n", stdout);
    fputs(" pcre16 16 bit library support enabled [0, 1]\n", stdout);
    fputs(" pcre32 32 bit library support enabled [0, 1]\n", stdout);
    fputs(" utf Unicode Transformation Format supported [0, 1]\n", stdout);
    fputs(" ucp Unicode Properties supported [0, 1]\n", stdout);
    fputs(" jit Just-in-time compiler supported [0, 1]\n", stdout);
    fputs(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY]\n", stdout);
    fputs(" bsr \\R type [ANYCRLF, ANY]\n", stdout);
    fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
  #if !defined NODFA
    fputs(" -dfa force DFA matching for all subjects\n", stdout);
  #endif
    fputs(" -help show usage information\n", stdout);
    fputs(" -i show information about compiled patterns\n", stdout);
    fputs(" -M find MATCH_LIMIT minimum for each subject\n", stdout);
    fputs(" -m output memory used information\n", stdout);
    fputs(" -O set PCRE_NO_AUTO_POSSESS on each pattern\n", stdout);
    fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
  #if !defined NOPOSIX
    fputs(" -p use POSIX interface\n", stdout);
  #endif
    fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
    fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
    fputs(" -s force each pattern to be studied at basic level\n", stdout);
    fputs(" -s+ force each pattern to be studied, using JIT if available\n", stdout);
    fputs(" -s++ ditto, verifying when JIT was actually used\n", stdout);
    fputs(" -s+n force each pattern to be studied, using JIT if available,\n", stdout);
    fputs(" where 1 <= n <= 7 selects JIT options\n", stdout);
    fputs(" -s++n ditto, verifying when JIT was actually used\n", stdout);
    fputs(" -t time compilation and execution\n", stdout);
    fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
    fputs(" -tm time execution (matching) only\n", stdout);
    fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
    fputs(" -T same as -t, but show total times at the end\n", stdout);
    fputs(" -TM same as -tm, but show total time at the end\n", stdout);
  }
  2443. /*************************************************
  2444. * Main Program *
  2445. *************************************************/
  2446. /* Read lines from named file or stdin and write to named file or stdout; lines
  2447. consist of a regular expression, in delimiters and optionally followed by
  2448. options, followed by a set of test data, terminated by an empty line. */
  2449. int main(int argc, char **argv)
  2450. {
  2451. FILE *infile = stdin;
  2452. const char *version;
  2453. int options = 0;
  2454. int study_options = 0;
  2455. int default_find_match_limit = FALSE;
  2456. pcre_uint32 default_options = 0;
  2457. int op = 1;
  2458. int timeit = 0;
  2459. int timeitm = 0;
  2460. int showtotaltimes = 0;
  2461. int showinfo = 0;
  2462. int showstore = 0;
  2463. int force_study = -1;
  2464. int force_study_options = 0;
  2465. int quiet = 0;
  2466. int size_offsets = 45;
  2467. int size_offsets_max;
  2468. int *offsets = NULL;
  2469. int debug = 0;
  2470. int done = 0;
  2471. int all_use_dfa = 0;
  2472. int verify_jit = 0;
  2473. int yield = 0;
  2474. int stack_size;
  2475. pcre_uint8 *dbuffer = NULL;
  2476. pcre_uint8 lockout[24] = { 0 };
  2477. size_t dbuffer_size = 1u << 14;
  2478. clock_t total_compile_time = 0;
  2479. clock_t total_study_time = 0;
  2480. clock_t total_match_time = 0;
  2481. #if !defined NOPOSIX
  2482. int posix = 0;
  2483. #endif
  2484. #if !defined NODFA
  2485. int *dfa_workspace = NULL;
  2486. #endif
  2487. pcre_jit_stack *jit_stack = NULL;
  2488. /* These vectors store, end-to-end, a list of zero-terminated captured
  2489. substring names, each list itself being terminated by an empty name. Assume
  2490. that 1024 is plenty long enough for the few names we'll be testing. It is
  2491. easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
  2492. for the actual memory, to ensure alignment. */
  2493. pcre_uint32 copynames[1024];
  2494. pcre_uint32 getnames[1024];
  2495. #ifdef SUPPORT_PCRE32
  2496. pcre_uint32 *cn32ptr;
  2497. pcre_uint32 *gn32ptr;
  2498. #endif
  2499. #ifdef SUPPORT_PCRE16
  2500. pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
  2501. pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
  2502. pcre_uint16 *cn16ptr;
  2503. pcre_uint16 *gn16ptr;
  2504. #endif
  2505. #ifdef SUPPORT_PCRE8
  2506. pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
  2507. pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
  2508. pcre_uint8 *cn8ptr;
  2509. pcre_uint8 *gn8ptr;
  2510. #endif
  2511. /* Get buffers from malloc() so that valgrind will check their misuse when
  2512. debugging. They grow automatically when very long lines are read. The 16-
  2513. and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
  2514. buffer = (pcre_uint8 *)malloc(buffer_size);
  2515. pbuffer = (pcre_uint8 *)malloc(buffer_size);
  2516. /* The outfile variable is static so that new_malloc can use it. */
  2517. outfile = stdout;
  2518. /* The following _setmode() stuff is some Windows magic that tells its runtime
  2519. library to translate CRLF into a single LF character. At least, that's what
  2520. I've been told: never having used Windows I take this all on trust. Originally
  2521. it set 0x8000, but then I was advised that _O_BINARY was better. */
  2522. #if defined(_WIN32) || defined(WIN32)
  2523. _setmode( _fileno( stdout ), _O_BINARY );
  2524. #endif
  2525. /* Get the version number: both pcre_version() and pcre16_version() give the
  2526. same answer. We just need to ensure that we call one that is available. */
  2527. #if defined SUPPORT_PCRE8
  2528. version = pcre_version();
  2529. #elif defined SUPPORT_PCRE16
  2530. version = pcre16_version();
  2531. #elif defined SUPPORT_PCRE32
  2532. version = pcre32_version();
  2533. #endif
  2534. /* Scan options */
  2535. while (argc > 1 && argv[op][0] == '-')
  2536. {
  2537. pcre_uint8 *endptr;
  2538. char *arg = argv[op];
  2539. if (strcmp(arg, "-m") == 0) showstore = 1;
  2540. else if (strcmp(arg, "-s") == 0) force_study = 0;
  2541. else if (strncmp(arg, "-s+", 3) == 0)
  2542. {
  2543. arg += 3;
  2544. if (*arg == '+') { arg++; verify_jit = TRUE; }
  2545. force_study = 1;
  2546. if (*arg == 0)
  2547. force_study_options = jit_study_bits[6];
  2548. else if (*arg >= '1' && *arg <= '7')
  2549. force_study_options = jit_study_bits[*arg - '1'];
  2550. else goto BAD_ARG;
  2551. }
  2552. else if (strcmp(arg, "-8") == 0)
  2553. {
  2554. #ifdef SUPPORT_PCRE8
  2555. pcre_mode = PCRE8_MODE;
  2556. #else
  2557. printf("** This version of PCRE was built without 8-bit support\n");
  2558. exit(1);
  2559. #endif
  2560. }
  2561. else if (strcmp(arg, "-16") == 0)
  2562. {
  2563. #ifdef SUPPORT_PCRE16
  2564. pcre_mode = PCRE16_MODE;
  2565. #else
  2566. printf("** This version of PCRE was built without 16-bit support\n");
  2567. exit(1);
  2568. #endif
  2569. }
  2570. else if (strcmp(arg, "-32") == 0)
  2571. {
  2572. #ifdef SUPPORT_PCRE32
  2573. pcre_mode = PCRE32_MODE;
  2574. #else
  2575. printf("** This version of PCRE was built without 32-bit support\n");
  2576. exit(1);
  2577. #endif
  2578. }
  2579. else if (strcmp(arg, "-q") == 0) quiet = 1;
  2580. else if (strcmp(arg, "-b") == 0) debug = 1;
  2581. else if (strcmp(arg, "-i") == 0) showinfo = 1;
  2582. else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
  2583. else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
  2584. else if (strcmp(arg, "-O") == 0) default_options |= PCRE_NO_AUTO_POSSESS;
  2585. #if !defined NODFA
  2586. else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
  2587. #endif
  2588. else if (strcmp(arg, "-o") == 0 && argc > 2 &&
  2589. ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
  2590. *endptr == 0))
  2591. {
  2592. op++;
  2593. argc--;
  2594. }
  2595. else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
  2596. strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
  2597. {
  2598. int temp;
  2599. int both = arg[2] == 0;
  2600. showtotaltimes = arg[1] == 'T';
  2601. if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
  2602. *endptr == 0))
  2603. {
  2604. timeitm = temp;
  2605. op++;
  2606. argc--;
  2607. }
  2608. else timeitm = LOOPREPEAT;
  2609. if (both) timeit = timeitm;
  2610. }
  2611. else if (strcmp(arg, "-S") == 0 && argc > 2 &&
  2612. ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
  2613. *endptr == 0))
  2614. {
  2615. #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
  2616. printf("PCRE: -S not supported on this OS\n");
  2617. exit(1);
  2618. #else
  2619. int rc;
  2620. struct rlimit rlim;
  2621. getrlimit(RLIMIT_STACK, &rlim);
  2622. rlim.rlim_cur = stack_size * 1024 * 1024;
  2623. rc = setrlimit(RLIMIT_STACK, &rlim);
  2624. if (rc != 0)
  2625. {
  2626. printf("PCRE: setrlimit() failed with error %d\n", rc);
  2627. exit(1);
  2628. }
  2629. op++;
  2630. argc--;
  2631. #endif
  2632. }
  2633. #if !defined NOPOSIX
  2634. else if (strcmp(arg, "-p") == 0) posix = 1;
  2635. #endif
  2636. else if (strcmp(arg, "-C") == 0)
  2637. {
  2638. int rc;
  2639. unsigned long int lrc;
  2640. if (argc > 2)
  2641. {
  2642. if (strcmp(argv[op + 1], "linksize") == 0)
  2643. {
  2644. (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
  2645. printf("%d\n", rc);
  2646. yield = rc;
  2647. #ifdef __VMS
  2648. vms_setsymbol("LINKSIZE",0,yield );
  2649. #endif
  2650. }
  2651. else if (strcmp(argv[op + 1], "pcre8") == 0)
  2652. {
  2653. #ifdef SUPPORT_PCRE8
  2654. printf("1\n");
  2655. yield = 1;
  2656. #else
  2657. printf("0\n");
  2658. yield = 0;
  2659. #endif
  2660. #ifdef __VMS
  2661. vms_setsymbol("PCRE8",0,yield );
  2662. #endif
  2663. }
  2664. else if (strcmp(argv[op + 1], "pcre16") == 0)
  2665. {
  2666. #ifdef SUPPORT_PCRE16
  2667. printf("1\n");
  2668. yield = 1;
  2669. #else
  2670. printf("0\n");
  2671. yield = 0;
  2672. #endif
  2673. #ifdef __VMS
  2674. vms_setsymbol("PCRE16",0,yield );
  2675. #endif
  2676. }
  2677. else if (strcmp(argv[op + 1], "pcre32") == 0)
  2678. {
  2679. #ifdef SUPPORT_PCRE32
  2680. printf("1\n");
  2681. yield = 1;
  2682. #else
  2683. printf("0\n");
  2684. yield = 0;
  2685. #endif
  2686. #ifdef __VMS
  2687. vms_setsymbol("PCRE32",0,yield );
  2688. #endif
  2689. }
  2690. else if (strcmp(argv[op + 1], "utf") == 0)
  2691. {
  2692. #ifdef SUPPORT_PCRE8
  2693. if (pcre_mode == PCRE8_MODE)
  2694. (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
  2695. #endif
  2696. #ifdef SUPPORT_PCRE16
  2697. if (pcre_mode == PCRE16_MODE)
  2698. (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
  2699. #endif
  2700. #ifdef SUPPORT_PCRE32
  2701. if (pcre_mode == PCRE32_MODE)
  2702. (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
  2703. #endif
  2704. printf("%d\n", rc);
  2705. yield = rc;
  2706. #ifdef __VMS
  2707. vms_setsymbol("UTF",0,yield );
  2708. #endif
  2709. }
  2710. else if (strcmp(argv[op + 1], "ucp") == 0)
  2711. {
  2712. (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
  2713. printf("%d\n", rc);
  2714. yield = rc;
  2715. }
  2716. else if (strcmp(argv[op + 1], "jit") == 0)
  2717. {
  2718. (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
  2719. printf("%d\n", rc);
  2720. yield = rc;
  2721. }
  2722. else if (strcmp(argv[op + 1], "newline") == 0)
  2723. {
  2724. (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
  2725. print_newline_config(rc, TRUE);
  2726. }
  2727. else if (strcmp(argv[op + 1], "bsr") == 0)
  2728. {
  2729. (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
  2730. printf("%s\n", rc? "ANYCRLF" : "ANY");
  2731. }
  2732. else if (strcmp(argv[op + 1], "ebcdic") == 0)
  2733. {
  2734. #ifdef EBCDIC
  2735. printf("1\n");
  2736. yield = 1;
  2737. #else
  2738. printf("0\n");
  2739. #endif
  2740. }
  2741. else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
  2742. {
  2743. #ifdef EBCDIC
  2744. printf("0x%02x\n", CHAR_LF);
  2745. #else
  2746. printf("0\n");
  2747. #endif
  2748. }
  2749. else
  2750. {
  2751. printf("Unknown -C option: %s\n", argv[op + 1]);
  2752. }
  2753. goto EXIT;
  2754. }
  2755. /* No argument for -C: output all configuration information. */
  2756. printf("PCRE version %s\n", version);
  2757. printf("Compiled with\n");
  2758. #ifdef EBCDIC
  2759. printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
  2760. #endif
  2761. /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
  2762. are set, either both UTFs are supported or both are not supported. */
  2763. #ifdef SUPPORT_PCRE8
  2764. printf(" 8-bit support\n");
  2765. (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
  2766. printf (" %sUTF-8 support\n", rc ? "" : "No ");
  2767. #endif
  2768. #ifdef SUPPORT_PCRE16
  2769. printf(" 16-bit support\n");
  2770. (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
  2771. printf (" %sUTF-16 support\n", rc ? "" : "No ");
  2772. #endif
  2773. #ifdef SUPPORT_PCRE32
  2774. printf(" 32-bit support\n");
  2775. (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
  2776. printf (" %sUTF-32 support\n", rc ? "" : "No ");
  2777. #endif
  2778. (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
  2779. printf(" %sUnicode properties support\n", rc? "" : "No ");
  2780. (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
  2781. if (rc)
  2782. {
  2783. const char *arch;
  2784. (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
  2785. printf(" Just-in-time compiler support: %s\n", arch);
  2786. }
  2787. else
  2788. printf(" No just-in-time compiler support\n");
  2789. (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
  2790. print_newline_config(rc, FALSE);
  2791. (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
  2792. printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
  2793. "all Unicode newlines");
  2794. (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
  2795. printf(" Internal link size = %d\n", rc);
  2796. (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
  2797. printf(" POSIX malloc threshold = %d\n", rc);
  2798. (void)PCRE_CONFIG(PCRE_CONFIG_PARENS_LIMIT, &lrc);
  2799. printf(" Parentheses nest limit = %ld\n", lrc);
  2800. (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
  2801. printf(" Default match limit = %ld\n", lrc);
  2802. (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
  2803. printf(" Default recursion depth limit = %ld\n", lrc);
  2804. (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
  2805. printf(" Match recursion uses %s", rc? "stack" : "heap");
  2806. if (showstore)
  2807. {
  2808. PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
  2809. printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
  2810. }
  2811. printf("\n");
  2812. goto EXIT;
  2813. }
  2814. else if (strcmp(arg, "-help") == 0 ||
  2815. strcmp(arg, "--help") == 0)
  2816. {
  2817. usage();
  2818. goto EXIT;
  2819. }
  2820. else
  2821. {
  2822. BAD_ARG:
  2823. printf("** Unknown or malformed option %s\n", arg);
  2824. usage();
  2825. yield = 1;
  2826. goto EXIT;
  2827. }
  2828. op++;
  2829. argc--;
  2830. }
  2831. /* Get the store for the offsets vector, and remember what it was */
  2832. size_offsets_max = size_offsets;
  2833. offsets = (int *)malloc(size_offsets_max * sizeof(int));
  2834. if (offsets == NULL)
  2835. {
  2836. printf("** Failed to get %d bytes of memory for offsets vector\n",
  2837. (int)(size_offsets_max * sizeof(int)));
  2838. yield = 1;
  2839. goto EXIT;
  2840. }
  2841. /* Sort out the input and output files */
  2842. if (argc > 1)
  2843. {
  2844. infile = fopen(argv[op], INPUT_MODE);
  2845. if (infile == NULL)
  2846. {
  2847. printf("** Failed to open %s\n", argv[op]);
  2848. yield = 1;
  2849. goto EXIT;
  2850. }
  2851. }
  2852. if (argc > 2)
  2853. {
  2854. outfile = fopen(argv[op+1], OUTPUT_MODE);
  2855. if (outfile == NULL)
  2856. {
  2857. printf("** Failed to open %s\n", argv[op+1]);
  2858. yield = 1;
  2859. goto EXIT;
  2860. }
  2861. }
  2862. /* Set alternative malloc function */
  2863. #ifdef SUPPORT_PCRE8
  2864. pcre_malloc = new_malloc;
  2865. pcre_free = new_free;
  2866. pcre_stack_malloc = stack_malloc;
  2867. pcre_stack_free = stack_free;
  2868. #endif
  2869. #ifdef SUPPORT_PCRE16
  2870. pcre16_malloc = new_malloc;
  2871. pcre16_free = new_free;
  2872. pcre16_stack_malloc = stack_malloc;
  2873. pcre16_stack_free = stack_free;
  2874. #endif
  2875. #ifdef SUPPORT_PCRE32
  2876. pcre32_malloc = new_malloc;
  2877. pcre32_free = new_free;
  2878. pcre32_stack_malloc = stack_malloc;
  2879. pcre32_stack_free = stack_free;
  2880. #endif
  2881. /* Heading line unless quiet */
  2882. if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
  2883. /* Main loop */
  2884. while (!done)
  2885. {
  2886. pcre *re = NULL;
  2887. pcre_extra *extra = NULL;
  2888. #if !defined NOPOSIX /* There are still compilers that require no indent */
  2889. regex_t preg = { NULL, 0, 0} ;
  2890. int do_posix = 0;
  2891. #endif
  2892. const char *error;
  2893. pcre_uint8 *markptr;
  2894. pcre_uint8 *p, *pp, *ppp;
  2895. pcre_uint8 *to_file = NULL;
  2896. const pcre_uint8 *tables = NULL;
  2897. unsigned long int get_options;
  2898. unsigned long int true_size, true_study_size = 0;
  2899. size_t size;
  2900. int do_allcaps = 0;
  2901. int do_mark = 0;
  2902. int do_study = 0;
  2903. int no_force_study = 0;
  2904. int do_debug = debug;
  2905. int do_G = 0;
  2906. int do_g = 0;
  2907. int do_showinfo = showinfo;
  2908. int do_showrest = 0;
  2909. int do_showcaprest = 0;
  2910. int do_flip = 0;
  2911. int erroroffset, len, delimiter, poffset;
  2912. #if !defined NODFA
  2913. int dfa_matched = 0;
  2914. #endif
  2915. use_utf = 0;
  2916. debug_lengths = 1;
  2917. SET_PCRE_STACK_GUARD(NULL);
  2918. if (extend_inputline(infile, buffer, " re> ") == NULL) break;
  2919. if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
  2920. fflush(outfile);
  2921. p = buffer;
  2922. while (isspace(*p)) p++;
  2923. if (*p == 0) continue;
  2924. /* Handle option lock-out setting */
  2925. if (*p == '<' && p[1] == ' ')
  2926. {
  2927. p += 2;
  2928. while (isspace(*p)) p++;
  2929. if (strncmp((char *)p, "forbid ", 7) == 0)
  2930. {
  2931. p += 7;
  2932. while (isspace(*p)) p++;
  2933. pp = lockout;
  2934. while (!isspace(*p) && pp < lockout + sizeof(lockout) - 1)
  2935. *pp++ = *p++;
  2936. *pp = 0;
  2937. }
  2938. else
  2939. {
  2940. printf("** Unrecognized special command '%s'\n", p);
  2941. yield = 1;
  2942. goto EXIT;
  2943. }
  2944. continue;
  2945. }
  2946. /* See if the pattern is to be loaded pre-compiled from a file. */
  2947. if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
  2948. {
  2949. pcre_uint32 magic;
  2950. pcre_uint8 sbuf[8];
  2951. FILE *f;
  2952. p++;
  2953. if (*p == '!')
  2954. {
  2955. do_debug = TRUE;
  2956. do_showinfo = TRUE;
  2957. p++;
  2958. }
  2959. pp = p + (int)strlen((char *)p);
  2960. while (isspace(pp[-1])) pp--;
  2961. *pp = 0;
  2962. f = fopen((char *)p, "rb");
  2963. if (f == NULL)
  2964. {
  2965. fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
  2966. continue;
  2967. }
  2968. if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
  2969. true_size =
  2970. (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
  2971. true_study_size =
  2972. (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
  2973. re = (pcre *)new_malloc(true_size);
  2974. if (re == NULL)
  2975. {
  2976. printf("** Failed to get %d bytes of memory for pcre object\n",
  2977. (int)true_size);
  2978. yield = 1;
  2979. goto EXIT;
  2980. }
  2981. if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
  2982. magic = REAL_PCRE_MAGIC(re);
  2983. if (magic != MAGIC_NUMBER)
  2984. {
  2985. if (swap_uint32(magic) == MAGIC_NUMBER)
  2986. {
  2987. do_flip = 1;
  2988. }
  2989. else
  2990. {
  2991. fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
  2992. new_free(re);
  2993. fclose(f);
  2994. continue;
  2995. }
  2996. }
  2997. /* We hide the byte-invert info for little and big endian tests. */
  2998. fprintf(outfile, "Compiled pattern%s loaded from %s\n",
  2999. do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
  3000. /* Now see if there is any following study data. */
  3001. if (true_study_size != 0)
  3002. {
  3003. pcre_study_data *psd;
  3004. extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
  3005. extra->flags = PCRE_EXTRA_STUDY_DATA;
  3006. psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
  3007. extra->study_data = psd;
  3008. if (fread(psd, 1, true_study_size, f) != true_study_size)
  3009. {
  3010. FAIL_READ:
  3011. fprintf(outfile, "Failed to read data from %s\n", p);
  3012. if (extra != NULL)
  3013. {
  3014. PCRE_FREE_STUDY(extra);
  3015. }
  3016. new_free(re);
  3017. fclose(f);
  3018. continue;
  3019. }
  3020. fprintf(outfile, "Study data loaded from %s\n", p);
  3021. do_study = 1; /* To get the data output if requested */
  3022. }
  3023. else fprintf(outfile, "No study data\n");
  3024. /* Flip the necessary bytes. */
  3025. if (do_flip)
  3026. {
  3027. int rc;
  3028. PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
  3029. if (rc == PCRE_ERROR_BADMODE)
  3030. {
  3031. pcre_uint32 flags_in_host_byte_order;
  3032. if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
  3033. flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
  3034. else
  3035. flags_in_host_byte_order = swap_uint32(REAL_PCRE_FLAGS(re));
  3036. /* Simulate the result of the function call below. */
  3037. fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
  3038. pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
  3039. PCRE_INFO_OPTIONS);
  3040. fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
  3041. "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
  3042. new_free(re);
  3043. fclose(f);
  3044. continue;
  3045. }
  3046. }
  3047. /* Need to know if UTF-8 for printing data strings. */
  3048. if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
  3049. {
  3050. new_free(re);
  3051. fclose(f);
  3052. continue;
  3053. }
  3054. use_utf = (get_options & PCRE_UTF8) != 0;
  3055. fclose(f);
  3056. goto SHOW_INFO;
  3057. }
  3058. /* In-line pattern (the usual case). Get the delimiter and seek the end of
  3059. the pattern; if it isn't complete, read more. */
  3060. delimiter = *p++;
  3061. if (isalnum(delimiter) || delimiter == '\\')
  3062. {
  3063. fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
  3064. goto SKIP_DATA;
  3065. }
  3066. pp = p;
  3067. poffset = (int)(p - buffer);
  3068. for(;;)
  3069. {
  3070. while (*pp != 0)
  3071. {
  3072. if (*pp == '\\' && pp[1] != 0) pp++;
  3073. else if (*pp == delimiter) break;
  3074. pp++;
  3075. }
  3076. if (*pp != 0) break;
  3077. if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
  3078. {
  3079. fprintf(outfile, "** Unexpected EOF\n");
  3080. done = 1;
  3081. goto CONTINUE;
  3082. }
  3083. if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
  3084. }
  3085. /* The buffer may have moved while being extended; reset the start of data
  3086. pointer to the correct relative point in the buffer. */
  3087. p = buffer + poffset;
  3088. /* If the first character after the delimiter is backslash, make
  3089. the pattern end with backslash. This is purely to provide a way
  3090. of testing for the error message when a pattern ends with backslash. */
  3091. if (pp[1] == '\\') *pp++ = '\\';
  3092. /* Terminate the pattern at the delimiter, and save a copy of the pattern
  3093. for callouts. */
  3094. *pp++ = 0;
  3095. strcpy((char *)pbuffer, (char *)p);
  3096. /* Look for modifiers and options after the final delimiter. */
  3097. options = default_options;
  3098. study_options = force_study_options;
  3099. log_store = showstore; /* default from command line */
  3100. while (*pp != 0)
  3101. {
  3102. /* Check to see whether this modifier has been locked out for this file.
  3103. This is complicated for the multi-character options that begin with '<'.
  3104. If there is no '>' in the lockout string, all multi-character modifiers are
  3105. locked out. */
  3106. if (strchr((char *)lockout, *pp) != NULL)
  3107. {
  3108. if (*pp == '<' && strchr((char *)lockout, '>') != NULL)
  3109. {
  3110. int x = check_mc_option(pp+1, outfile, FALSE, "modifier");
  3111. if (x == 0) goto SKIP_DATA;
  3112. for (ppp = lockout; *ppp != 0; ppp++)
  3113. {
  3114. if (*ppp == '<')
  3115. {
  3116. int y = check_mc_option(ppp+1, outfile, FALSE, "modifier");
  3117. if (y == 0)
  3118. {
  3119. printf("** Error in modifier forbid data - giving up.\n");
  3120. yield = 1;
  3121. goto EXIT;
  3122. }
  3123. if (x == y)
  3124. {
  3125. ppp = pp;
  3126. while (*ppp != '>') ppp++;
  3127. printf("** The %.*s modifier is locked out - giving up.\n",
  3128. (int)(ppp - pp + 1), pp);
  3129. yield = 1;
  3130. goto EXIT;
  3131. }
  3132. }
  3133. }
  3134. }
  3135. /* The single-character modifiers are straightforward. */
  3136. else
  3137. {
  3138. printf("** The /%c modifier is locked out - giving up.\n", *pp);
  3139. yield = 1;
  3140. goto EXIT;
  3141. }
  3142. }
  3143. /* The modifier is not locked out; handle it. */
  3144. switch (*pp++)
  3145. {
  3146. case 'f': options |= PCRE_FIRSTLINE; break;
  3147. case 'g': do_g = 1; break;
  3148. case 'i': options |= PCRE_CASELESS; break;
  3149. case 'm': options |= PCRE_MULTILINE; break;
  3150. case 's': options |= PCRE_DOTALL; break;
  3151. case 'x': options |= PCRE_EXTENDED; break;
  3152. case '+':
  3153. if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
  3154. break;
  3155. case '=': do_allcaps = 1; break;
  3156. case 'A': options |= PCRE_ANCHORED; break;
  3157. case 'B': do_debug = 1; break;
  3158. case 'C': options |= PCRE_AUTO_CALLOUT; break;
  3159. case 'D': do_debug = do_showinfo = 1; break;
  3160. case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
  3161. case 'F': do_flip = 1; break;
  3162. case 'G': do_G = 1; break;
  3163. case 'I': do_showinfo = 1; break;
  3164. case 'J': options |= PCRE_DUPNAMES; break;
  3165. case 'K': do_mark = 1; break;
  3166. case 'M': log_store = 1; break;
  3167. case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
  3168. case 'O': options |= PCRE_NO_AUTO_POSSESS; break;
  3169. #if !defined NOPOSIX
  3170. case 'P': do_posix = 1; break;
  3171. #endif
  3172. case 'Q':
  3173. switch (*pp)
  3174. {
  3175. case '0':
  3176. case '1':
  3177. stack_guard_return = *pp++ - '0';
  3178. break;
  3179. default:
  3180. fprintf(outfile, "** Missing 0 or 1 after /Q\n");
  3181. goto SKIP_DATA;
  3182. }
  3183. SET_PCRE_STACK_GUARD(stack_guard);
  3184. break;
  3185. case 'S':
  3186. do_study = 1;
  3187. for (;;)
  3188. {
  3189. switch (*pp++)
  3190. {
  3191. case 'S':
  3192. do_study = 0;
  3193. no_force_study = 1;
  3194. break;
  3195. case '!':
  3196. study_options |= PCRE_STUDY_EXTRA_NEEDED;
  3197. break;
  3198. case '+':
  3199. if (*pp == '+')
  3200. {
  3201. verify_jit = TRUE;
  3202. pp++;
  3203. }
  3204. if (*pp >= '1' && *pp <= '7')
  3205. study_options |= jit_study_bits[*pp++ - '1'];
  3206. else
  3207. study_options |= jit_study_bits[6];
  3208. break;
  3209. case '-':
  3210. study_options &= ~PCRE_STUDY_ALLJIT;
  3211. break;
  3212. default:
  3213. pp--;
  3214. goto ENDLOOP;
  3215. }
  3216. }
  3217. ENDLOOP:
  3218. break;
  3219. case 'U': options |= PCRE_UNGREEDY; break;
  3220. case 'W': options |= PCRE_UCP; break;
  3221. case 'X': options |= PCRE_EXTRA; break;
  3222. case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
  3223. case 'Z': debug_lengths = 0; break;
  3224. case '8': options |= PCRE_UTF8; use_utf = 1; break;
  3225. case '9': options |= PCRE_NEVER_UTF; break;
  3226. case '?': options |= PCRE_NO_UTF8_CHECK; break;
  3227. case 'T':
  3228. switch (*pp++)
  3229. {
  3230. case '0': tables = tables0; break;
  3231. case '1': tables = tables1; break;
  3232. case '\r':
  3233. case '\n':
  3234. case ' ':
  3235. case 0:
  3236. fprintf(outfile, "** Missing table number after /T\n");
  3237. goto SKIP_DATA;
  3238. default:
  3239. fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
  3240. goto SKIP_DATA;
  3241. }
  3242. break;
  3243. case 'L':
  3244. ppp = pp;
  3245. /* The '\r' test here is so that it works on Windows. */
  3246. /* The '0' test is just in case this is an unterminated line. */
  3247. while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
  3248. *ppp = 0;
  3249. if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
  3250. {
  3251. fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
  3252. goto SKIP_DATA;
  3253. }
  3254. locale_set = 1;
  3255. tables = PCRE_MAKETABLES;
  3256. pp = ppp;
  3257. break;
  3258. case '>':
  3259. to_file = pp;
  3260. while (*pp != 0) pp++;
  3261. while (isspace(pp[-1])) pp--;
  3262. *pp = 0;
  3263. break;
  3264. case '<':
  3265. {
  3266. int x = check_mc_option(pp, outfile, FALSE, "modifier");
  3267. if (x == 0) goto SKIP_DATA;
  3268. options |= x;
  3269. while (*pp++ != '>');
  3270. }
  3271. break;
  3272. case '\r': /* So that it works in Windows */
  3273. case '\n':
  3274. case ' ':
  3275. break;
  3276. default:
  3277. fprintf(outfile, "** Unknown modifier '%c'\n", pp[-1]);
  3278. goto SKIP_DATA;
  3279. }
  3280. }
  3281. /* Handle compiling via the POSIX interface, which doesn't support the
  3282. timing, showing, or debugging options, nor the ability to pass over
  3283. local character tables. Neither does it have 16-bit support. */
  3284. #if !defined NOPOSIX
  3285. if (posix || do_posix)
  3286. {
  3287. int rc;
  3288. int cflags = 0;
  3289. if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
  3290. if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
  3291. if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
  3292. if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
  3293. if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
  3294. if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
  3295. if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
  3296. rc = regcomp(&preg, (char *)p, cflags);
  3297. /* Compilation failed; go back for another re, skipping to blank line
  3298. if non-interactive. */
  3299. if (rc != 0)
  3300. {
  3301. (void)regerror(rc, &preg, (char *)buffer, buffer_size);
  3302. fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
  3303. goto SKIP_DATA;
  3304. }
  3305. }
  3306. /* Handle compiling via the native interface */
  3307. else
  3308. #endif /* !defined NOPOSIX */
  3309. {
  3310. /* In 16- or 32-bit mode, convert the input. */
  3311. #ifdef SUPPORT_PCRE16
  3312. if (pcre_mode == PCRE16_MODE)
  3313. {
  3314. switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
  3315. {
  3316. case -1:
  3317. fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
  3318. "converted to UTF-16\n");
  3319. goto SKIP_DATA;
  3320. case -2:
  3321. fprintf(outfile, "**Failed: character value greater than 0x10ffff "
  3322. "cannot be converted to UTF-16\n");
  3323. goto SKIP_DATA;
  3324. case -3: /* "Impossible error" when to16 is called arg1 FALSE */
  3325. fprintf(outfile, "**Failed: character value greater than 0xffff "
  3326. "cannot be converted to 16-bit in non-UTF mode\n");
  3327. goto SKIP_DATA;
  3328. default:
  3329. break;
  3330. }
  3331. p = (pcre_uint8 *)buffer16;
  3332. }
  3333. #endif
  3334. #ifdef SUPPORT_PCRE32
  3335. if (pcre_mode == PCRE32_MODE)
  3336. {
  3337. switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
  3338. {
  3339. case -1:
  3340. fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
  3341. "converted to UTF-32\n");
  3342. goto SKIP_DATA;
  3343. case -2:
  3344. fprintf(outfile, "**Failed: character value greater than 0x10ffff "
  3345. "cannot be converted to UTF-32\n");
  3346. goto SKIP_DATA;
  3347. case -3:
  3348. fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
  3349. goto SKIP_DATA;
  3350. default:
  3351. break;
  3352. }
  3353. p = (pcre_uint8 *)buffer32;
  3354. }
  3355. #endif
  3356. /* Compile many times when timing */
  3357. if (timeit > 0)
  3358. {
  3359. register int i;
  3360. clock_t time_taken;
  3361. clock_t start_time = clock();
  3362. for (i = 0; i < timeit; i++)
  3363. {
  3364. PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
  3365. if (re != NULL) free(re);
  3366. }
  3367. total_compile_time += (time_taken = clock() - start_time);
  3368. fprintf(outfile, "Compile time %.4f milliseconds\n",
  3369. (((double)time_taken * 1000.0) / (double)timeit) /
  3370. (double)CLOCKS_PER_SEC);
  3371. }
  3372. PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
  3373. /* Compilation failed; go back for another re, skipping to blank line
  3374. if non-interactive. */
  3375. if (re == NULL)
  3376. {
  3377. fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
  3378. SKIP_DATA:
  3379. if (infile != stdin)
  3380. {
  3381. for (;;)
  3382. {
  3383. if (extend_inputline(infile, buffer, NULL) == NULL)
  3384. {
  3385. done = 1;
  3386. goto CONTINUE;
  3387. }
  3388. len = (int)strlen((char *)buffer);
  3389. while (len > 0 && isspace(buffer[len-1])) len--;
  3390. if (len == 0) break;
  3391. }
  3392. fprintf(outfile, "\n");
  3393. }
  3394. goto CONTINUE;
  3395. }
  3396. /* Compilation succeeded. It is now possible to set the UTF-8 option from
  3397. within the regex; check for this so that we know how to process the data
  3398. lines. */
  3399. if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
  3400. goto SKIP_DATA;
  3401. if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
  3402. /* Extract the size for possible writing before possibly flipping it,
  3403. and remember the store that was got. */
  3404. true_size = REAL_PCRE_SIZE(re);
  3405. /* Output code size information if requested */
  3406. if (log_store)
  3407. {
  3408. int name_count, name_entry_size, real_pcre_size;
  3409. new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
  3410. new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
  3411. real_pcre_size = 0;
  3412. #ifdef SUPPORT_PCRE8
  3413. if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
  3414. real_pcre_size = sizeof(real_pcre);
  3415. #endif
  3416. #ifdef SUPPORT_PCRE16
  3417. if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
  3418. real_pcre_size = sizeof(real_pcre16);
  3419. #endif
  3420. #ifdef SUPPORT_PCRE32
  3421. if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
  3422. real_pcre_size = sizeof(real_pcre32);
  3423. #endif
  3424. new_info(re, NULL, PCRE_INFO_SIZE, &size);
  3425. fprintf(outfile, "Memory allocation (code space): %d\n",
  3426. (int)(size - real_pcre_size - name_count * name_entry_size));
  3427. }
  3428. /* If -s or /S was present, study the regex to generate additional info to
  3429. help with the matching, unless the pattern has the SS option, which
  3430. suppresses the effect of /S (used for a few test patterns where studying is
  3431. never sensible). */
  3432. if (do_study || (force_study >= 0 && !no_force_study))
  3433. {
  3434. if (timeit > 0)
  3435. {
  3436. register int i;
  3437. clock_t time_taken;
  3438. clock_t start_time = clock();
  3439. for (i = 0; i < timeit; i++)
  3440. {
  3441. PCRE_STUDY(extra, re, study_options, &error);
  3442. }
  3443. total_study_time = (time_taken = clock() - start_time);
  3444. if (extra != NULL)
  3445. {
  3446. PCRE_FREE_STUDY(extra);
  3447. }
  3448. fprintf(outfile, " Study time %.4f milliseconds\n",
  3449. (((double)time_taken * 1000.0) / (double)timeit) /
  3450. (double)CLOCKS_PER_SEC);
  3451. }
  3452. PCRE_STUDY(extra, re, study_options, &error);
  3453. if (error != NULL)
  3454. fprintf(outfile, "Failed to study: %s\n", error);
  3455. else if (extra != NULL)
  3456. {
  3457. true_study_size = ((pcre_study_data *)(extra->study_data))->size;
  3458. if (log_store)
  3459. {
  3460. size_t jitsize;
  3461. if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
  3462. jitsize != 0)
  3463. fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
  3464. }
  3465. }
  3466. }
  3467. /* If /K was present, we set up for handling MARK data. */
  3468. if (do_mark)
  3469. {
  3470. if (extra == NULL)
  3471. {
  3472. extra = (pcre_extra *)malloc(sizeof(pcre_extra));
  3473. extra->flags = 0;
  3474. }
  3475. extra->mark = &markptr;
  3476. extra->flags |= PCRE_EXTRA_MARK;
  3477. }
  3478. /* Extract and display information from the compiled data if required. */
  3479. SHOW_INFO:
  3480. if (do_debug)
  3481. {
  3482. fprintf(outfile, "------------------------------------------------------------------\n");
  3483. PCRE_PRINTINT(re, outfile, debug_lengths);
  3484. }
  3485. /* We already have the options in get_options (see above) */
  3486. if (do_showinfo)
  3487. {
  3488. unsigned long int all_options;
  3489. pcre_uint32 first_char, need_char;
  3490. pcre_uint32 match_limit, recursion_limit;
  3491. int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
  3492. hascrorlf, maxlookbehind, match_empty;
  3493. int nameentrysize, namecount;
  3494. const pcre_uint8 *nametable;
  3495. if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
  3496. new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
  3497. new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
  3498. new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
  3499. new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
  3500. new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
  3501. new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
  3502. new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
  3503. new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
  3504. new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
  3505. new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
  3506. new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
  3507. new_info(re, NULL, PCRE_INFO_MATCH_EMPTY, &match_empty) +
  3508. new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
  3509. != 0)
  3510. goto SKIP_DATA;
  3511. fprintf(outfile, "Capturing subpattern count = %d\n", count);
  3512. if (backrefmax > 0)
  3513. fprintf(outfile, "Max back reference = %d\n", backrefmax);
  3514. if (maxlookbehind > 0)
  3515. fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
  3516. if (new_info(re, NULL, PCRE_INFO_MATCHLIMIT, &match_limit) == 0)
  3517. fprintf(outfile, "Match limit = %u\n", match_limit);
  3518. if (new_info(re, NULL, PCRE_INFO_RECURSIONLIMIT, &recursion_limit) == 0)
  3519. fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
  3520. if (namecount > 0)
  3521. {
  3522. fprintf(outfile, "Named capturing subpatterns:\n");
  3523. while (namecount-- > 0)
  3524. {
  3525. int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
  3526. int length = (int)STRLEN(nametable + imm2_size);
  3527. fprintf(outfile, " ");
  3528. PCHARSV(nametable, imm2_size, length, outfile);
  3529. while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
  3530. #ifdef SUPPORT_PCRE32
  3531. if (pcre_mode == PCRE32_MODE)
  3532. fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
  3533. #endif
  3534. #ifdef SUPPORT_PCRE16
  3535. if (pcre_mode == PCRE16_MODE)
  3536. fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
  3537. #endif
  3538. #ifdef SUPPORT_PCRE8
  3539. if (pcre_mode == PCRE8_MODE)
  3540. fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
  3541. #endif
  3542. nametable += nameentrysize * CHAR_SIZE;
  3543. }
  3544. }
  3545. if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
  3546. if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
  3547. if (match_empty) fprintf(outfile, "May match empty string\n");
  3548. all_options = REAL_PCRE_OPTIONS(re);
  3549. if (do_flip) all_options = swap_uint32(all_options);
  3550. if (get_options == 0) fprintf(outfile, "No options\n");
  3551. else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
  3552. ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
  3553. ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
  3554. ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
  3555. ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
  3556. ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
  3557. ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
  3558. ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
  3559. ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
  3560. ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
  3561. ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
  3562. ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
  3563. ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
  3564. ((get_options & PCRE_NO_AUTO_POSSESS) != 0)? " no_auto_possessify" : "",
  3565. ((get_options & PCRE_UTF8) != 0)? " utf" : "",
  3566. ((get_options & PCRE_UCP) != 0)? " ucp" : "",
  3567. ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
  3568. ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
  3569. ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "",
  3570. ((get_options & PCRE_NEVER_UTF) != 0)? " never_utf" : "");
  3571. if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
  3572. switch (get_options & PCRE_NEWLINE_BITS)
  3573. {
  3574. case PCRE_NEWLINE_CR:
  3575. fprintf(outfile, "Forced newline sequence: CR\n");
  3576. break;
  3577. case PCRE_NEWLINE_LF:
  3578. fprintf(outfile, "Forced newline sequence: LF\n");
  3579. break;
  3580. case PCRE_NEWLINE_CRLF:
  3581. fprintf(outfile, "Forced newline sequence: CRLF\n");
  3582. break;
  3583. case PCRE_NEWLINE_ANYCRLF:
  3584. fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
  3585. break;
  3586. case PCRE_NEWLINE_ANY:
  3587. fprintf(outfile, "Forced newline sequence: ANY\n");
  3588. break;
  3589. default:
  3590. break;
  3591. }
  3592. if (first_char_set == 2)
  3593. {
  3594. fprintf(outfile, "First char at start or follows newline\n");
  3595. }
  3596. else if (first_char_set == 1)
  3597. {
  3598. const char *caseless =
  3599. ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
  3600. "" : " (caseless)";
  3601. if (PRINTOK(first_char))
  3602. fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
  3603. else
  3604. {
  3605. fprintf(outfile, "First char = ");
  3606. pchar(first_char, outfile);
  3607. fprintf(outfile, "%s\n", caseless);
  3608. }
  3609. }
  3610. else
  3611. {
  3612. fprintf(outfile, "No first char\n");
  3613. }
  3614. if (need_char_set == 0)
  3615. {
  3616. fprintf(outfile, "No need char\n");
  3617. }
  3618. else
  3619. {
  3620. const char *caseless =
  3621. ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
  3622. "" : " (caseless)";
  3623. if (PRINTOK(need_char))
  3624. fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
  3625. else
  3626. {
  3627. fprintf(outfile, "Need char = ");
  3628. pchar(need_char, outfile);
  3629. fprintf(outfile, "%s\n", caseless);
  3630. }
  3631. }
  3632. /* Don't output study size; at present it is in any case a fixed
  3633. value, but it varies, depending on the computer architecture, and
  3634. so messes up the test suite. (And with the /F option, it might be
  3635. flipped.) If study was forced by an external -s, don't show this
  3636. information unless -i or -d was also present. This means that, except
  3637. when auto-callouts are involved, the output from runs with and without
  3638. -s should be identical. */
  3639. if (do_study || (force_study >= 0 && showinfo && !no_force_study))
  3640. {
  3641. if (extra == NULL)
  3642. fprintf(outfile, "Study returned NULL\n");
  3643. else
  3644. {
  3645. pcre_uint8 *start_bits = NULL;
  3646. int minlength;
  3647. if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
  3648. fprintf(outfile, "Subject length lower bound = %d\n", minlength);
  3649. if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
  3650. {
  3651. if (start_bits == NULL)
  3652. fprintf(outfile, "No starting char list\n");
  3653. else
  3654. {
  3655. int i;
  3656. int c = 24;
  3657. fprintf(outfile, "Starting chars: ");
  3658. for (i = 0; i < 256; i++)
  3659. {
  3660. if ((start_bits[i/8] & (1<<(i&7))) != 0)
  3661. {
  3662. if (c > 75)
  3663. {
  3664. fprintf(outfile, "\n ");
  3665. c = 2;
  3666. }
  3667. if (PRINTOK(i) && i != ' ')
  3668. {
  3669. fprintf(outfile, "%c ", i);
  3670. c += 2;
  3671. }
  3672. else
  3673. {
  3674. fprintf(outfile, "\\x%02x ", i);
  3675. c += 5;
  3676. }
  3677. }
  3678. }
  3679. fprintf(outfile, "\n");
  3680. }
  3681. }
  3682. }
  3683. /* Show this only if the JIT was set by /S, not by -s. */
  3684. if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
  3685. (force_study_options & PCRE_STUDY_ALLJIT) == 0)
  3686. {
  3687. int jit;
  3688. if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
  3689. {
  3690. if (jit)
  3691. fprintf(outfile, "JIT study was successful\n");
  3692. else
  3693. #ifdef SUPPORT_JIT
  3694. fprintf(outfile, "JIT study was not successful\n");
  3695. #else
  3696. fprintf(outfile, "JIT support is not available in this version of PCRE\n");
  3697. #endif
  3698. }
  3699. }
  3700. }
  3701. }
  3702. /* If the '>' option was present, we write out the regex to a file, and
  3703. that is all. The first 8 bytes of the file are the regex length and then
  3704. the study length, in big-endian order. */
  3705. if (to_file != NULL)
  3706. {
  3707. FILE *f = fopen((char *)to_file, "wb");
  3708. if (f == NULL)
  3709. {
  3710. fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
  3711. }
  3712. else
  3713. {
  3714. pcre_uint8 sbuf[8];
  3715. if (do_flip) regexflip(re, extra);
  3716. sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
  3717. sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
  3718. sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
  3719. sbuf[3] = (pcre_uint8)((true_size) & 255);
  3720. sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
  3721. sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
  3722. sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
  3723. sbuf[7] = (pcre_uint8)((true_study_size) & 255);
  3724. if (fwrite(sbuf, 1, 8, f) < 8 ||
  3725. fwrite(re, 1, true_size, f) < true_size)
  3726. {
  3727. fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
  3728. }
  3729. else
  3730. {
  3731. fprintf(outfile, "Compiled pattern written to %s\n", to_file);
  3732. /* If there is study data, write it. */
  3733. if (extra != NULL)
  3734. {
  3735. if (fwrite(extra->study_data, 1, true_study_size, f) <
  3736. true_study_size)
  3737. {
  3738. fprintf(outfile, "Write error on %s: %s\n", to_file,
  3739. strerror(errno));
  3740. }
  3741. else fprintf(outfile, "Study data written to %s\n", to_file);
  3742. }
  3743. }
  3744. fclose(f);
  3745. }
  3746. new_free(re);
  3747. if (extra != NULL)
  3748. {
  3749. PCRE_FREE_STUDY(extra);
  3750. }
  3751. if (locale_set)
  3752. {
  3753. new_free((void *)tables);
  3754. setlocale(LC_CTYPE, "C");
  3755. locale_set = 0;
  3756. }
  3757. continue; /* With next regex */
  3758. }
  3759. } /* End of non-POSIX compile */
  3760. /* Read data lines and test them */
  3761. for (;;)
  3762. {
  3763. #ifdef SUPPORT_PCRE8
  3764. pcre_uint8 *q8;
  3765. #endif
  3766. #ifdef SUPPORT_PCRE16
  3767. pcre_uint16 *q16;
  3768. #endif
  3769. #ifdef SUPPORT_PCRE32
  3770. pcre_uint32 *q32;
  3771. #endif
  3772. pcre_uint8 *bptr;
  3773. int *use_offsets = offsets;
  3774. int use_size_offsets = size_offsets;
  3775. int callout_data = 0;
  3776. int callout_data_set = 0;
  3777. int count;
  3778. pcre_uint32 c;
  3779. int copystrings = 0;
  3780. int find_match_limit = default_find_match_limit;
  3781. int getstrings = 0;
  3782. int getlist = 0;
  3783. int gmatched = 0;
  3784. int start_offset = 0;
  3785. int start_offset_sign = 1;
  3786. int g_notempty = 0;
  3787. int use_dfa = 0;
  3788. *copynames = 0;
  3789. *getnames = 0;
  3790. #ifdef SUPPORT_PCRE32
  3791. cn32ptr = copynames;
  3792. gn32ptr = getnames;
  3793. #endif
  3794. #ifdef SUPPORT_PCRE16
  3795. cn16ptr = copynames16;
  3796. gn16ptr = getnames16;
  3797. #endif
  3798. #ifdef SUPPORT_PCRE8
  3799. cn8ptr = copynames8;
  3800. gn8ptr = getnames8;
  3801. #endif
  3802. SET_PCRE_CALLOUT(callout);
  3803. first_callout = 1;
  3804. last_callout_mark = NULL;
  3805. callout_extra = 0;
  3806. callout_count = 0;
  3807. callout_fail_count = 999999;
  3808. callout_fail_id = -1;
  3809. show_malloc = 0;
  3810. options = 0;
  3811. if (extra != NULL) extra->flags &=
  3812. ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
  3813. len = 0;
  3814. for (;;)
  3815. {
  3816. if (extend_inputline(infile, buffer + len, "data> ") == NULL)
  3817. {
  3818. if (len > 0) /* Reached EOF without hitting a newline */
  3819. {
  3820. fprintf(outfile, "\n");
  3821. break;
  3822. }
  3823. done = 1;
  3824. goto CONTINUE;
  3825. }
  3826. if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
  3827. len = (int)strlen((char *)buffer);
  3828. if (buffer[len-1] == '\n') break;
  3829. }
  3830. while (len > 0 && isspace(buffer[len-1])) len--;
  3831. buffer[len] = 0;
  3832. if (len == 0) break;
  3833. p = buffer;
  3834. while (isspace(*p)) p++;
  3835. #ifndef NOUTF
  3836. /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
  3837. invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
  3838. if (use_utf)
  3839. {
  3840. pcre_uint8 *q;
  3841. pcre_uint32 cc;
  3842. int n = 1;
  3843. for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
  3844. if (n <= 0)
  3845. {
  3846. fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
  3847. goto NEXT_DATA;
  3848. }
  3849. }
  3850. #endif
  3851. #ifdef SUPPORT_VALGRIND
  3852. /* Mark the dbuffer as addressable but undefined again. */
  3853. if (dbuffer != NULL)
  3854. {
  3855. VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size * CHAR_SIZE);
  3856. }
  3857. #endif
  3858. /* Allocate a buffer to hold the data line; len+1 is an upper bound on
  3859. the number of pcre_uchar units that will be needed. */
  3860. while (dbuffer == NULL || (size_t)len >= dbuffer_size)
  3861. {
  3862. dbuffer_size *= 2;
  3863. dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
  3864. if (dbuffer == NULL)
  3865. {
  3866. fprintf(stderr, "pcretest: realloc(%d) failed\n", (int)dbuffer_size);
  3867. exit(1);
  3868. }
  3869. }
  3870. #ifdef SUPPORT_PCRE8
  3871. q8 = (pcre_uint8 *) dbuffer;
  3872. #endif
  3873. #ifdef SUPPORT_PCRE16
  3874. q16 = (pcre_uint16 *) dbuffer;
  3875. #endif
  3876. #ifdef SUPPORT_PCRE32
  3877. q32 = (pcre_uint32 *) dbuffer;
  3878. #endif
  3879. while ((c = *p++) != 0)
  3880. {
  3881. int i = 0;
  3882. int n = 0;
  3883. /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
  3884. In non-UTF mode, allow the value of the byte to fall through to later,
  3885. where values greater than 127 are turned into UTF-8 when running in
  3886. 16-bit or 32-bit mode. */
  3887. if (c != '\\')
  3888. {
  3889. #ifndef NOUTF
  3890. if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
  3891. #endif
  3892. }
  3893. /* Handle backslash escapes */
  3894. else switch ((c = *p++))
  3895. {
  3896. case 'a': c = CHAR_BEL; break;
  3897. case 'b': c = '\b'; break;
  3898. case 'e': c = CHAR_ESC; break;
  3899. case 'f': c = '\f'; break;
  3900. case 'n': c = '\n'; break;
  3901. case 'r': c = '\r'; break;
  3902. case 't': c = '\t'; break;
  3903. case 'v': c = '\v'; break;
  3904. case '0': case '1': case '2': case '3':
  3905. case '4': case '5': case '6': case '7':
  3906. c -= '0';
  3907. while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
  3908. c = c * 8 + *p++ - '0';
  3909. break;
  3910. case 'o':
  3911. if (*p == '{')
  3912. {
  3913. pcre_uint8 *pt = p;
  3914. c = 0;
  3915. for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
  3916. {
  3917. if (++i == 12)
  3918. fprintf(outfile, "** Too many octal digits in \\o{...} item; "
  3919. "using only the first twelve.\n");
  3920. else c = c * 8 + *pt - '0';
  3921. }
  3922. if (*pt == '}') p = pt + 1;
  3923. else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
  3924. }
  3925. break;
  3926. case 'x':
  3927. if (*p == '{')
  3928. {
  3929. pcre_uint8 *pt = p;
  3930. c = 0;
  3931. /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
  3932. when isxdigit() is a macro that refers to its argument more than
  3933. once. This is banned by the C Standard, but apparently happens in at
  3934. least one MacOS environment. */
  3935. for (pt++; isxdigit(*pt); pt++)
  3936. {
  3937. if (++i == 9)
  3938. fprintf(outfile, "** Too many hex digits in \\x{...} item; "
  3939. "using only the first eight.\n");
  3940. else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
  3941. }
  3942. if (*pt == '}')
  3943. {
  3944. p = pt + 1;
  3945. break;
  3946. }
  3947. /* Not correct form for \x{...}; fall through */
  3948. }
  3949. /* \x without {} always defines just one byte in 8-bit mode. This
  3950. allows UTF-8 characters to be constructed byte by byte, and also allows
  3951. invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
  3952. Otherwise, pass it down to later code so that it can be turned into
  3953. UTF-8 when running in 16/32-bit mode. */
  3954. c = 0;
  3955. while (i++ < 2 && isxdigit(*p))
  3956. {
  3957. c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
  3958. p++;
  3959. }
  3960. #if !defined NOUTF && defined SUPPORT_PCRE8
  3961. if (use_utf && (pcre_mode == PCRE8_MODE))
  3962. {
  3963. *q8++ = c;
  3964. continue;
  3965. }
  3966. #endif
  3967. break;
  3968. case 0: /* \ followed by EOF allows for an empty line */
  3969. p--;
  3970. continue;
  3971. case '>':
  3972. if (*p == '-')
  3973. {
  3974. start_offset_sign = -1;
  3975. p++;
  3976. }
  3977. while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
  3978. start_offset *= start_offset_sign;
  3979. continue;
  3980. case 'A': /* Option setting */
  3981. options |= PCRE_ANCHORED;
  3982. continue;
  3983. case 'B':
  3984. options |= PCRE_NOTBOL;
  3985. continue;
  3986. case 'C':
  3987. if (isdigit(*p)) /* Set copy string */
  3988. {
  3989. while(isdigit(*p)) n = n * 10 + *p++ - '0';
  3990. copystrings |= 1 << n;
  3991. }
  3992. else if (isalnum(*p))
  3993. {
  3994. READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
  3995. }
  3996. else if (*p == '+')
  3997. {
  3998. callout_extra = 1;
  3999. p++;
  4000. }
  4001. else if (*p == '-')
  4002. {
  4003. SET_PCRE_CALLOUT(NULL);
  4004. p++;
  4005. }
  4006. else if (*p == '!')
  4007. {
  4008. callout_fail_id = 0;
  4009. p++;
  4010. while(isdigit(*p))
  4011. callout_fail_id = callout_fail_id * 10 + *p++ - '0';
  4012. callout_fail_count = 0;
  4013. if (*p == '!')
  4014. {
  4015. p++;
  4016. while(isdigit(*p))
  4017. callout_fail_count = callout_fail_count * 10 + *p++ - '0';
  4018. }
  4019. }
  4020. else if (*p == '*')
  4021. {
  4022. int sign = 1;
  4023. callout_data = 0;
  4024. if (*(++p) == '-') { sign = -1; p++; }
  4025. while(isdigit(*p))
  4026. callout_data = callout_data * 10 + *p++ - '0';
  4027. callout_data *= sign;
  4028. callout_data_set = 1;
  4029. }
  4030. continue;
  4031. #if !defined NODFA
  4032. case 'D':
  4033. #if !defined NOPOSIX
  4034. if (posix || do_posix)
  4035. printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
  4036. else
  4037. #endif
  4038. use_dfa = 1;
  4039. continue;
  4040. #endif
  4041. #if !defined NODFA
  4042. case 'F':
  4043. options |= PCRE_DFA_SHORTEST;
  4044. continue;
  4045. #endif
  4046. case 'G':
  4047. if (isdigit(*p))
  4048. {
  4049. while(isdigit(*p)) n = n * 10 + *p++ - '0';
  4050. getstrings |= 1 << n;
  4051. }
  4052. else if (isalnum(*p))
  4053. {
  4054. READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
  4055. }
  4056. continue;
  4057. case 'J':
  4058. while(isdigit(*p)) n = n * 10 + *p++ - '0';
  4059. if (extra != NULL
  4060. && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
  4061. && extra->executable_jit != NULL)
  4062. {
  4063. if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
  4064. jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
  4065. PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
  4066. }
  4067. continue;
  4068. case 'L':
  4069. getlist = 1;
  4070. continue;
  4071. case 'M':
  4072. find_match_limit = 1;
  4073. continue;
  4074. case 'N':
  4075. if ((options & PCRE_NOTEMPTY) != 0)
  4076. options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
  4077. else
  4078. options |= PCRE_NOTEMPTY;
  4079. continue;
  4080. case 'O':
  4081. while(isdigit(*p))
  4082. {
  4083. if (n > (INT_MAX-10)/10) /* Hack to stop fuzzers */
  4084. {
  4085. printf("** \\O argument is too big\n");
  4086. yield = 1;
  4087. goto EXIT;
  4088. }
  4089. n = n * 10 + *p++ - '0';
  4090. }
  4091. if (n > size_offsets_max)
  4092. {
  4093. size_offsets_max = n;
  4094. free(offsets);
  4095. use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
  4096. if (offsets == NULL)
  4097. {
  4098. printf("** Failed to get %d bytes of memory for offsets vector\n",
  4099. (int)(size_offsets_max * sizeof(int)));
  4100. yield = 1;
  4101. goto EXIT;
  4102. }
  4103. }
  4104. use_size_offsets = n;
  4105. if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
  4106. else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
  4107. continue;
  4108. case 'P':
  4109. options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
  4110. PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
  4111. continue;
  4112. case 'Q':
  4113. while(isdigit(*p)) n = n * 10 + *p++ - '0';
  4114. if (extra == NULL)
  4115. {
  4116. extra = (pcre_extra *)malloc(sizeof(pcre_extra));
  4117. extra->flags = 0;
  4118. }
  4119. extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
  4120. extra->match_limit_recursion = n;
  4121. continue;
  4122. case 'q':
  4123. while(isdigit(*p)) n = n * 10 + *p++ - '0';
  4124. if (extra == NULL)
  4125. {
  4126. extra = (pcre_extra *)malloc(sizeof(pcre_extra));
  4127. extra->flags = 0;
  4128. }
  4129. extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
  4130. extra->match_limit = n;
  4131. continue;
  4132. #if !defined NODFA
  4133. case 'R':
  4134. options |= PCRE_DFA_RESTART;
  4135. continue;
  4136. #endif
  4137. case 'S':
  4138. show_malloc = 1;
  4139. continue;
  4140. case 'Y':
  4141. options |= PCRE_NO_START_OPTIMIZE;
  4142. continue;
  4143. case 'Z':
  4144. options |= PCRE_NOTEOL;
  4145. continue;
  4146. case '?':
  4147. options |= PCRE_NO_UTF8_CHECK;
  4148. continue;
  4149. case '<':
  4150. {
  4151. int x = check_mc_option(p, outfile, TRUE, "escape sequence");
  4152. if (x == 0) goto NEXT_DATA;
  4153. options |= x;
  4154. while (*p++ != '>');
  4155. }
  4156. continue;
  4157. }
  4158. /* We now have a character value in c that may be greater than 255.
  4159. In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
  4160. than 127 in UTF mode must have come from \x{...} or octal constructs
  4161. because values from \x.. get this far only in non-UTF mode. */
  4162. #ifdef SUPPORT_PCRE8
  4163. if (pcre_mode == PCRE8_MODE)
  4164. {
  4165. #ifndef NOUTF
  4166. if (use_utf)
  4167. {
  4168. if (c > 0x7fffffff)
  4169. {
  4170. fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
  4171. "and so cannot be converted to UTF-8\n", c);
  4172. goto NEXT_DATA;
  4173. }
  4174. q8 += ord2utf8(c, q8);
  4175. }
  4176. else
  4177. #endif
  4178. {
  4179. if (c > 0xffu)
  4180. {
  4181. fprintf(outfile, "** Character \\x{%x} is greater than 255 "
  4182. "and UTF-8 mode is not enabled.\n", c);
  4183. fprintf(outfile, "** Truncation will probably give the wrong "
  4184. "result.\n");
  4185. }
  4186. *q8++ = c;
  4187. }
  4188. }
  4189. #endif
  4190. #ifdef SUPPORT_PCRE16
  4191. if (pcre_mode == PCRE16_MODE)
  4192. {
  4193. #ifndef NOUTF
  4194. if (use_utf)
  4195. {
  4196. if (c > 0x10ffffu)
  4197. {
  4198. fprintf(outfile, "** Failed: character \\x{%x} is greater than "
  4199. "0x10ffff and so cannot be converted to UTF-16\n", c);
  4200. goto NEXT_DATA;
  4201. }
  4202. else if (c >= 0x10000u)
  4203. {
  4204. c-= 0x10000u;
  4205. *q16++ = 0xD800 | (c >> 10);
  4206. *q16++ = 0xDC00 | (c & 0x3ff);
  4207. }
  4208. else
  4209. *q16++ = c;
  4210. }
  4211. else
  4212. #endif
  4213. {
  4214. if (c > 0xffffu)
  4215. {
  4216. fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
  4217. "and UTF-16 mode is not enabled.\n", c);
  4218. fprintf(outfile, "** Truncation will probably give the wrong "
  4219. "result.\n");
  4220. }
  4221. *q16++ = c;
  4222. }
  4223. }
  4224. #endif
  4225. #ifdef SUPPORT_PCRE32
  4226. if (pcre_mode == PCRE32_MODE)
  4227. {
  4228. *q32++ = c;
  4229. }
  4230. #endif
  4231. }
  4232. /* Reached end of subject string */
  4233. #ifdef SUPPORT_PCRE8
  4234. if (pcre_mode == PCRE8_MODE)
  4235. {
  4236. *q8 = 0;
  4237. len = (int)(q8 - (pcre_uint8 *)dbuffer);
  4238. }
  4239. #endif
  4240. #ifdef SUPPORT_PCRE16
  4241. if (pcre_mode == PCRE16_MODE)
  4242. {
  4243. *q16 = 0;
  4244. len = (int)(q16 - (pcre_uint16 *)dbuffer);
  4245. }
  4246. #endif
  4247. #ifdef SUPPORT_PCRE32
  4248. if (pcre_mode == PCRE32_MODE)
  4249. {
  4250. *q32 = 0;
  4251. len = (int)(q32 - (pcre_uint32 *)dbuffer);
  4252. }
  4253. #endif
  4254. /* If we're compiling with explicit valgrind support, mark the data from after
  4255. its end to the end of the buffer as unaddressable, so that a read over the end
  4256. of the buffer will be seen by valgrind, even if it doesn't cause a crash.
  4257. If we're not building with valgrind support, at least move the data to the end
  4258. of the buffer so that it might at least cause a crash.
  4259. If we are using the POSIX interface, we must include the terminating zero. */
  4260. bptr = dbuffer;
  4261. #if !defined NOPOSIX
  4262. if (posix || do_posix)
  4263. {
  4264. #ifdef SUPPORT_VALGRIND
  4265. VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + 1, dbuffer_size - (len + 1));
  4266. #else
  4267. memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
  4268. bptr += dbuffer_size - len - 1;
  4269. #endif
  4270. }
  4271. else
  4272. #endif
  4273. {
  4274. #ifdef SUPPORT_VALGRIND
  4275. VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len * CHAR_SIZE, (dbuffer_size - len) * CHAR_SIZE);
  4276. #else
  4277. bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
  4278. #endif
  4279. }
  4280. if ((all_use_dfa || use_dfa) && find_match_limit)
  4281. {
  4282. printf("** Match limit not relevant for DFA matching: ignored\n");
  4283. find_match_limit = 0;
  4284. }
  4285. /* Handle matching via the POSIX interface, which does not
  4286. support timing or playing with the match limit or callout data. */
  4287. #if !defined NOPOSIX
  4288. if (posix || do_posix)
  4289. {
  4290. int rc;
  4291. int eflags = 0;
  4292. regmatch_t *pmatch = NULL;
  4293. if (use_size_offsets > 0)
  4294. pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
  4295. if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
  4296. if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
  4297. if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
  4298. rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
  4299. if (rc != 0)
  4300. {
  4301. (void)regerror(rc, &preg, (char *)buffer, buffer_size);
  4302. fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
  4303. }
  4304. else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
  4305. {
  4306. fprintf(outfile, "Matched with REG_NOSUB\n");
  4307. }
  4308. else
  4309. {
  4310. size_t i;
  4311. for (i = 0; i < (size_t)use_size_offsets; i++)
  4312. {
  4313. if (pmatch[i].rm_so >= 0)
  4314. {
  4315. fprintf(outfile, "%2d: ", (int)i);
  4316. PCHARSV(dbuffer, pmatch[i].rm_so,
  4317. pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
  4318. fprintf(outfile, "\n");
  4319. if (do_showcaprest || (i == 0 && do_showrest))
  4320. {
  4321. fprintf(outfile, "%2d+ ", (int)i);
  4322. PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
  4323. outfile);
  4324. fprintf(outfile, "\n");
  4325. }
  4326. }
  4327. }
  4328. }
  4329. free(pmatch);
  4330. goto NEXT_DATA;
  4331. }
  4332. #endif /* !defined NOPOSIX */
  4333. /* Handle matching via the native interface - repeats for /g and /G */
  4334. /* Ensure that there is a JIT callback if we want to verify that JIT was
  4335. actually used. If jit_stack == NULL, no stack has yet been assigned. */
  4336. if (verify_jit && jit_stack == NULL && extra != NULL)
  4337. { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
  4338. for (;; gmatched++) /* Loop for /g or /G */
  4339. {
  4340. markptr = NULL;
  4341. jit_was_used = FALSE;
  4342. if (timeitm > 0)
  4343. {
  4344. register int i;
  4345. clock_t time_taken;
  4346. clock_t start_time = clock();
  4347. #if !defined NODFA
  4348. if (all_use_dfa || use_dfa)
  4349. {
  4350. if ((options & PCRE_DFA_RESTART) != 0)
  4351. {
  4352. fprintf(outfile, "Timing DFA restarts is not supported\n");
  4353. break;
  4354. }
  4355. if (dfa_workspace == NULL)
  4356. dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
  4357. for (i = 0; i < timeitm; i++)
  4358. {
  4359. PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
  4360. (options | g_notempty), use_offsets, use_size_offsets,
  4361. dfa_workspace, DFA_WS_DIMENSION);
  4362. }
  4363. }
  4364. else
  4365. #endif
  4366. for (i = 0; i < timeitm; i++)
  4367. {
  4368. PCRE_EXEC(count, re, extra, bptr, len, start_offset,
  4369. (options | g_notempty), use_offsets, use_size_offsets);
  4370. }
  4371. total_match_time += (time_taken = clock() - start_time);
  4372. fprintf(outfile, "Execute time %.4f milliseconds\n",
  4373. (((double)time_taken * 1000.0) / (double)timeitm) /
  4374. (double)CLOCKS_PER_SEC);
  4375. }
  4376. /* If find_match_limit is set, we want to do repeated matches with
  4377. varying limits in order to find the minimum value for the match limit and
  4378. for the recursion limit. The match limits are relevant only to the normal
  4379. running of pcre_exec(), so disable the JIT optimization. This makes it
  4380. possible to run the same set of tests with and without JIT externally
  4381. requested. */
  4382. if (find_match_limit)
  4383. {
  4384. if (extra != NULL) { PCRE_FREE_STUDY(extra); }
  4385. extra = (pcre_extra *)malloc(sizeof(pcre_extra));
  4386. extra->flags = 0;
  4387. (void)check_match_limit(re, extra, bptr, len, start_offset,
  4388. options|g_notempty, use_offsets, use_size_offsets,
  4389. PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
  4390. PCRE_ERROR_MATCHLIMIT, "match()");
  4391. count = check_match_limit(re, extra, bptr, len, start_offset,
  4392. options|g_notempty, use_offsets, use_size_offsets,
  4393. PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
  4394. PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
  4395. }
  4396. /* If callout_data is set, use the interface with additional data */
  4397. else if (callout_data_set)
  4398. {
  4399. if (extra == NULL)
  4400. {
  4401. extra = (pcre_extra *)malloc(sizeof(pcre_extra));
  4402. extra->flags = 0;
  4403. }
  4404. extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
  4405. extra->callout_data = &callout_data;
  4406. PCRE_EXEC(count, re, extra, bptr, len, start_offset,
  4407. options | g_notempty, use_offsets, use_size_offsets);
  4408. extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
  4409. }
  4410. /* The normal case is just to do the match once, with the default
  4411. value of match_limit. */
  4412. #if !defined NODFA
  4413. else if (all_use_dfa || use_dfa)
  4414. {
  4415. if (dfa_workspace == NULL)
  4416. dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
  4417. if (dfa_matched++ == 0)
  4418. dfa_workspace[0] = -1; /* To catch bad restart */
  4419. PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
  4420. (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
  4421. DFA_WS_DIMENSION);
  4422. if (count == 0)
  4423. {
  4424. fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
  4425. count = use_size_offsets/2;
  4426. }
  4427. }
  4428. #endif
  4429. else
  4430. {
  4431. PCRE_EXEC(count, re, extra, bptr, len, start_offset,
  4432. options | g_notempty, use_offsets, use_size_offsets);
  4433. if (count == 0)
  4434. {
  4435. fprintf(outfile, "Matched, but too many substrings\n");
  4436. /* 2 is a special case; match can be returned */
  4437. count = (use_size_offsets == 2)? 1 : use_size_offsets/3;
  4438. }
  4439. }
  4440. /* Matched */
  4441. if (count >= 0)
  4442. {
  4443. int i, maxcount;
  4444. void *cnptr, *gnptr;
  4445. #if !defined NODFA
  4446. if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
  4447. #endif
  4448. /* 2 is a special case; match can be returned */
  4449. maxcount = (use_size_offsets == 2)? 1 : use_size_offsets/3;
  4450. /* This is a check against a lunatic return value. */
  4451. if (count > maxcount)
  4452. {
  4453. fprintf(outfile,
  4454. "** PCRE error: returned count %d is too big for offset size %d\n",
  4455. count, use_size_offsets);
  4456. count = use_size_offsets/3;
  4457. if (do_g || do_G)
  4458. {
  4459. fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
  4460. do_g = do_G = FALSE; /* Break g/G loop */
  4461. }
  4462. }
  4463. /* do_allcaps requests showing of all captures in the pattern, to check
  4464. unset ones at the end. */
  4465. if (do_allcaps)
  4466. {
  4467. if (all_use_dfa || use_dfa)
  4468. {
  4469. fprintf(outfile, "** Show all captures ignored after DFA matching\n");
  4470. }
  4471. else
  4472. {
  4473. if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
  4474. goto SKIP_DATA;
  4475. count++; /* Allow for full match */
  4476. if (count * 2 > use_size_offsets) count = use_size_offsets/2;
  4477. }
  4478. }
  4479. /* Output the captured substrings. Note that, for the matched string,
  4480. the use of \K in an assertion can make the start later than the end. */
  4481. for (i = 0; i < count * 2; i += 2)
  4482. {
  4483. if (use_offsets[i] < 0)
  4484. {
  4485. if (use_offsets[i] != -1)
  4486. fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
  4487. use_offsets[i], i);
  4488. if (use_offsets[i+1] != -1)
  4489. fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
  4490. use_offsets[i+1], i+1);
  4491. fprintf(outfile, "%2d: <unset>\n", i/2);
  4492. }
  4493. else
  4494. {
  4495. int start = use_offsets[i];
  4496. int end = use_offsets[i+1];
  4497. if (start > end)
  4498. {
  4499. start = use_offsets[i+1];
  4500. end = use_offsets[i];
  4501. fprintf(outfile, "Start of matched string is beyond its end - "
  4502. "displaying from end to start.\n");
  4503. }
  4504. fprintf(outfile, "%2d: ", i/2);
  4505. PCHARSV(bptr, start, end - start, outfile);
  4506. if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
  4507. fprintf(outfile, "\n");
  4508. /* Note: don't use the start/end variables here because we want to
  4509. show the text from what is reported as the end. */
  4510. if (do_showcaprest || (i == 0 && do_showrest))
  4511. {
  4512. fprintf(outfile, "%2d+ ", i/2);
  4513. PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
  4514. outfile);
  4515. fprintf(outfile, "\n");
  4516. }
  4517. }
  4518. }
  4519. if (markptr != NULL)
  4520. {
  4521. fprintf(outfile, "MK: ");
  4522. PCHARSV(markptr, 0, -1, outfile);
  4523. fprintf(outfile, "\n");
  4524. }
  4525. for (i = 0; i < 32; i++)
  4526. {
  4527. if ((copystrings & (1 << i)) != 0)
  4528. {
  4529. int rc;
  4530. char copybuffer[256];
  4531. PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
  4532. copybuffer, sizeof(copybuffer));
  4533. if (rc < 0)
  4534. fprintf(outfile, "copy substring %d failed %d\n", i, rc);
  4535. else
  4536. {
  4537. fprintf(outfile, "%2dC ", i);
  4538. PCHARSV(copybuffer, 0, rc, outfile);
  4539. fprintf(outfile, " (%d)\n", rc);
  4540. }
  4541. }
  4542. }
  4543. cnptr = copynames;
  4544. for (;;)
  4545. {
  4546. int rc;
  4547. char copybuffer[256];
  4548. #ifdef SUPPORT_PCRE32
  4549. if (pcre_mode == PCRE32_MODE)
  4550. {
  4551. if (*(pcre_uint32 *)cnptr == 0) break;
  4552. }
  4553. #endif
  4554. #ifdef SUPPORT_PCRE16
  4555. if (pcre_mode == PCRE16_MODE)
  4556. {
  4557. if (*(pcre_uint16 *)cnptr == 0) break;
  4558. }
  4559. #endif
  4560. #ifdef SUPPORT_PCRE8
  4561. if (pcre_mode == PCRE8_MODE)
  4562. {
  4563. if (*(pcre_uint8 *)cnptr == 0) break;
  4564. }
  4565. #endif
  4566. PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
  4567. cnptr, copybuffer, sizeof(copybuffer));
  4568. if (rc < 0)
  4569. {
  4570. fprintf(outfile, "copy substring ");
  4571. PCHARSV(cnptr, 0, -1, outfile);
  4572. fprintf(outfile, " failed %d\n", rc);
  4573. }
  4574. else
  4575. {
  4576. fprintf(outfile, " C ");
  4577. PCHARSV(copybuffer, 0, rc, outfile);
  4578. fprintf(outfile, " (%d) ", rc);
  4579. PCHARSV(cnptr, 0, -1, outfile);
  4580. putc('\n', outfile);
  4581. }
  4582. cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
  4583. }
  4584. for (i = 0; i < 32; i++)
  4585. {
  4586. if ((getstrings & (1 << i)) != 0)
  4587. {
  4588. int rc;
  4589. const char *substring;
  4590. PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
  4591. if (rc < 0)
  4592. fprintf(outfile, "get substring %d failed %d\n", i, rc);
  4593. else
  4594. {
  4595. fprintf(outfile, "%2dG ", i);
  4596. PCHARSV(substring, 0, rc, outfile);
  4597. fprintf(outfile, " (%d)\n", rc);
  4598. PCRE_FREE_SUBSTRING(substring);
  4599. }
  4600. }
  4601. }
  4602. gnptr = getnames;
  4603. for (;;)
  4604. {
  4605. int rc;
  4606. const char *substring;
  4607. #ifdef SUPPORT_PCRE32
  4608. if (pcre_mode == PCRE32_MODE)
  4609. {
  4610. if (*(pcre_uint32 *)gnptr == 0) break;
  4611. }
  4612. #endif
  4613. #ifdef SUPPORT_PCRE16
  4614. if (pcre_mode == PCRE16_MODE)
  4615. {
  4616. if (*(pcre_uint16 *)gnptr == 0) break;
  4617. }
  4618. #endif
  4619. #ifdef SUPPORT_PCRE8
  4620. if (pcre_mode == PCRE8_MODE)
  4621. {
  4622. if (*(pcre_uint8 *)gnptr == 0) break;
  4623. }
  4624. #endif
  4625. PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
  4626. gnptr, &substring);
  4627. if (rc < 0)
  4628. {
  4629. fprintf(outfile, "get substring ");
  4630. PCHARSV(gnptr, 0, -1, outfile);
  4631. fprintf(outfile, " failed %d\n", rc);
  4632. }
  4633. else
  4634. {
  4635. fprintf(outfile, " G ");
  4636. PCHARSV(substring, 0, rc, outfile);
  4637. fprintf(outfile, " (%d) ", rc);
  4638. PCHARSV(gnptr, 0, -1, outfile);
  4639. PCRE_FREE_SUBSTRING(substring);
  4640. putc('\n', outfile);
  4641. }
  4642. gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
  4643. }
  4644. if (getlist)
  4645. {
  4646. int rc;
  4647. const char **stringlist;
  4648. PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
  4649. if (rc < 0)
  4650. fprintf(outfile, "get substring list failed %d\n", rc);
  4651. else
  4652. {
  4653. for (i = 0; i < count; i++)
  4654. {
  4655. fprintf(outfile, "%2dL ", i);
  4656. PCHARSV(stringlist[i], 0, -1, outfile);
  4657. putc('\n', outfile);
  4658. }
  4659. if (stringlist[i] != NULL)
  4660. fprintf(outfile, "string list not terminated by NULL\n");
  4661. PCRE_FREE_SUBSTRING_LIST(stringlist);
  4662. }
  4663. }
  4664. }
  4665. /* There was a partial match. If the bumpalong point is not the same as
  4666. the first inspected character, show the offset explicitly. */
  4667. else if (count == PCRE_ERROR_PARTIAL)
  4668. {
  4669. fprintf(outfile, "Partial match");
  4670. if (use_size_offsets > 2 && use_offsets[0] != use_offsets[2])
  4671. fprintf(outfile, " at offset %d", use_offsets[2]);
  4672. if (markptr != NULL)
  4673. {
  4674. fprintf(outfile, ", mark=");
  4675. PCHARSV(markptr, 0, -1, outfile);
  4676. }
  4677. if (use_size_offsets > 1)
  4678. {
  4679. fprintf(outfile, ": ");
  4680. PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
  4681. outfile);
  4682. }
  4683. if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
  4684. fprintf(outfile, "\n");
  4685. break; /* Out of the /g loop */
  4686. }
  4687. /* Failed to match. If this is a /g or /G loop and we previously set
  4688. g_notempty after a null match, this is not necessarily the end. We want
  4689. to advance the start offset, and continue. We won't be at the end of the
  4690. string - that was checked before setting g_notempty.
  4691. Complication arises in the case when the newline convention is "any",
  4692. "crlf", or "anycrlf". If the previous match was at the end of a line
  4693. terminated by CRLF, an advance of one character just passes the \r,
  4694. whereas we should prefer the longer newline sequence, as does the code in
  4695. pcre_exec(). Fudge the offset value to achieve this. We check for a
  4696. newline setting in the pattern; if none was set, use PCRE_CONFIG() to
  4697. find the default.
  4698. Otherwise, in the case of UTF-8 matching, the advance must be one
  4699. character, not one byte. */
  4700. else
  4701. {
  4702. if (g_notempty != 0)
  4703. {
  4704. int onechar = 1;
  4705. unsigned int obits = REAL_PCRE_OPTIONS(re);
  4706. use_offsets[0] = start_offset;
  4707. if ((obits & PCRE_NEWLINE_BITS) == 0)
  4708. {
  4709. int d;
  4710. (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
  4711. /* Note that these values are always the ASCII ones, even in
  4712. EBCDIC environments. CR = 13, NL = 10. */
  4713. obits = (d == 13)? PCRE_NEWLINE_CR :
  4714. (d == 10)? PCRE_NEWLINE_LF :
  4715. (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
  4716. (d == -2)? PCRE_NEWLINE_ANYCRLF :
  4717. (d == -1)? PCRE_NEWLINE_ANY : 0;
  4718. }
  4719. if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
  4720. (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
  4721. (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
  4722. &&
  4723. start_offset < len - 1 && (
  4724. #ifdef SUPPORT_PCRE8
  4725. (pcre_mode == PCRE8_MODE &&
  4726. bptr[start_offset] == '\r' &&
  4727. bptr[start_offset + 1] == '\n') ||
  4728. #endif
  4729. #ifdef SUPPORT_PCRE16
  4730. (pcre_mode == PCRE16_MODE &&
  4731. ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
  4732. ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
  4733. #endif
  4734. #ifdef SUPPORT_PCRE32
  4735. (pcre_mode == PCRE32_MODE &&
  4736. ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
  4737. ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
  4738. #endif
  4739. 0))
  4740. onechar++;
  4741. else if (use_utf)
  4742. {
  4743. while (start_offset + onechar < len)
  4744. {
  4745. if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
  4746. onechar++;
  4747. }
  4748. }
  4749. use_offsets[1] = start_offset + onechar;
  4750. }
  4751. else
  4752. {
  4753. switch(count)
  4754. {
  4755. case PCRE_ERROR_NOMATCH:
  4756. if (gmatched == 0)
  4757. {
  4758. if (markptr == NULL)
  4759. {
  4760. fprintf(outfile, "No match");
  4761. }
  4762. else
  4763. {
  4764. fprintf(outfile, "No match, mark = ");
  4765. PCHARSV(markptr, 0, -1, outfile);
  4766. }
  4767. if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
  4768. putc('\n', outfile);
  4769. }
  4770. break;
  4771. case PCRE_ERROR_BADUTF8:
  4772. case PCRE_ERROR_SHORTUTF8:
  4773. fprintf(outfile, "Error %d (%s UTF-%d string)", count,
  4774. (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
  4775. 8 * CHAR_SIZE);
  4776. if (use_size_offsets >= 2)
  4777. fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
  4778. use_offsets[1]);
  4779. fprintf(outfile, "\n");
  4780. break;
  4781. case PCRE_ERROR_BADUTF8_OFFSET:
  4782. fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
  4783. 8 * CHAR_SIZE);
  4784. break;
  4785. default:
  4786. if (count < 0 &&
  4787. (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
  4788. fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
  4789. else
  4790. fprintf(outfile, "Error %d (Unexpected value)\n", count);
  4791. break;
  4792. }
  4793. break; /* Out of the /g loop */
  4794. }
  4795. }
  4796. /* If not /g or /G we are done */
  4797. if (!do_g && !do_G) break;
  4798. if (use_offsets == NULL)
  4799. {
  4800. fprintf(outfile, "Cannot do global matching without an ovector\n");
  4801. break;
  4802. }
  4803. if (use_size_offsets < 2)
  4804. {
  4805. fprintf(outfile, "Cannot do global matching with an ovector size < 2\n");
  4806. break;
  4807. }
  4808. /* If we have matched an empty string, first check to see if we are at
  4809. the end of the subject. If so, the /g loop is over. Otherwise, mimic what
  4810. Perl's /g option does. This turns out to be rather cunning. First we set
  4811. PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
  4812. same point. If this fails (picked up above) we advance to the next
  4813. character. */
  4814. g_notempty = 0;
  4815. if (use_offsets[0] == use_offsets[1])
  4816. {
  4817. if (use_offsets[0] == len) break;
  4818. g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
  4819. }
  4820. /* For /g, update the start offset, leaving the rest alone. There is a
  4821. tricky case when \K is used in a positive lookbehind assertion. This can
  4822. cause the end of the match to be less than or equal to the start offset.
  4823. In this case we restart at one past the start offset. This may return the
  4824. same match if the original start offset was bumped along during the
  4825. match, but eventually the new start offset will hit the actual start
  4826. offset. (In PCRE2 the true start offset is available, and this can be
  4827. done better. It is not worth doing more than making sure we do not loop
  4828. at this stage in the life of PCRE1.) */
  4829. if (do_g)
  4830. {
  4831. if (g_notempty == 0 && use_offsets[1] <= start_offset)
  4832. {
  4833. if (start_offset >= len) break; /* End of subject */
  4834. start_offset++;
  4835. if (use_utf)
  4836. {
  4837. while (start_offset < len)
  4838. {
  4839. if ((bptr[start_offset] & 0xc0) != 0x80) break;
  4840. start_offset++;
  4841. }
  4842. }
  4843. }
  4844. else start_offset = use_offsets[1];
  4845. }
  4846. /* For /G, update the pointer and length */
  4847. else
  4848. {
  4849. bptr += use_offsets[1] * CHAR_SIZE;
  4850. len -= use_offsets[1];
  4851. }
  4852. } /* End of loop for /g and /G */
  4853. NEXT_DATA: continue;
  4854. } /* End of loop for data lines */
  4855. CONTINUE:
  4856. #if !defined NOPOSIX
  4857. if ((posix || do_posix) && preg.re_pcre != 0) regfree(&preg);
  4858. #endif
  4859. if (re != NULL) new_free(re);
  4860. if (extra != NULL)
  4861. {
  4862. PCRE_FREE_STUDY(extra);
  4863. }
  4864. if (locale_set)
  4865. {
  4866. new_free((void *)tables);
  4867. setlocale(LC_CTYPE, "C");
  4868. locale_set = 0;
  4869. }
  4870. if (jit_stack != NULL)
  4871. {
  4872. PCRE_JIT_STACK_FREE(jit_stack);
  4873. jit_stack = NULL;
  4874. }
  4875. }
  4876. if (infile == stdin) fprintf(outfile, "\n");
  4877. if (showtotaltimes)
  4878. {
  4879. fprintf(outfile, "--------------------------------------\n");
  4880. if (timeit > 0)
  4881. {
  4882. fprintf(outfile, "Total compile time %.4f milliseconds\n",
  4883. (((double)total_compile_time * 1000.0) / (double)timeit) /
  4884. (double)CLOCKS_PER_SEC);
  4885. fprintf(outfile, "Total study time %.4f milliseconds\n",
  4886. (((double)total_study_time * 1000.0) / (double)timeit) /
  4887. (double)CLOCKS_PER_SEC);
  4888. }
  4889. fprintf(outfile, "Total execute time %.4f milliseconds\n",
  4890. (((double)total_match_time * 1000.0) / (double)timeitm) /
  4891. (double)CLOCKS_PER_SEC);
  4892. }
  4893. EXIT:
  4894. if (infile != NULL && infile != stdin) fclose(infile);
  4895. if (outfile != NULL && outfile != stdout) fclose(outfile);
  4896. free(buffer);
  4897. free(dbuffer);
  4898. free(pbuffer);
  4899. free(offsets);
  4900. #ifdef SUPPORT_PCRE16
  4901. if (buffer16 != NULL) free(buffer16);
  4902. #endif
  4903. #ifdef SUPPORT_PCRE32
  4904. if (buffer32 != NULL) free(buffer32);
  4905. #endif
  4906. #if !defined NODFA
  4907. if (dfa_workspace != NULL)
  4908. free(dfa_workspace);
  4909. #endif
  4910. #if defined(__VMS)
  4911. yield = SS$_NORMAL; /* Return values via DCL symbols */
  4912. #endif
  4913. return yield;
  4914. }
  4915. /* End of pcretest.c */