mbstring.c 122 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377
  1. /*
  2. +----------------------------------------------------------------------+
  3. | Copyright (c) The PHP Group |
  4. +----------------------------------------------------------------------+
  5. | This source file is subject to version 3.01 of the PHP license, |
  6. | that is bundled with this package in the file LICENSE, and is |
  7. | available through the world-wide-web at the following url: |
  8. | https://www.php.net/license/3_01.txt |
  9. | If you did not receive a copy of the PHP license and are unable to |
  10. | obtain it through the world-wide-web, please send a note to |
  11. | license@php.net so we can mail you a copy immediately. |
  12. +----------------------------------------------------------------------+
  13. | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
  14. | Rui Hirokawa <hirokawa@php.net> |
  15. | Hironori Sato <satoh@jpnnet.com> |
  16. | Shigeru Kanemoto <sgk@happysize.co.jp> |
  17. +----------------------------------------------------------------------+
  18. */
  19. /* {{{ includes */
  20. #include "libmbfl/config.h"
  21. #include "php.h"
  22. #include "php_ini.h"
  23. #include "php_variables.h"
  24. #include "mbstring.h"
  25. #include "ext/standard/php_string.h"
  26. #include "ext/standard/php_mail.h"
  27. #include "ext/standard/exec.h"
  28. #include "ext/standard/url.h"
  29. #include "main/php_output.h"
  30. #include "ext/standard/info.h"
  31. #include "ext/pcre/php_pcre.h"
  32. #include "libmbfl/mbfl/mbfilter_8bit.h"
  33. #include "libmbfl/mbfl/mbfilter_pass.h"
  34. #include "libmbfl/mbfl/mbfilter_wchar.h"
  35. #include "libmbfl/filters/mbfilter_base64.h"
  36. #include "libmbfl/filters/mbfilter_qprint.h"
  37. #include "libmbfl/filters/mbfilter_ucs4.h"
  38. #include "libmbfl/filters/mbfilter_utf8.h"
  39. #include "libmbfl/filters/mbfilter_tl_jisx0201_jisx0208.h"
  40. #include "libmbfl/filters/mbfilter_singlebyte.h"
  41. #include "php_variables.h"
  42. #include "php_globals.h"
  43. #include "rfc1867.h"
  44. #include "php_content_types.h"
  45. #include "SAPI.h"
  46. #include "php_unicode.h"
  47. #include "TSRM.h"
  48. #include "mb_gpc.h"
  49. #ifdef HAVE_MBREGEX
  50. # include "php_mbregex.h"
  51. #endif
  52. #include "zend_multibyte.h"
  53. #include "mbstring_arginfo.h"
  54. /* }}} */
  55. /* {{{ prototypes */
  56. ZEND_DECLARE_MODULE_GLOBALS(mbstring)
  57. static PHP_GINIT_FUNCTION(mbstring);
  58. static PHP_GSHUTDOWN_FUNCTION(mbstring);
  59. static void php_mb_populate_current_detect_order_list(void);
  60. static int php_mb_encoding_translation(void);
  61. static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size);
  62. static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);
  63. static inline bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc);
  64. static inline bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc);
  65. /* }}} */
  66. /* {{{ php_mb_default_identify_list */
  67. typedef struct _php_mb_nls_ident_list {
  68. enum mbfl_no_language lang;
  69. const enum mbfl_no_encoding *list;
  70. size_t list_size;
  71. } php_mb_nls_ident_list;
  72. static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
  73. mbfl_no_encoding_ascii,
  74. mbfl_no_encoding_jis,
  75. mbfl_no_encoding_utf8,
  76. mbfl_no_encoding_euc_jp,
  77. mbfl_no_encoding_sjis
  78. };
  79. static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
  80. mbfl_no_encoding_ascii,
  81. mbfl_no_encoding_utf8,
  82. mbfl_no_encoding_euc_cn,
  83. mbfl_no_encoding_cp936
  84. };
  85. static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
  86. mbfl_no_encoding_ascii,
  87. mbfl_no_encoding_utf8,
  88. mbfl_no_encoding_euc_tw,
  89. mbfl_no_encoding_big5
  90. };
  91. static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
  92. mbfl_no_encoding_ascii,
  93. mbfl_no_encoding_utf8,
  94. mbfl_no_encoding_euc_kr,
  95. mbfl_no_encoding_uhc
  96. };
  97. static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
  98. mbfl_no_encoding_ascii,
  99. mbfl_no_encoding_utf8,
  100. mbfl_no_encoding_koi8r,
  101. mbfl_no_encoding_cp1251,
  102. mbfl_no_encoding_cp866
  103. };
  104. static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
  105. mbfl_no_encoding_ascii,
  106. mbfl_no_encoding_utf8,
  107. mbfl_no_encoding_armscii8
  108. };
  109. static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
  110. mbfl_no_encoding_ascii,
  111. mbfl_no_encoding_utf8,
  112. mbfl_no_encoding_cp1254,
  113. mbfl_no_encoding_8859_9
  114. };
  115. static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
  116. mbfl_no_encoding_ascii,
  117. mbfl_no_encoding_utf8,
  118. mbfl_no_encoding_koi8u
  119. };
  120. static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
  121. mbfl_no_encoding_ascii,
  122. mbfl_no_encoding_utf8
  123. };
  124. static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
  125. { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
  126. { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
  127. { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
  128. { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
  129. { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
  130. { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
  131. { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
  132. { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
  133. { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
  134. };
  135. /* }}} */
  136. /* {{{ mbstring_deps[] */
  137. static const zend_module_dep mbstring_deps[] = {
  138. ZEND_MOD_REQUIRED("pcre")
  139. ZEND_MOD_END
  140. };
  141. /* }}} */
  142. /* {{{ zend_module_entry mbstring_module_entry */
  143. zend_module_entry mbstring_module_entry = {
  144. STANDARD_MODULE_HEADER_EX,
  145. NULL,
  146. mbstring_deps,
  147. "mbstring",
  148. ext_functions,
  149. PHP_MINIT(mbstring),
  150. PHP_MSHUTDOWN(mbstring),
  151. PHP_RINIT(mbstring),
  152. PHP_RSHUTDOWN(mbstring),
  153. PHP_MINFO(mbstring),
  154. PHP_MBSTRING_VERSION,
  155. PHP_MODULE_GLOBALS(mbstring),
  156. PHP_GINIT(mbstring),
  157. PHP_GSHUTDOWN(mbstring),
  158. NULL,
  159. STANDARD_MODULE_PROPERTIES_EX
  160. };
  161. /* }}} */
  162. /* {{{ static sapi_post_entry php_post_entries[] */
  163. static const sapi_post_entry php_post_entries[] = {
  164. { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
  165. { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
  166. { NULL, 0, NULL, NULL }
  167. };
  168. /* }}} */
  /* Emitted only when COMPILE_DL_MBSTRING is defined. */
  #if defined(COMPILE_DL_MBSTRING)
  # if defined(ZTS)
  ZEND_TSRMLS_CACHE_DEFINE()
  # endif
  ZEND_GET_MODULE(mbstring)
  #endif
  175. /* {{{ static sapi_post_entry mbstr_post_entries[] */
  176. static const sapi_post_entry mbstr_post_entries[] = {
  177. { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
  178. { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
  179. { NULL, 0, NULL, NULL }
  180. };
  181. /* }}} */
  182. static const mbfl_encoding *php_mb_get_encoding(zend_string *encoding_name, uint32_t arg_num) {
  183. if (encoding_name) {
  184. const mbfl_encoding *encoding;
  185. zend_string *last_encoding_name = MBSTRG(last_used_encoding_name);
  186. if (last_encoding_name && (last_encoding_name == encoding_name
  187. || zend_string_equals_ci(encoding_name, last_encoding_name))) {
  188. return MBSTRG(last_used_encoding);
  189. }
  190. encoding = mbfl_name2encoding(ZSTR_VAL(encoding_name));
  191. if (!encoding) {
  192. zend_argument_value_error(arg_num, "must be a valid encoding, \"%s\" given", ZSTR_VAL(encoding_name));
  193. return NULL;
  194. }
  195. if (last_encoding_name) {
  196. zend_string_release(last_encoding_name);
  197. }
  198. MBSTRG(last_used_encoding_name) = zend_string_copy(encoding_name);
  199. MBSTRG(last_used_encoding) = encoding;
  200. return encoding;
  201. } else {
  202. return MBSTRG(current_internal_encoding);
  203. }
  204. }
  205. static const mbfl_encoding *php_mb_get_encoding_or_pass(const char *encoding_name) {
  206. if (strcmp(encoding_name, "pass") == 0) {
  207. return &mbfl_encoding_pass;
  208. }
  209. return mbfl_name2encoding(encoding_name);
  210. }
  /*
   * Count the ',' bytes in the half-open range [p, end).
   * An empty range yields 0.
   */
  static size_t count_commas(const char *p, const char *end) {
      size_t total = 0;
      const char *hit = memchr(p, ',', end - p);

      while (hit != NULL) {
          total++;
          /* Resume the search just past the comma that was found. */
          hit++;
          hit = memchr(hit, ',', end - hit);
      }

      return total;
  }
  219. /* {{{ static zend_result php_mb_parse_encoding_list()
  220. * Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
  221. * Emits a ValueError in function context and a warning in INI context, in INI context arg_num must be 0.
  222. */
  223. static zend_result php_mb_parse_encoding_list(const char *value, size_t value_length,
  224. const mbfl_encoding ***return_list, size_t *return_size, bool persistent, uint32_t arg_num,
  225. bool allow_pass_encoding)
  226. {
  227. if (value == NULL || value_length == 0) {
  228. *return_list = NULL;
  229. *return_size = 0;
  230. return SUCCESS;
  231. } else {
  232. bool included_auto;
  233. size_t n, size;
  234. char *p1, *endp, *tmpstr;
  235. const mbfl_encoding **entry, **list;
  236. /* copy the value string for work */
  237. if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
  238. tmpstr = (char *)estrndup(value+1, value_length-2);
  239. value_length -= 2;
  240. } else {
  241. tmpstr = (char *)estrndup(value, value_length);
  242. }
  243. endp = tmpstr + value_length;
  244. size = 1 + count_commas(tmpstr, endp) + MBSTRG(default_detect_order_list_size);
  245. list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
  246. entry = list;
  247. n = 0;
  248. included_auto = 0;
  249. p1 = tmpstr;
  250. while (1) {
  251. char *comma = (char *) php_memnstr(p1, ",", 1, endp);
  252. char *p = comma ? comma : endp;
  253. *p = '\0';
  254. /* trim spaces */
  255. while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
  256. p1++;
  257. }
  258. p--;
  259. while (p > p1 && (*p == ' ' || *p == '\t')) {
  260. *p = '\0';
  261. p--;
  262. }
  263. /* convert to the encoding number and check encoding */
  264. if (strcasecmp(p1, "auto") == 0) {
  265. if (!included_auto) {
  266. const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
  267. const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
  268. size_t i;
  269. included_auto = 1;
  270. for (i = 0; i < identify_list_size; i++) {
  271. *entry++ = mbfl_no2encoding(*src++);
  272. n++;
  273. }
  274. }
  275. } else {
  276. const mbfl_encoding *encoding =
  277. allow_pass_encoding ? php_mb_get_encoding_or_pass(p1) : mbfl_name2encoding(p1);
  278. if (!encoding) {
  279. /* Called from an INI setting modification */
  280. if (arg_num == 0) {
  281. php_error_docref("ref.mbstring", E_WARNING, "INI setting contains invalid encoding \"%s\"", p1);
  282. } else {
  283. zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", p1);
  284. }
  285. efree(tmpstr);
  286. pefree(ZEND_VOIDP(list), persistent);
  287. return FAILURE;
  288. }
  289. *entry++ = encoding;
  290. n++;
  291. }
  292. if (n >= size || comma == NULL) {
  293. break;
  294. }
  295. p1 = comma + 1;
  296. }
  297. *return_list = list;
  298. *return_size = n;
  299. efree(tmpstr);
  300. }
  301. return SUCCESS;
  302. }
  303. /* }}} */
  304. /* {{{ static int php_mb_parse_encoding_array()
  305. * Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
  306. * Emits a ValueError in function context and a warning in INI context, in INI context arg_num must be 0.
  307. */
  308. static int php_mb_parse_encoding_array(HashTable *target_hash, const mbfl_encoding ***return_list,
  309. size_t *return_size, uint32_t arg_num)
  310. {
  311. /* Allocate enough space to include the default detect order if "auto" is used. */
  312. size_t size = zend_hash_num_elements(target_hash) + MBSTRG(default_detect_order_list_size);
  313. const mbfl_encoding **list = ecalloc(size, sizeof(mbfl_encoding*));
  314. const mbfl_encoding **entry = list;
  315. bool included_auto = 0;
  316. size_t n = 0;
  317. zval *hash_entry;
  318. ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
  319. zend_string *encoding_str = zval_try_get_string(hash_entry);
  320. if (UNEXPECTED(!encoding_str)) {
  321. efree(ZEND_VOIDP(list));
  322. return FAILURE;
  323. }
  324. if (zend_string_equals_literal_ci(encoding_str, "auto")) {
  325. if (!included_auto) {
  326. const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
  327. const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
  328. size_t j;
  329. included_auto = 1;
  330. for (j = 0; j < identify_list_size; j++) {
  331. *entry++ = mbfl_no2encoding(*src++);
  332. n++;
  333. }
  334. }
  335. } else {
  336. const mbfl_encoding *encoding = mbfl_name2encoding(ZSTR_VAL(encoding_str));
  337. if (encoding) {
  338. *entry++ = encoding;
  339. n++;
  340. } else {
  341. zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", ZSTR_VAL(encoding_str));
  342. zend_string_release(encoding_str);
  343. efree(ZEND_VOIDP(list));
  344. return FAILURE;
  345. }
  346. }
  347. zend_string_release(encoding_str);
  348. } ZEND_HASH_FOREACH_END();
  349. *return_list = list;
  350. *return_size = n;
  351. return SUCCESS;
  352. }
  353. /* }}} */
  354. /* {{{ zend_multibyte interface */
  355. static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name)
  356. {
  357. return (const zend_encoding*)mbfl_name2encoding(encoding_name);
  358. }
  359. static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
  360. {
  361. return ((const mbfl_encoding *)encoding)->name;
  362. }
  363. static bool php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
  364. {
  365. const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
  366. return !(encoding->flag & MBFL_ENCTYPE_GL_UNSAFE);
  367. }
  368. static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size)
  369. {
  370. mbfl_string string;
  371. if (!list) {
  372. list = (const zend_encoding **)MBSTRG(current_detect_order_list);
  373. list_size = MBSTRG(current_detect_order_list_size);
  374. }
  375. mbfl_string_init(&string);
  376. string.val = (unsigned char *)arg_string;
  377. string.len = arg_length;
  378. return (const zend_encoding *) mbfl_identify_encoding(&string, (const mbfl_encoding **)list, list_size, 0);
  379. }
  380. static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
  381. {
  382. mbfl_string string, result;
  383. mbfl_buffer_converter *convd;
  384. /* new encoding */
  385. /* initialize string */
  386. string.encoding = (const mbfl_encoding*)encoding_from;
  387. string.val = (unsigned char*)from;
  388. string.len = from_length;
  389. /* initialize converter */
  390. convd = mbfl_buffer_converter_new((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
  391. if (convd == NULL) {
  392. return (size_t) -1;
  393. }
  394. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  395. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  396. /* do it */
  397. size_t loc = mbfl_buffer_converter_feed(convd, &string);
  398. mbfl_buffer_converter_flush(convd);
  399. mbfl_string_init(&result);
  400. if (!mbfl_buffer_converter_result(convd, &result)) {
  401. mbfl_buffer_converter_delete(convd);
  402. return (size_t)-1;
  403. }
  404. *to = result.val;
  405. *to_length = result.len;
  406. mbfl_buffer_converter_delete(convd);
  407. return loc;
  408. }
  409. static zend_result php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, bool persistent)
  410. {
  411. return php_mb_parse_encoding_list(
  412. encoding_list, encoding_list_len,
  413. (const mbfl_encoding ***)return_list, return_size,
  414. persistent, /* arg_num */ 0, /* allow_pass_encoding */ 1);
  415. }
  416. static const zend_encoding *php_mb_zend_internal_encoding_getter(void)
  417. {
  418. return (const zend_encoding *)MBSTRG(internal_encoding);
  419. }
  420. static zend_result php_mb_zend_internal_encoding_setter(const zend_encoding *encoding)
  421. {
  422. MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
  423. return SUCCESS;
  424. }
  425. static zend_multibyte_functions php_mb_zend_multibyte_functions = {
  426. "mbstring",
  427. php_mb_zend_encoding_fetcher,
  428. php_mb_zend_encoding_name_getter,
  429. php_mb_zend_encoding_lexer_compatibility_checker,
  430. php_mb_zend_encoding_detector,
  431. php_mb_zend_encoding_converter,
  432. php_mb_zend_encoding_list_parser,
  433. php_mb_zend_internal_encoding_getter,
  434. php_mb_zend_internal_encoding_setter
  435. };
  436. /* }}} */
  437. /* {{{ _php_mb_compile_regex */
  438. static void *_php_mb_compile_regex(const char *pattern)
  439. {
  440. pcre2_code *retval;
  441. PCRE2_SIZE err_offset;
  442. int errnum;
  443. if (!(retval = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED,
  444. PCRE2_CASELESS, &errnum, &err_offset, php_pcre_cctx()))) {
  445. PCRE2_UCHAR err_str[128];
  446. pcre2_get_error_message(errnum, err_str, sizeof(err_str));
  447. php_error_docref(NULL, E_WARNING, "%s (offset=%zu): %s", pattern, err_offset, err_str);
  448. }
  449. return retval;
  450. }
  451. /* }}} */
  452. /* {{{ _php_mb_match_regex */
  453. static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
  454. {
  455. int res;
  456. pcre2_match_data *match_data = php_pcre_create_match_data(0, opaque);
  457. if (NULL == match_data) {
  458. pcre2_code_free(opaque);
  459. php_error_docref(NULL, E_WARNING, "Cannot allocate match data");
  460. return FAILURE;
  461. }
  462. res = pcre2_match(opaque, (PCRE2_SPTR)str, str_len, 0, 0, match_data, php_pcre_mctx()) >= 0;
  463. php_pcre_free_match_data(match_data);
  464. return res;
  465. }
  466. /* }}} */
  467. /* {{{ _php_mb_free_regex */
  468. static void _php_mb_free_regex(void *opaque)
  469. {
  470. pcre2_code_free(opaque);
  471. }
  472. /* }}} */
  473. /* {{{ php_mb_nls_get_default_detect_order_list */
  474. static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
  475. {
  476. size_t i;
  477. *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
  478. *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
  479. for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
  480. if (php_mb_default_identify_list[i].lang == lang) {
  481. *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
  482. *plist_size = php_mb_default_identify_list[i].list_size;
  483. return 1;
  484. }
  485. }
  486. return 0;
  487. }
  488. /* }}} */
  489. static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, size_t len, char quote)
  490. {
  491. char *result = emalloc(len + 2);
  492. char *resp = result;
  493. size_t i;
  494. for (i = 0; i < len && start[i] != quote; ++i) {
  495. if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
  496. *resp++ = start[++i];
  497. } else {
  498. size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
  499. while (j-- > 0 && i < len) {
  500. *resp++ = start[i++];
  501. }
  502. --i;
  503. }
  504. }
  505. *resp = '\0';
  506. return result;
  507. }
  508. static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop) /* {{{ */
  509. {
  510. char *pos = *line, quote;
  511. char *res;
  512. while (*pos && *pos != stop) {
  513. if ((quote = *pos) == '"' || quote == '\'') {
  514. ++pos;
  515. while (*pos && *pos != quote) {
  516. if (*pos == '\\' && pos[1] && pos[1] == quote) {
  517. pos += 2;
  518. } else {
  519. ++pos;
  520. }
  521. }
  522. if (*pos) {
  523. ++pos;
  524. }
  525. } else {
  526. pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
  527. }
  528. }
  529. if (*pos == '\0') {
  530. res = estrdup(*line);
  531. *line += strlen(*line);
  532. return res;
  533. }
  534. res = estrndup(*line, pos - *line);
  535. while (*pos == stop) {
  536. pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
  537. }
  538. *line = pos;
  539. return res;
  540. }
  541. /* }}} */
  542. static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str) /* {{{ */
  543. {
  544. while (*str && isspace(*(unsigned char *)str)) {
  545. ++str;
  546. }
  547. if (!*str) {
  548. return estrdup("");
  549. }
  550. if (*str == '"' || *str == '\'') {
  551. char quote = *str;
  552. str++;
  553. return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), quote);
  554. } else {
  555. char *strend = str;
  556. while (*strend && !isspace(*(unsigned char *)strend)) {
  557. ++strend;
  558. }
  559. return php_mb_rfc1867_substring_conf(encoding, str, strend - str, 0);
  560. }
  561. }
  562. /* }}} */
  563. static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename) /* {{{ */
  564. {
  565. char *s, *s2;
  566. const size_t filename_len = strlen(filename);
  567. /* The \ check should technically be needed for win32 systems only where
  568. * it is a valid path separator. However, IE in all it's wisdom always sends
  569. * the full path of the file on the user's filesystem, which means that unless
  570. * the user does basename() they get a bogus file name. Until IE's user base drops
  571. * to nill or problem is fixed this code must remain enabled for all systems. */
  572. s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
  573. s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
  574. if (s && s2) {
  575. if (s > s2) {
  576. return ++s;
  577. } else {
  578. return ++s2;
  579. }
  580. } else if (s) {
  581. return ++s;
  582. } else if (s2) {
  583. return ++s2;
  584. } else {
  585. return filename;
  586. }
  587. }
  588. /* }}} */
  589. /* {{{ php.ini directive handler */
  590. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
  591. static PHP_INI_MH(OnUpdate_mbstring_language)
  592. {
  593. enum mbfl_no_language no_language;
  594. no_language = mbfl_name2no_language(ZSTR_VAL(new_value));
  595. if (no_language == mbfl_no_language_invalid) {
  596. MBSTRG(language) = mbfl_no_language_neutral;
  597. return FAILURE;
  598. }
  599. MBSTRG(language) = no_language;
  600. php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
  601. return SUCCESS;
  602. }
  603. /* }}} */
  604. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
  605. static PHP_INI_MH(OnUpdate_mbstring_detect_order)
  606. {
  607. const mbfl_encoding **list;
  608. size_t size;
  609. if (!new_value) {
  610. if (MBSTRG(detect_order_list)) {
  611. pefree(ZEND_VOIDP(MBSTRG(detect_order_list)), 1);
  612. }
  613. MBSTRG(detect_order_list) = NULL;
  614. MBSTRG(detect_order_list_size) = 0;
  615. return SUCCESS;
  616. }
  617. if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, /* persistent */ 1, /* arg_num */ 0, /* allow_pass_encoding */ 0) || size == 0) {
  618. return FAILURE;
  619. }
  620. if (MBSTRG(detect_order_list)) {
  621. pefree(ZEND_VOIDP(MBSTRG(detect_order_list)), 1);
  622. }
  623. MBSTRG(detect_order_list) = list;
  624. MBSTRG(detect_order_list_size) = size;
  625. return SUCCESS;
  626. }
  627. /* }}} */
  628. static int _php_mb_ini_mbstring_http_input_set(const char *new_value, size_t new_value_length) {
  629. const mbfl_encoding **list;
  630. size_t size;
  631. if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, /* persistent */ 1, /* arg_num */ 0, /* allow_pass_encoding */ 1) || size == 0) {
  632. return FAILURE;
  633. }
  634. if (MBSTRG(http_input_list)) {
  635. pefree(ZEND_VOIDP(MBSTRG(http_input_list)), 1);
  636. }
  637. MBSTRG(http_input_list) = list;
  638. MBSTRG(http_input_list_size) = size;
  639. return SUCCESS;
  640. }
  641. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
  642. static PHP_INI_MH(OnUpdate_mbstring_http_input)
  643. {
  644. if (new_value) {
  645. php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_input is deprecated");
  646. }
  647. if (!new_value || !ZSTR_VAL(new_value)) {
  648. const char *encoding = php_get_input_encoding();
  649. MBSTRG(http_input_set) = 0;
  650. _php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding));
  651. return SUCCESS;
  652. }
  653. MBSTRG(http_input_set) = 1;
  654. return _php_mb_ini_mbstring_http_input_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
  655. }
  656. /* }}} */
  657. static int _php_mb_ini_mbstring_http_output_set(const char *new_value) {
  658. const mbfl_encoding *encoding = php_mb_get_encoding_or_pass(new_value);
  659. if (!encoding) {
  660. return FAILURE;
  661. }
  662. MBSTRG(http_output_encoding) = encoding;
  663. MBSTRG(current_http_output_encoding) = encoding;
  664. return SUCCESS;
  665. }
  666. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
  667. static PHP_INI_MH(OnUpdate_mbstring_http_output)
  668. {
  669. if (new_value) {
  670. php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_output is deprecated");
  671. }
  672. if (new_value == NULL || ZSTR_LEN(new_value) == 0) {
  673. MBSTRG(http_output_set) = 0;
  674. _php_mb_ini_mbstring_http_output_set(php_get_output_encoding());
  675. return SUCCESS;
  676. }
  677. MBSTRG(http_output_set) = 1;
  678. return _php_mb_ini_mbstring_http_output_set(ZSTR_VAL(new_value));
  679. }
  680. /* }}} */
  681. /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
  682. static int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, size_t new_value_length)
  683. {
  684. const mbfl_encoding *encoding;
  685. if (!new_value || !new_value_length || !(encoding = mbfl_name2encoding(new_value))) {
  686. /* falls back to UTF-8 if an unknown encoding name is given */
  687. if (new_value) {
  688. php_error_docref("ref.mbstring", E_WARNING, "Unknown encoding \"%s\" in ini setting", new_value);
  689. }
  690. encoding = &mbfl_encoding_utf8;
  691. }
  692. MBSTRG(internal_encoding) = encoding;
  693. MBSTRG(current_internal_encoding) = encoding;
  694. #ifdef HAVE_MBREGEX
  695. {
  696. const char *enc_name = new_value;
  697. if (FAILURE == php_mb_regex_set_default_mbctype(enc_name)) {
  698. /* falls back to UTF-8 if an unknown encoding name is given */
  699. enc_name = "UTF-8";
  700. php_mb_regex_set_default_mbctype(enc_name);
  701. }
  702. php_mb_regex_set_mbctype(new_value);
  703. }
  704. #endif
  705. return SUCCESS;
  706. }
  707. /* }}} */
  708. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
  709. static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
  710. {
  711. if (new_value) {
  712. php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
  713. }
  714. if (OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage) == FAILURE) {
  715. return FAILURE;
  716. }
  717. if (new_value && ZSTR_LEN(new_value)) {
  718. MBSTRG(internal_encoding_set) = 1;
  719. return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
  720. } else {
  721. const char *encoding = php_get_internal_encoding();
  722. MBSTRG(internal_encoding_set) = 0;
  723. return _php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding));
  724. }
  725. }
  726. /* }}} */
  727. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
  728. static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
  729. {
  730. int c;
  731. char *endptr = NULL;
  732. if (new_value != NULL) {
  733. if (zend_string_equals_literal_ci(new_value, "none")) {
  734. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  735. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  736. } else if (zend_string_equals_literal_ci(new_value, "long")) {
  737. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  738. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  739. } else if (zend_string_equals_literal_ci(new_value, "entity")) {
  740. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
  741. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
  742. } else {
  743. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  744. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  745. if (ZSTR_LEN(new_value) > 0) {
  746. c = strtol(ZSTR_VAL(new_value), &endptr, 0);
  747. if (*endptr == '\0') {
  748. MBSTRG(filter_illegal_substchar) = c;
  749. MBSTRG(current_filter_illegal_substchar) = c;
  750. }
  751. }
  752. }
  753. } else {
  754. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  755. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  756. MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */
  757. MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */
  758. }
  759. return SUCCESS;
  760. }
  761. /* }}} */
  762. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
  763. static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
  764. {
  765. if (new_value == NULL) {
  766. return FAILURE;
  767. }
  768. OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
  769. if (MBSTRG(encoding_translation)) {
  770. sapi_unregister_post_entry(php_post_entries);
  771. sapi_register_post_entries(mbstr_post_entries);
  772. } else {
  773. sapi_unregister_post_entry(mbstr_post_entries);
  774. sapi_register_post_entries(php_post_entries);
  775. }
  776. return SUCCESS;
  777. }
  778. /* }}} */
  779. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes */
  780. static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
  781. {
  782. zend_string *tmp;
  783. void *re = NULL;
  784. if (!new_value) {
  785. new_value = entry->orig_value;
  786. }
  787. tmp = php_trim(new_value, NULL, 0, 3);
  788. if (ZSTR_LEN(tmp) > 0) {
  789. if (!(re = _php_mb_compile_regex(ZSTR_VAL(tmp)))) {
  790. zend_string_release_ex(tmp, 0);
  791. return FAILURE;
  792. }
  793. }
  794. if (MBSTRG(http_output_conv_mimetypes)) {
  795. _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
  796. }
  797. MBSTRG(http_output_conv_mimetypes) = re;
  798. zend_string_release_ex(tmp, 0);
  799. return SUCCESS;
  800. }
  801. /* }}} */
  802. /* }}} */
  803. /* {{{ php.ini directive registration */
  804. PHP_INI_BEGIN()
  805. PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
  806. PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
  807. PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
  808. PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
  809. STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
  810. PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
  811. STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
  812. PHP_INI_SYSTEM | PHP_INI_PERDIR,
  813. OnUpdate_mbstring_encoding_translation,
  814. encoding_translation, zend_mbstring_globals, mbstring_globals)
  815. PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
  816. "^(text/|application/xhtml\\+xml)",
  817. PHP_INI_ALL,
  818. OnUpdate_mbstring_http_output_conv_mimetypes)
  819. STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
  820. PHP_INI_ALL,
  821. OnUpdateBool,
  822. strict_detection, zend_mbstring_globals, mbstring_globals)
  823. #ifdef HAVE_MBREGEX
  824. STD_PHP_INI_ENTRY("mbstring.regex_stack_limit", "100000",PHP_INI_ALL, OnUpdateLong, regex_stack_limit, zend_mbstring_globals, mbstring_globals)
  825. STD_PHP_INI_ENTRY("mbstring.regex_retry_limit", "1000000",PHP_INI_ALL, OnUpdateLong, regex_retry_limit, zend_mbstring_globals, mbstring_globals)
  826. #endif
  827. PHP_INI_END()
  828. /* }}} */
  829. static void mbstring_internal_encoding_changed_hook(void) {
  830. /* One of the internal_encoding / input_encoding / output_encoding ini settings changed. */
  831. if (!MBSTRG(internal_encoding_set)) {
  832. const char *encoding = php_get_internal_encoding();
  833. _php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding));
  834. }
  835. if (!MBSTRG(http_output_set)) {
  836. const char *encoding = php_get_output_encoding();
  837. _php_mb_ini_mbstring_http_output_set(encoding);
  838. }
  839. if (!MBSTRG(http_input_set)) {
  840. const char *encoding = php_get_input_encoding();
  841. _php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding));
  842. }
  843. }
  844. /* {{{ module global initialize handler */
  845. static PHP_GINIT_FUNCTION(mbstring)
  846. {
  847. #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
  848. ZEND_TSRMLS_CACHE_UPDATE();
  849. #endif
  850. mbstring_globals->language = mbfl_no_language_uni;
  851. mbstring_globals->internal_encoding = NULL;
  852. mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
  853. mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
  854. mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
  855. mbstring_globals->http_input_identify = NULL;
  856. mbstring_globals->http_input_identify_get = NULL;
  857. mbstring_globals->http_input_identify_post = NULL;
  858. mbstring_globals->http_input_identify_cookie = NULL;
  859. mbstring_globals->http_input_identify_string = NULL;
  860. mbstring_globals->http_input_list = NULL;
  861. mbstring_globals->http_input_list_size = 0;
  862. mbstring_globals->detect_order_list = NULL;
  863. mbstring_globals->detect_order_list_size = 0;
  864. mbstring_globals->current_detect_order_list = NULL;
  865. mbstring_globals->current_detect_order_list_size = 0;
  866. mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
  867. mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
  868. mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  869. mbstring_globals->filter_illegal_substchar = 0x3f; /* '?' */
  870. mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  871. mbstring_globals->current_filter_illegal_substchar = 0x3f; /* '?' */
  872. mbstring_globals->illegalchars = 0;
  873. mbstring_globals->encoding_translation = 0;
  874. mbstring_globals->strict_detection = 0;
  875. mbstring_globals->outconv = NULL;
  876. mbstring_globals->http_output_conv_mimetypes = NULL;
  877. #ifdef HAVE_MBREGEX
  878. mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc();
  879. #endif
  880. mbstring_globals->last_used_encoding_name = NULL;
  881. mbstring_globals->last_used_encoding = NULL;
  882. mbstring_globals->internal_encoding_set = 0;
  883. mbstring_globals->http_output_set = 0;
  884. mbstring_globals->http_input_set = 0;
  885. }
  886. /* }}} */
  887. /* {{{ PHP_GSHUTDOWN_FUNCTION */
  888. static PHP_GSHUTDOWN_FUNCTION(mbstring)
  889. {
  890. if (mbstring_globals->http_input_list) {
  891. free(ZEND_VOIDP(mbstring_globals->http_input_list));
  892. }
  893. if (mbstring_globals->detect_order_list) {
  894. free(ZEND_VOIDP(mbstring_globals->detect_order_list));
  895. }
  896. if (mbstring_globals->http_output_conv_mimetypes) {
  897. _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
  898. }
  899. #ifdef HAVE_MBREGEX
  900. php_mb_regex_globals_free(mbstring_globals->mb_regex_globals);
  901. #endif
  902. }
  903. /* }}} */
  904. /* {{{ PHP_MINIT_FUNCTION(mbstring) */
  905. PHP_MINIT_FUNCTION(mbstring)
  906. {
  907. #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
  908. ZEND_TSRMLS_CACHE_UPDATE();
  909. #endif
  910. REGISTER_INI_ENTRIES();
  911. /* We assume that we're the only user of the hook. */
  912. ZEND_ASSERT(php_internal_encoding_changed == NULL);
  913. php_internal_encoding_changed = mbstring_internal_encoding_changed_hook;
  914. mbstring_internal_encoding_changed_hook();
  915. /* This is a global handler. Should not be set in a per-request handler. */
  916. sapi_register_treat_data(mbstr_treat_data);
  917. /* Post handlers are stored in the thread-local context. */
  918. if (MBSTRG(encoding_translation)) {
  919. sapi_register_post_entries(mbstr_post_entries);
  920. }
  921. REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
  922. REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
  923. REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
  924. REGISTER_LONG_CONSTANT("MB_CASE_FOLD", PHP_UNICODE_CASE_FOLD, CONST_CS | CONST_PERSISTENT);
  925. REGISTER_LONG_CONSTANT("MB_CASE_UPPER_SIMPLE", PHP_UNICODE_CASE_UPPER_SIMPLE, CONST_CS | CONST_PERSISTENT);
  926. REGISTER_LONG_CONSTANT("MB_CASE_LOWER_SIMPLE", PHP_UNICODE_CASE_LOWER_SIMPLE, CONST_CS | CONST_PERSISTENT);
  927. REGISTER_LONG_CONSTANT("MB_CASE_TITLE_SIMPLE", PHP_UNICODE_CASE_TITLE_SIMPLE, CONST_CS | CONST_PERSISTENT);
  928. REGISTER_LONG_CONSTANT("MB_CASE_FOLD_SIMPLE", PHP_UNICODE_CASE_FOLD_SIMPLE, CONST_CS | CONST_PERSISTENT);
  929. #ifdef HAVE_MBREGEX
  930. PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  931. #endif
  932. if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions)) {
  933. return FAILURE;
  934. }
  935. php_rfc1867_set_multibyte_callbacks(
  936. php_mb_encoding_translation,
  937. php_mb_gpc_get_detect_order,
  938. php_mb_gpc_set_input_encoding,
  939. php_mb_rfc1867_getword,
  940. php_mb_rfc1867_getword_conf,
  941. php_mb_rfc1867_basename);
  942. return SUCCESS;
  943. }
  944. /* }}} */
  945. /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
  946. PHP_MSHUTDOWN_FUNCTION(mbstring)
  947. {
  948. UNREGISTER_INI_ENTRIES();
  949. zend_multibyte_restore_functions();
  950. #ifdef HAVE_MBREGEX
  951. PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  952. #endif
  953. php_internal_encoding_changed = NULL;
  954. return SUCCESS;
  955. }
  956. /* }}} */
  957. /* {{{ PHP_RINIT_FUNCTION(mbstring) */
  958. PHP_RINIT_FUNCTION(mbstring)
  959. {
  960. MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
  961. MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
  962. MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
  963. MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
  964. MBSTRG(illegalchars) = 0;
  965. php_mb_populate_current_detect_order_list();
  966. #ifdef HAVE_MBREGEX
  967. PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  968. #endif
  969. zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding));
  970. return SUCCESS;
  971. }
  972. /* }}} */
  973. /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
  974. PHP_RSHUTDOWN_FUNCTION(mbstring)
  975. {
  976. if (MBSTRG(current_detect_order_list) != NULL) {
  977. efree(ZEND_VOIDP(MBSTRG(current_detect_order_list)));
  978. MBSTRG(current_detect_order_list) = NULL;
  979. MBSTRG(current_detect_order_list_size) = 0;
  980. }
  981. if (MBSTRG(outconv) != NULL) {
  982. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
  983. mbfl_buffer_converter_delete(MBSTRG(outconv));
  984. MBSTRG(outconv) = NULL;
  985. }
  986. /* clear http input identification. */
  987. MBSTRG(http_input_identify) = NULL;
  988. MBSTRG(http_input_identify_post) = NULL;
  989. MBSTRG(http_input_identify_get) = NULL;
  990. MBSTRG(http_input_identify_cookie) = NULL;
  991. MBSTRG(http_input_identify_string) = NULL;
  992. if (MBSTRG(last_used_encoding_name)) {
  993. zend_string_release(MBSTRG(last_used_encoding_name));
  994. MBSTRG(last_used_encoding_name) = NULL;
  995. }
  996. MBSTRG(internal_encoding_set) = 0;
  997. MBSTRG(http_output_set) = 0;
  998. MBSTRG(http_input_set) = 0;
  999. #ifdef HAVE_MBREGEX
  1000. PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  1001. #endif
  1002. return SUCCESS;
  1003. }
  1004. /* }}} */
  1005. /* {{{ PHP_MINFO_FUNCTION(mbstring) */
  1006. PHP_MINFO_FUNCTION(mbstring)
  1007. {
  1008. php_info_print_table_start();
  1009. php_info_print_table_row(2, "Multibyte Support", "enabled");
  1010. php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
  1011. php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
  1012. {
  1013. char tmp[256];
  1014. snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
  1015. php_info_print_table_row(2, "libmbfl version", tmp);
  1016. }
  1017. php_info_print_table_end();
  1018. php_info_print_table_start();
  1019. php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
  1020. php_info_print_table_end();
  1021. #ifdef HAVE_MBREGEX
  1022. PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
  1023. #endif
  1024. DISPLAY_INI_ENTRIES();
  1025. }
  1026. /* }}} */
  1027. /* {{{ Sets the current language or Returns the current language as a string */
  1028. PHP_FUNCTION(mb_language)
  1029. {
  1030. zend_string *name = NULL;
  1031. ZEND_PARSE_PARAMETERS_START(0, 1)
  1032. Z_PARAM_OPTIONAL
  1033. Z_PARAM_STR_OR_NULL(name)
  1034. ZEND_PARSE_PARAMETERS_END();
  1035. if (name == NULL) {
  1036. RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)));
  1037. } else {
  1038. zend_string *ini_name = zend_string_init("mbstring.language", sizeof("mbstring.language") - 1, 0);
  1039. if (FAILURE == zend_alter_ini_entry(ini_name, name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
  1040. zend_argument_value_error(1, "must be a valid language, \"%s\" given", ZSTR_VAL(name));
  1041. zend_string_release_ex(ini_name, 0);
  1042. RETURN_THROWS();
  1043. }
  1044. // TODO Make return void
  1045. RETVAL_TRUE;
  1046. zend_string_release_ex(ini_name, 0);
  1047. }
  1048. }
  1049. /* }}} */
  1050. /* {{{ Sets the current internal encoding or Returns the current internal encoding as a string */
  1051. PHP_FUNCTION(mb_internal_encoding)
  1052. {
  1053. char *name = NULL;
  1054. size_t name_len;
  1055. const mbfl_encoding *encoding;
  1056. ZEND_PARSE_PARAMETERS_START(0, 1)
  1057. Z_PARAM_OPTIONAL
  1058. Z_PARAM_STRING_OR_NULL(name, name_len)
  1059. ZEND_PARSE_PARAMETERS_END();
  1060. if (name == NULL) {
  1061. ZEND_ASSERT(MBSTRG(current_internal_encoding));
  1062. RETURN_STRING(MBSTRG(current_internal_encoding)->name);
  1063. } else {
  1064. encoding = mbfl_name2encoding(name);
  1065. if (!encoding) {
  1066. zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
  1067. RETURN_THROWS();
  1068. } else {
  1069. MBSTRG(current_internal_encoding) = encoding;
  1070. MBSTRG(internal_encoding_set) = 1;
  1071. /* TODO Return old encoding */
  1072. RETURN_TRUE;
  1073. }
  1074. }
  1075. }
  1076. /* }}} */
  1077. /* {{{ Returns the input encoding */
  1078. PHP_FUNCTION(mb_http_input)
  1079. {
  1080. char *type = NULL;
  1081. size_t type_len = 0, n;
  1082. const mbfl_encoding **entry;
  1083. const mbfl_encoding *encoding;
  1084. ZEND_PARSE_PARAMETERS_START(0, 1)
  1085. Z_PARAM_OPTIONAL
  1086. Z_PARAM_STRING_OR_NULL(type, type_len)
  1087. ZEND_PARSE_PARAMETERS_END();
  1088. if (type == NULL) {
  1089. encoding = MBSTRG(http_input_identify);
  1090. } else {
  1091. switch (*type) {
  1092. case 'G':
  1093. case 'g':
  1094. encoding = MBSTRG(http_input_identify_get);
  1095. break;
  1096. case 'P':
  1097. case 'p':
  1098. encoding = MBSTRG(http_input_identify_post);
  1099. break;
  1100. case 'C':
  1101. case 'c':
  1102. encoding = MBSTRG(http_input_identify_cookie);
  1103. break;
  1104. case 'S':
  1105. case 's':
  1106. encoding = MBSTRG(http_input_identify_string);
  1107. break;
  1108. case 'I':
  1109. case 'i':
  1110. entry = MBSTRG(http_input_list);
  1111. n = MBSTRG(http_input_list_size);
  1112. array_init(return_value);
  1113. for (size_t i = 0; i < n; i++, entry++) {
  1114. add_next_index_string(return_value, (*entry)->name);
  1115. }
  1116. return;
  1117. case 'L':
  1118. case 'l':
  1119. entry = MBSTRG(http_input_list);
  1120. n = MBSTRG(http_input_list_size);
  1121. if (n == 0) {
  1122. // TODO should return empty string?
  1123. RETURN_FALSE;
  1124. }
  1125. // TODO Use smart_str instead.
  1126. mbfl_string result;
  1127. mbfl_memory_device device;
  1128. mbfl_memory_device_init(&device, n * 12, 0);
  1129. for (size_t i = 0; i < n; i++, entry++) {
  1130. mbfl_memory_device_strcat(&device, (*entry)->name);
  1131. mbfl_memory_device_output(',', &device);
  1132. }
  1133. mbfl_memory_device_unput(&device); /* Remove trailing comma */
  1134. mbfl_memory_device_result(&device, &result);
  1135. RETVAL_STRINGL((const char*)result.val, result.len);
  1136. mbfl_string_clear(&result);
  1137. return;
  1138. default:
  1139. zend_argument_value_error(1,
  1140. "must be one of \"G\", \"P\", \"C\", \"S\", \"I\", or \"L\"");
  1141. RETURN_THROWS();
  1142. }
  1143. }
  1144. if (encoding) {
  1145. RETURN_STRING(encoding->name);
  1146. } else {
  1147. RETURN_FALSE;
  1148. }
  1149. }
  1150. /* }}} */
  1151. /* {{{ Sets the current output_encoding or returns the current output_encoding as a string */
  1152. PHP_FUNCTION(mb_http_output)
  1153. {
  1154. char *name = NULL;
  1155. size_t name_len;
  1156. ZEND_PARSE_PARAMETERS_START(0, 1)
  1157. Z_PARAM_OPTIONAL
  1158. Z_PARAM_STRING_OR_NULL(name, name_len)
  1159. ZEND_PARSE_PARAMETERS_END();
  1160. if (name == NULL) {
  1161. ZEND_ASSERT(MBSTRG(current_http_output_encoding));
  1162. RETURN_STRING(MBSTRG(current_http_output_encoding)->name);
  1163. } else {
  1164. const mbfl_encoding *encoding = php_mb_get_encoding_or_pass(name);
  1165. if (!encoding) {
  1166. zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
  1167. RETURN_THROWS();
  1168. } else {
  1169. MBSTRG(http_output_set) = 1;
  1170. MBSTRG(current_http_output_encoding) = encoding;
  1171. /* TODO Return previous encoding? */
  1172. RETURN_TRUE;
  1173. }
  1174. }
  1175. }
  1176. /* }}} */
  1177. /* {{{ Sets the current detect_order or Return the current detect_order as a array */
  1178. PHP_FUNCTION(mb_detect_order)
  1179. {
  1180. zend_string *order_str = NULL;
  1181. HashTable *order_ht = NULL;
  1182. ZEND_PARSE_PARAMETERS_START(0, 1)
  1183. Z_PARAM_OPTIONAL
  1184. Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(order_ht, order_str)
  1185. ZEND_PARSE_PARAMETERS_END();
  1186. if (!order_str && !order_ht) {
  1187. size_t n = MBSTRG(current_detect_order_list_size);
  1188. const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
  1189. array_init(return_value);
  1190. for (size_t i = 0; i < n; i++) {
  1191. add_next_index_string(return_value, (*entry)->name);
  1192. entry++;
  1193. }
  1194. } else {
  1195. const mbfl_encoding **list;
  1196. size_t size;
  1197. if (order_ht) {
  1198. if (FAILURE == php_mb_parse_encoding_array(order_ht, &list, &size, 1)) {
  1199. RETURN_THROWS();
  1200. }
  1201. } else {
  1202. if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(order_str), ZSTR_LEN(order_str), &list, &size, /* persistent */ 0, /* arg_num */ 1, /* allow_pass_encoding */ 0)) {
  1203. RETURN_THROWS();
  1204. }
  1205. }
  1206. if (size == 0) {
  1207. efree(ZEND_VOIDP(list));
  1208. zend_argument_value_error(1, "must specify at least one encoding");
  1209. RETURN_THROWS();
  1210. }
  1211. if (MBSTRG(current_detect_order_list)) {
  1212. efree(ZEND_VOIDP(MBSTRG(current_detect_order_list)));
  1213. }
  1214. MBSTRG(current_detect_order_list) = list;
  1215. MBSTRG(current_detect_order_list_size) = size;
  1216. RETURN_TRUE;
  1217. }
  1218. }
  1219. /* }}} */
  1220. static inline int php_mb_check_code_point(zend_long cp)
  1221. {
  1222. if (cp < 0 || cp >= 0x110000) {
  1223. /* Out of Unicode range */
  1224. return 0;
  1225. }
  1226. if (cp >= 0xd800 && cp <= 0xdfff) {
  1227. /* Surrogate code-point. These are never valid on their own and we only allow a single
  1228. * substitute character. */
  1229. return 0;
  1230. }
  1231. /* As we do not know the target encoding of the conversion operation that is going to
  1232. * use the substitution character, we cannot check whether the codepoint is actually mapped
  1233. * in the given encoding at this point. Thus we have to accept everything. */
  1234. return 1;
  1235. }
  1236. /* {{{ Sets the current substitute_character or returns the current substitute_character */
  1237. PHP_FUNCTION(mb_substitute_character)
  1238. {
  1239. zend_string *substitute_character = NULL;
  1240. zend_long substitute_codepoint;
  1241. bool substitute_is_null = 1;
  1242. ZEND_PARSE_PARAMETERS_START(0, 1)
  1243. Z_PARAM_OPTIONAL
  1244. Z_PARAM_STR_OR_LONG_OR_NULL(substitute_character, substitute_codepoint, substitute_is_null)
  1245. ZEND_PARSE_PARAMETERS_END();
  1246. if (substitute_is_null) {
  1247. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
  1248. RETURN_STRING("none");
  1249. }
  1250. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
  1251. RETURN_STRING("long");
  1252. }
  1253. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
  1254. RETURN_STRING("entity");
  1255. }
  1256. RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
  1257. }
  1258. if (substitute_character != NULL) {
  1259. if (zend_string_equals_literal_ci(substitute_character, "none")) {
  1260. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  1261. RETURN_TRUE;
  1262. }
  1263. if (zend_string_equals_literal_ci(substitute_character, "long")) {
  1264. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  1265. RETURN_TRUE;
  1266. }
  1267. if (zend_string_equals_literal_ci(substitute_character, "entity")) {
  1268. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
  1269. RETURN_TRUE;
  1270. }
  1271. /* Invalid string value */
  1272. zend_argument_value_error(1, "must be \"none\", \"long\", \"entity\" or a valid codepoint");
  1273. RETURN_THROWS();
  1274. }
  1275. /* Integer codepoint passed */
  1276. if (!php_mb_check_code_point(substitute_codepoint)) {
  1277. zend_argument_value_error(1, "is not a valid codepoint");
  1278. RETURN_THROWS();
  1279. }
  1280. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1281. MBSTRG(current_filter_illegal_substchar) = substitute_codepoint;
  1282. RETURN_TRUE;
  1283. }
  1284. /* }}} */
  1285. /* {{{ Return the preferred MIME name (charset) as a string */
  1286. PHP_FUNCTION(mb_preferred_mime_name)
  1287. {
  1288. enum mbfl_no_encoding no_encoding;
  1289. char *name = NULL;
  1290. size_t name_len;
  1291. ZEND_PARSE_PARAMETERS_START(1, 1)
  1292. Z_PARAM_STRING(name, name_len)
  1293. ZEND_PARSE_PARAMETERS_END();
  1294. no_encoding = mbfl_name2no_encoding(name);
  1295. if (no_encoding == mbfl_no_encoding_invalid) {
  1296. zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
  1297. RETURN_THROWS();
  1298. }
  1299. const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
  1300. if (preferred_name == NULL || *preferred_name == '\0') {
  1301. php_error_docref(NULL, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
  1302. RETVAL_FALSE;
  1303. } else {
  1304. RETVAL_STRING((char *)preferred_name);
  1305. }
  1306. }
  1307. /* }}} */
  1308. /* {{{ Parses GET/POST/COOKIE data and sets global variables */
  1309. PHP_FUNCTION(mb_parse_str)
  1310. {
  1311. zval *track_vars_array = NULL;
  1312. char *encstr;
  1313. size_t encstr_len;
  1314. php_mb_encoding_handler_info_t info;
  1315. const mbfl_encoding *detected;
  1316. ZEND_PARSE_PARAMETERS_START(2, 2)
  1317. Z_PARAM_STRING(encstr, encstr_len)
  1318. Z_PARAM_ZVAL(track_vars_array)
  1319. ZEND_PARSE_PARAMETERS_END();
  1320. track_vars_array = zend_try_array_init(track_vars_array);
  1321. if (!track_vars_array) {
  1322. RETURN_THROWS();
  1323. }
  1324. encstr = estrndup(encstr, encstr_len);
  1325. info.data_type = PARSE_STRING;
  1326. info.separator = PG(arg_separator).input;
  1327. info.report_errors = 1;
  1328. info.to_encoding = MBSTRG(current_internal_encoding);
  1329. info.to_language = MBSTRG(language);
  1330. info.from_encodings = MBSTRG(http_input_list);
  1331. info.num_from_encodings = MBSTRG(http_input_list_size);
  1332. info.from_language = MBSTRG(language);
  1333. detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr);
  1334. MBSTRG(http_input_identify) = detected;
  1335. RETVAL_BOOL(detected);
  1336. if (encstr != NULL) efree(encstr);
  1337. }
  1338. /* }}} */
  1339. /* {{{ Returns string in output buffer converted to the http_output encoding */
  1340. PHP_FUNCTION(mb_output_handler)
  1341. {
  1342. char *arg_string;
  1343. size_t arg_string_len;
  1344. zend_long arg_status;
  1345. mbfl_string string, result;
  1346. const char *charset;
  1347. char *p;
  1348. const mbfl_encoding *encoding;
  1349. int last_feed;
  1350. size_t len;
  1351. unsigned char send_text_mimetype = 0;
  1352. char *s, *mimetype = NULL;
  1353. ZEND_PARSE_PARAMETERS_START(2, 2)
  1354. Z_PARAM_STRING(arg_string, arg_string_len)
  1355. Z_PARAM_LONG(arg_status)
  1356. ZEND_PARSE_PARAMETERS_END();
  1357. encoding = MBSTRG(current_http_output_encoding);
  1358. /* start phase only */
  1359. if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
  1360. /* delete the converter just in case. */
  1361. if (MBSTRG(outconv)) {
  1362. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
  1363. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1364. MBSTRG(outconv) = NULL;
  1365. }
  1366. if (encoding == &mbfl_encoding_pass) {
  1367. RETURN_STRINGL(arg_string, arg_string_len);
  1368. }
  1369. /* analyze mime type */
  1370. if (SG(sapi_headers).mimetype &&
  1371. _php_mb_match_regex(
  1372. MBSTRG(http_output_conv_mimetypes),
  1373. SG(sapi_headers).mimetype,
  1374. strlen(SG(sapi_headers).mimetype))) {
  1375. if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL) {
  1376. mimetype = estrdup(SG(sapi_headers).mimetype);
  1377. } else {
  1378. mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
  1379. }
  1380. send_text_mimetype = 1;
  1381. } else if (SG(sapi_headers).send_default_content_type) {
  1382. mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
  1383. }
  1384. /* if content-type is not yet set, set it and activate the converter */
  1385. if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
  1386. charset = encoding->mime_name;
  1387. if (charset) {
  1388. len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
  1389. if (sapi_add_header(p, len, 0) != FAILURE) {
  1390. SG(sapi_headers).send_default_content_type = 0;
  1391. }
  1392. }
  1393. /* activate the converter */
  1394. MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
  1395. if (send_text_mimetype){
  1396. efree(mimetype);
  1397. }
  1398. }
  1399. }
  1400. /* just return if the converter is not activated. */
  1401. if (MBSTRG(outconv) == NULL) {
  1402. RETURN_STRINGL(arg_string, arg_string_len);
  1403. }
  1404. /* flag */
  1405. last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
  1406. /* mode */
  1407. mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
  1408. mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
  1409. /* feed the string */
  1410. mbfl_string_init(&string);
  1411. /* these are not needed. convd has encoding info.
  1412. string.encoding = MBSTRG(current_internal_encoding);
  1413. */
  1414. string.val = (unsigned char *)arg_string;
  1415. string.len = arg_string_len;
  1416. mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
  1417. if (last_feed) {
  1418. mbfl_buffer_converter_flush(MBSTRG(outconv));
  1419. }
  1420. /* get the converter output, and return it */
  1421. mbfl_buffer_converter_result(MBSTRG(outconv), &result);
  1422. // TODO: avoid reallocation ???
  1423. RETVAL_STRINGL((char *)result.val, result.len); /* the string is already strdup()'ed */
  1424. efree(result.val);
  1425. /* delete the converter if it is the last feed. */
  1426. if (last_feed) {
  1427. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
  1428. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1429. MBSTRG(outconv) = NULL;
  1430. }
  1431. }
  1432. /* }}} */
  1433. /* {{{ Convert a multibyte string to an array. If split_length is specified,
  1434. break the string down into chunks each split_length characters long. */
  1435. /* structure to pass split params to the callback */
  1436. struct mbfl_split_params {
  1437. zval *return_value; /* php function return value structure pointer */
  1438. mbfl_string *result_string; /* string to store result chunk */
  1439. size_t mb_chunk_length; /* actual chunk length in chars */
  1440. size_t split_length; /* split length in chars */
  1441. mbfl_convert_filter *next_filter; /* widechar to encoding converter */
  1442. };
  1443. /* callback function to fill split array */
  1444. static int mbfl_split_output(int c, void *data)
  1445. {
  1446. struct mbfl_split_params *params = (struct mbfl_split_params *)data; /* cast passed data */
  1447. (*params->next_filter->filter_function)(c, params->next_filter); /* decoder filter */
  1448. if (params->split_length == ++params->mb_chunk_length) { /* if current chunk size reached defined chunk size or last char reached */
  1449. mbfl_convert_filter_flush(params->next_filter);/* concatenate separate decoded chars to the solid string */
  1450. mbfl_memory_device *device = (mbfl_memory_device *)params->next_filter->data; /* chars container */
  1451. mbfl_string *chunk = params->result_string;
  1452. mbfl_memory_device_result(device, chunk); /* make chunk */
  1453. add_next_index_stringl(params->return_value, (const char *)chunk->val, chunk->len); /* add chunk to the array */
  1454. efree(chunk->val);
  1455. params->mb_chunk_length = 0; /* reset mb_chunk size */
  1456. }
  1457. return 0;
  1458. }
  1459. PHP_FUNCTION(mb_str_split)
  1460. {
  1461. zend_string *str, *encoding = NULL;
  1462. size_t mb_len, chunks, chunk_len;
  1463. const char *p, *last; /* pointer for the string cursor and last string char */
  1464. mbfl_string string, result_string;
  1465. const mbfl_encoding *mbfl_encoding;
  1466. zend_long split_length = 1;
  1467. ZEND_PARSE_PARAMETERS_START(1, 3)
  1468. Z_PARAM_STR(str)
  1469. Z_PARAM_OPTIONAL
  1470. Z_PARAM_LONG(split_length)
  1471. Z_PARAM_STR_OR_NULL(encoding)
  1472. ZEND_PARSE_PARAMETERS_END();
  1473. if (split_length <= 0) {
  1474. zend_argument_value_error(2, "must be greater than 0");
  1475. RETURN_THROWS();
  1476. }
  1477. /* fill mbfl_string structure */
  1478. string.val = (unsigned char *) ZSTR_VAL(str);
  1479. string.len = ZSTR_LEN(str);
  1480. string.encoding = php_mb_get_encoding(encoding, 3);
  1481. if (!string.encoding) {
  1482. RETURN_THROWS();
  1483. }
  1484. p = ZSTR_VAL(str); /* string cursor pointer */
  1485. last = ZSTR_VAL(str) + ZSTR_LEN(str); /* last string char pointer */
  1486. mbfl_encoding = string.encoding;
  1487. /* first scenario: 1,2,4-bytes fixed width encodings (head part) */
  1488. if (mbfl_encoding->flag & MBFL_ENCTYPE_SBCS) { /* 1 byte */
  1489. mb_len = string.len;
  1490. chunk_len = (size_t)split_length; /* chunk length in bytes */
  1491. } else if (mbfl_encoding->flag & MBFL_ENCTYPE_WCS2) { /* 2 bytes */
  1492. mb_len = string.len / 2;
  1493. chunk_len = split_length * 2;
  1494. } else if (mbfl_encoding->flag & MBFL_ENCTYPE_WCS4) { /* 4 bytes */
  1495. mb_len = string.len / 4;
  1496. chunk_len = split_length * 4;
  1497. } else if (mbfl_encoding->mblen_table != NULL) {
  1498. /* second scenario: variable width encodings with length table */
  1499. char unsigned const *mbtab = mbfl_encoding->mblen_table;
  1500. /* assume that we have 1-bytes characters */
  1501. array_init_size(return_value, (string.len + split_length) / split_length); /* round up */
  1502. while (p < last) { /* split cycle work until the cursor has reached the last byte */
  1503. char const *chunk_p = p; /* chunk first byte pointer */
  1504. chunk_len = 0; /* chunk length in bytes */
  1505. zend_long char_count;
  1506. for (char_count = 0; char_count < split_length && p < last; ++char_count) {
  1507. char unsigned const m = mbtab[*(const unsigned char *)p]; /* single character length table */
  1508. chunk_len += m;
  1509. p += m;
  1510. }
  1511. if (p >= last) chunk_len -= p - last; /* check if chunk is in bounds */
  1512. add_next_index_stringl(return_value, chunk_p, chunk_len);
  1513. }
  1514. return;
  1515. } else {
  1516. /* third scenario: other multibyte encodings */
  1517. mbfl_convert_filter *filter, *decoder;
  1518. /* assume that we have 1-bytes characters */
  1519. array_init_size(return_value, (string.len + split_length) / split_length); /* round up */
  1520. /* decoder filter to decode wchar to encoding */
  1521. mbfl_memory_device device;
  1522. mbfl_memory_device_init(&device, split_length + 1, 0);
  1523. decoder = mbfl_convert_filter_new(
  1524. &mbfl_encoding_wchar,
  1525. string.encoding,
  1526. mbfl_memory_device_output,
  1527. NULL,
  1528. &device);
  1529. /* assert that nothing is wrong with the decoder */
  1530. ZEND_ASSERT(decoder != NULL);
  1531. /* wchar filter */
  1532. mbfl_string_init(&result_string); /* mbfl_string to store chunk in the callback */
  1533. struct mbfl_split_params params = { /* init callback function params structure */
  1534. .return_value = return_value,
  1535. .result_string = &result_string,
  1536. .mb_chunk_length = 0,
  1537. .split_length = (size_t)split_length,
  1538. .next_filter = decoder,
  1539. };
  1540. filter = mbfl_convert_filter_new(
  1541. string.encoding,
  1542. &mbfl_encoding_wchar,
  1543. mbfl_split_output,
  1544. NULL,
  1545. &params);
  1546. /* assert that nothing is wrong with the filter */
  1547. ZEND_ASSERT(filter != NULL);
  1548. while (p < last - 1) { /* cycle each byte except last with callback function */
  1549. (*filter->filter_function)(*p++, filter);
  1550. }
  1551. params.mb_chunk_length = split_length - 1; /* force to finish current chunk */
  1552. (*filter->filter_function)(*p++, filter); /* process last char */
  1553. mbfl_convert_filter_delete(decoder);
  1554. mbfl_convert_filter_delete(filter);
  1555. mbfl_memory_device_clear(&device);
  1556. return;
  1557. }
  1558. /* first scenario: 1,2,4-bytes fixed width encodings (tail part) */
  1559. chunks = (mb_len + split_length - 1) / split_length; /* (round up idiom) */
  1560. array_init_size(return_value, chunks);
  1561. if (chunks != 0) {
  1562. zend_long i;
  1563. for (i = 0; i < chunks - 1; p += chunk_len, ++i) {
  1564. add_next_index_stringl(return_value, p, chunk_len);
  1565. }
  1566. add_next_index_stringl(return_value, p, last - p);
  1567. }
  1568. }
  1569. /* }}} */
  1570. /* {{{ Get character numbers of a string */
  1571. PHP_FUNCTION(mb_strlen)
  1572. {
  1573. mbfl_string string;
  1574. char *str;
  1575. zend_string *enc_name = NULL;
  1576. ZEND_PARSE_PARAMETERS_START(1, 2)
  1577. Z_PARAM_STRING(str, string.len)
  1578. Z_PARAM_OPTIONAL
  1579. Z_PARAM_STR_OR_NULL(enc_name)
  1580. ZEND_PARSE_PARAMETERS_END();
  1581. string.val = (unsigned char*)str;
  1582. string.encoding = php_mb_get_encoding(enc_name, 2);
  1583. if (!string.encoding) {
  1584. RETURN_THROWS();
  1585. }
  1586. size_t n = mbfl_strlen(&string);
  1587. /* Only way this can fail is if the conversion creation fails
  1588. * this would imply some sort of memory allocation failure which is a bug */
  1589. ZEND_ASSERT(!mbfl_is_error(n));
  1590. RETVAL_LONG(n);
  1591. }
  1592. /* }}} */
  1593. static void handle_strpos_error(size_t error) {
  1594. switch (error) {
  1595. case MBFL_ERROR_NOT_FOUND:
  1596. break;
  1597. case MBFL_ERROR_ENCODING:
  1598. php_error_docref(NULL, E_WARNING, "Conversion error");
  1599. break;
  1600. case MBFL_ERROR_OFFSET:
  1601. zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
  1602. break;
  1603. default:
  1604. zend_value_error("mb_strpos(): Unknown error");
  1605. break;
  1606. }
  1607. }
  1608. /* {{{ Find position of first occurrence of a string within another */
  1609. PHP_FUNCTION(mb_strpos)
  1610. {
  1611. int reverse = 0;
  1612. zend_long offset = 0;
  1613. char *haystack_val, *needle_val;
  1614. mbfl_string haystack, needle;
  1615. zend_string *enc_name = NULL;
  1616. ZEND_PARSE_PARAMETERS_START(2, 4)
  1617. Z_PARAM_STRING(haystack_val, haystack.len)
  1618. Z_PARAM_STRING(needle_val, needle.len)
  1619. Z_PARAM_OPTIONAL
  1620. Z_PARAM_LONG(offset)
  1621. Z_PARAM_STR_OR_NULL(enc_name)
  1622. ZEND_PARSE_PARAMETERS_END();
  1623. haystack.val = (unsigned char*)haystack_val;
  1624. needle.val = (unsigned char*)needle_val;
  1625. haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 4);
  1626. if (!haystack.encoding) {
  1627. RETURN_THROWS();
  1628. }
  1629. size_t n = mbfl_strpos(&haystack, &needle, offset, reverse);
  1630. if (!mbfl_is_error(n)) {
  1631. RETVAL_LONG(n);
  1632. } else {
  1633. handle_strpos_error(n);
  1634. RETVAL_FALSE;
  1635. }
  1636. }
  1637. /* }}} */
  1638. /* {{{ Find position of last occurrence of a string within another */
  1639. PHP_FUNCTION(mb_strrpos)
  1640. {
  1641. mbfl_string haystack, needle;
  1642. char *haystack_val, *needle_val;
  1643. zend_string *enc_name = NULL;
  1644. zend_long offset = 0;
  1645. ZEND_PARSE_PARAMETERS_START(2, 4)
  1646. Z_PARAM_STRING(haystack_val, haystack.len)
  1647. Z_PARAM_STRING(needle_val, needle.len)
  1648. Z_PARAM_OPTIONAL
  1649. Z_PARAM_LONG(offset)
  1650. Z_PARAM_STR_OR_NULL(enc_name)
  1651. ZEND_PARSE_PARAMETERS_END();
  1652. haystack.val = (unsigned char*)haystack_val;
  1653. needle.val = (unsigned char*)needle_val;
  1654. haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 4);
  1655. if (!haystack.encoding) {
  1656. RETURN_THROWS();
  1657. }
  1658. size_t n = mbfl_strpos(&haystack, &needle, offset, 1);
  1659. if (!mbfl_is_error(n)) {
  1660. RETVAL_LONG(n);
  1661. } else {
  1662. handle_strpos_error(n);
  1663. RETVAL_FALSE;
  1664. }
  1665. }
  1666. /* }}} */
  1667. /* {{{ Finds position of first occurrence of a string within another, case insensitive */
  1668. PHP_FUNCTION(mb_stripos)
  1669. {
  1670. zend_long offset = 0;
  1671. mbfl_string haystack, needle;
  1672. char *haystack_val, *needle_val;
  1673. zend_string *from_encoding = NULL;
  1674. ZEND_PARSE_PARAMETERS_START(2, 4)
  1675. Z_PARAM_STRING(haystack_val, haystack.len)
  1676. Z_PARAM_STRING(needle_val, needle.len)
  1677. Z_PARAM_OPTIONAL
  1678. Z_PARAM_LONG(offset)
  1679. Z_PARAM_STR_OR_NULL(from_encoding)
  1680. ZEND_PARSE_PARAMETERS_END();
  1681. haystack.val = (unsigned char*)haystack_val;
  1682. needle.val = (unsigned char*)needle_val;
  1683. const mbfl_encoding *enc = php_mb_get_encoding(from_encoding, 4);
  1684. if (!enc) {
  1685. RETURN_THROWS();
  1686. }
  1687. size_t n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, enc);
  1688. if (!mbfl_is_error(n)) {
  1689. RETVAL_LONG(n);
  1690. } else {
  1691. handle_strpos_error(n);
  1692. RETVAL_FALSE;
  1693. }
  1694. }
  1695. /* }}} */
  1696. /* {{{ Finds position of last occurrence of a string within another, case insensitive */
  1697. PHP_FUNCTION(mb_strripos)
  1698. {
  1699. zend_long offset = 0;
  1700. mbfl_string haystack, needle;
  1701. char *haystack_val, *needle_val;
  1702. zend_string *from_encoding = NULL;
  1703. ZEND_PARSE_PARAMETERS_START(2, 4)
  1704. Z_PARAM_STRING(haystack_val, haystack.len)
  1705. Z_PARAM_STRING(needle_val, needle.len)
  1706. Z_PARAM_OPTIONAL
  1707. Z_PARAM_LONG(offset)
  1708. Z_PARAM_STR_OR_NULL(from_encoding)
  1709. ZEND_PARSE_PARAMETERS_END();
  1710. haystack.val = (unsigned char*)haystack_val;
  1711. needle.val = (unsigned char*)needle_val;
  1712. const mbfl_encoding *enc = php_mb_get_encoding(from_encoding, 4);
  1713. if (!enc) {
  1714. RETURN_THROWS();
  1715. }
  1716. size_t n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, enc);
  1717. if (!mbfl_is_error(n)) {
  1718. RETVAL_LONG(n);
  1719. } else {
  1720. handle_strpos_error(n);
  1721. RETVAL_FALSE;
  1722. }
  1723. }
  1724. /* }}} */
  1725. #define MB_STRSTR 1
  1726. #define MB_STRRCHR 2
  1727. #define MB_STRISTR 3
  1728. #define MB_STRRICHR 4
  1729. /* {{{ php_mb_strstr_variants */
  1730. static void php_mb_strstr_variants(INTERNAL_FUNCTION_PARAMETERS, unsigned int variant)
  1731. {
  1732. int reverse_mode = 0;
  1733. size_t n;
  1734. char *haystack_val, *needle_val;
  1735. mbfl_string haystack, needle, result, *ret = NULL;
  1736. zend_string *encoding_name = NULL;
  1737. bool part = 0;
  1738. ZEND_PARSE_PARAMETERS_START(2, 4)
  1739. Z_PARAM_STRING(haystack_val, haystack.len)
  1740. Z_PARAM_STRING(needle_val, needle.len)
  1741. Z_PARAM_OPTIONAL
  1742. Z_PARAM_BOOL(part)
  1743. Z_PARAM_STR_OR_NULL(encoding_name)
  1744. ZEND_PARSE_PARAMETERS_END();
  1745. haystack.val = (unsigned char*)haystack_val;
  1746. needle.val = (unsigned char*)needle_val;
  1747. haystack.encoding = needle.encoding = php_mb_get_encoding(encoding_name, 4);
  1748. if (!haystack.encoding) {
  1749. RETURN_THROWS();
  1750. }
  1751. if (variant == MB_STRRCHR || variant == MB_STRRICHR) { reverse_mode = 1; }
  1752. if (variant == MB_STRISTR || variant == MB_STRRICHR) {
  1753. n = php_mb_stripos(reverse_mode, (char *)haystack.val, haystack.len, (char *)needle.val,
  1754. needle.len, 0, needle.encoding);
  1755. } else {
  1756. n = mbfl_strpos(&haystack, &needle, 0, reverse_mode);
  1757. }
  1758. if (!mbfl_is_error(n)) {
  1759. if (part) {
  1760. ret = mbfl_substr(&haystack, &result, 0, n);
  1761. ZEND_ASSERT(ret != NULL);
  1762. // TODO: avoid reallocation ???
  1763. RETVAL_STRINGL((char *)ret->val, ret->len);
  1764. efree(ret->val);
  1765. } else {
  1766. ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
  1767. ZEND_ASSERT(ret != NULL);
  1768. // TODO: avoid reallocation ???
  1769. RETVAL_STRINGL((char *)ret->val, ret->len);
  1770. efree(ret->val);
  1771. }
  1772. } else {
  1773. // FIXME use handle_strpos_error(n)
  1774. RETVAL_FALSE;
  1775. }
  1776. }
  1777. /* {{{ Finds first occurrence of a string within another */
  1778. PHP_FUNCTION(mb_strstr)
  1779. {
  1780. php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRSTR);
  1781. }
  1782. /* }}} */
  1783. /* {{{ Finds the last occurrence of a character in a string within another */
  1784. PHP_FUNCTION(mb_strrchr)
  1785. {
  1786. php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRRCHR);
  1787. }
  1788. /* }}} */
  1789. /* {{{ Finds first occurrence of a string within another, case insensitive */
  1790. PHP_FUNCTION(mb_stristr)
  1791. {
  1792. php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRISTR);
  1793. }
  1794. /* }}} */
  1795. /* {{{ Finds the last occurrence of a character in a string within another, case insensitive */
  1796. PHP_FUNCTION(mb_strrichr)
  1797. {
  1798. php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRRICHR);
  1799. }
  1800. /* }}} */
  1801. #undef MB_STRSTR
  1802. #undef MB_STRRCHR
  1803. #undef MB_STRISTR
  1804. #undef MB_STRRICHR
  1805. /* {{{ Count the number of substring occurrences */
  1806. PHP_FUNCTION(mb_substr_count)
  1807. {
  1808. mbfl_string haystack, needle;
  1809. char *haystack_val, *needle_val;
  1810. zend_string *enc_name = NULL;
  1811. ZEND_PARSE_PARAMETERS_START(2, 3)
  1812. Z_PARAM_STRING(haystack_val, haystack.len)
  1813. Z_PARAM_STRING(needle_val, needle.len)
  1814. Z_PARAM_OPTIONAL
  1815. Z_PARAM_STR_OR_NULL(enc_name)
  1816. ZEND_PARSE_PARAMETERS_END();
  1817. haystack.val = (unsigned char*)haystack_val;
  1818. needle.val = (unsigned char*)needle_val;
  1819. if (needle.len == 0) {
  1820. zend_argument_value_error(2, "must not be empty");
  1821. RETURN_THROWS();
  1822. }
  1823. haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 3);
  1824. if (!haystack.encoding) {
  1825. RETURN_THROWS();
  1826. }
  1827. size_t n = mbfl_substr_count(&haystack, &needle);
  1828. /* An error can only occur if needle is empty,
  1829. * an encoding error happens (which should not happen at this stage and is a bug)
  1830. * or the haystack is more than sizeof(size_t) bytes
  1831. * If one of these things occur this is a bug and should be flagged as such */
  1832. ZEND_ASSERT(!mbfl_is_error(n));
  1833. RETVAL_LONG(n);
  1834. }
  1835. /* }}} */
  1836. /* {{{ Returns part of a string */
  1837. PHP_FUNCTION(mb_substr)
  1838. {
  1839. char *str;
  1840. zend_string *encoding = NULL;
  1841. zend_long from, len;
  1842. size_t real_from, real_len;
  1843. size_t str_len;
  1844. bool len_is_null = 1;
  1845. mbfl_string string, result, *ret;
  1846. ZEND_PARSE_PARAMETERS_START(2, 4)
  1847. Z_PARAM_STRING(str, str_len)
  1848. Z_PARAM_LONG(from)
  1849. Z_PARAM_OPTIONAL
  1850. Z_PARAM_LONG_OR_NULL(len, len_is_null)
  1851. Z_PARAM_STR_OR_NULL(encoding)
  1852. ZEND_PARSE_PARAMETERS_END();
  1853. string.encoding = php_mb_get_encoding(encoding, 4);
  1854. if (!string.encoding) {
  1855. RETURN_THROWS();
  1856. }
  1857. string.val = (unsigned char *)str;
  1858. string.len = str_len;
  1859. /* measures length */
  1860. size_t mblen = 0;
  1861. if (from < 0 || (!len_is_null && len < 0)) {
  1862. mblen = mbfl_strlen(&string);
  1863. }
  1864. /* if "from" position is negative, count start position from the end
  1865. * of the string
  1866. */
  1867. if (from >= 0) {
  1868. real_from = (size_t) from;
  1869. } else if (-from < mblen) {
  1870. real_from = mblen + from;
  1871. } else {
  1872. real_from = 0;
  1873. }
  1874. /* if "length" position is negative, set it to the length
  1875. * needed to stop that many chars from the end of the string
  1876. */
  1877. if (len_is_null) {
  1878. real_len = MBFL_SUBSTR_UNTIL_END;
  1879. } else if (len >= 0) {
  1880. real_len = (size_t) len;
  1881. } else if (real_from < mblen && -len < mblen - real_from) {
  1882. real_len = (mblen - real_from) + len;
  1883. } else {
  1884. real_len = 0;
  1885. }
  1886. ret = mbfl_substr(&string, &result, real_from, real_len);
  1887. ZEND_ASSERT(ret != NULL);
  1888. // TODO: avoid reallocation ???
  1889. RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
  1890. efree(ret->val);
  1891. }
  1892. /* }}} */
  1893. /* {{{ Returns part of a string */
  1894. PHP_FUNCTION(mb_strcut)
  1895. {
  1896. zend_string *encoding = NULL;
  1897. char *string_val;
  1898. zend_long from, len;
  1899. bool len_is_null = 1;
  1900. mbfl_string string, result, *ret;
  1901. ZEND_PARSE_PARAMETERS_START(2, 4)
  1902. Z_PARAM_STRING(string_val, string.len)
  1903. Z_PARAM_LONG(from)
  1904. Z_PARAM_OPTIONAL
  1905. Z_PARAM_LONG_OR_NULL(len, len_is_null)
  1906. Z_PARAM_STR_OR_NULL(encoding)
  1907. ZEND_PARSE_PARAMETERS_END();
  1908. string.val = (unsigned char*)string_val;
  1909. string.encoding = php_mb_get_encoding(encoding, 4);
  1910. if (!string.encoding) {
  1911. RETURN_THROWS();
  1912. }
  1913. if (len_is_null) {
  1914. len = string.len;
  1915. }
  1916. /* if "from" position is negative, count start position from the end
  1917. * of the string
  1918. */
  1919. if (from < 0) {
  1920. from = string.len + from;
  1921. if (from < 0) {
  1922. from = 0;
  1923. }
  1924. }
  1925. /* if "length" position is negative, set it to the length
  1926. * needed to stop that many chars from the end of the string
  1927. */
  1928. if (len < 0) {
  1929. len = (string.len - from) + len;
  1930. if (len < 0) {
  1931. len = 0;
  1932. }
  1933. }
  1934. if (from > string.len) {
  1935. RETURN_EMPTY_STRING();
  1936. }
  1937. ret = mbfl_strcut(&string, &result, from, len);
  1938. ZEND_ASSERT(ret != NULL);
  1939. // TODO: avoid reallocation ???
  1940. RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
  1941. efree(ret->val);
  1942. }
  1943. /* }}} */
  1944. /* {{{ Gets terminal width of a string */
  1945. PHP_FUNCTION(mb_strwidth)
  1946. {
  1947. char *string_val;
  1948. mbfl_string string;
  1949. zend_string *enc_name = NULL;
  1950. ZEND_PARSE_PARAMETERS_START(1, 2)
  1951. Z_PARAM_STRING(string_val, string.len)
  1952. Z_PARAM_OPTIONAL
  1953. Z_PARAM_STR_OR_NULL(enc_name)
  1954. ZEND_PARSE_PARAMETERS_END();
  1955. string.val = (unsigned char*)string_val;
  1956. string.encoding = php_mb_get_encoding(enc_name, 2);
  1957. if (!string.encoding) {
  1958. RETURN_THROWS();
  1959. }
  1960. size_t n = mbfl_strwidth(&string);
  1961. ZEND_ASSERT(n != (size_t) -1);
  1962. RETVAL_LONG(n);
  1963. }
  1964. /* }}} */
  1965. /* {{{ Trim the string in terminal width */
  1966. PHP_FUNCTION(mb_strimwidth)
  1967. {
  1968. char *str, *trimmarker = NULL;
  1969. zend_string *encoding = NULL;
  1970. zend_long from, width, swidth = 0;
  1971. size_t str_len, trimmarker_len;
  1972. mbfl_string string, result, marker, *ret;
  1973. ZEND_PARSE_PARAMETERS_START(3, 5)
  1974. Z_PARAM_STRING(str, str_len)
  1975. Z_PARAM_LONG(from)
  1976. Z_PARAM_LONG(width)
  1977. Z_PARAM_OPTIONAL
  1978. Z_PARAM_STRING(trimmarker, trimmarker_len)
  1979. Z_PARAM_STR_OR_NULL(encoding)
  1980. ZEND_PARSE_PARAMETERS_END();
  1981. string.encoding = marker.encoding = php_mb_get_encoding(encoding, 5);
  1982. if (!string.encoding) {
  1983. RETURN_THROWS();
  1984. }
  1985. string.val = (unsigned char *)str;
  1986. string.len = str_len;
  1987. marker.val = NULL;
  1988. marker.len = 0;
  1989. if ((from < 0) || (width < 0)) {
  1990. swidth = mbfl_strwidth(&string);
  1991. }
  1992. if (from < 0) {
  1993. from += swidth;
  1994. }
  1995. if (from < 0 || (size_t)from > str_len) {
  1996. zend_argument_value_error(2, "is out of range");
  1997. RETURN_THROWS();
  1998. }
  1999. if (width < 0) {
  2000. width = swidth + width - from;
  2001. }
  2002. if (width < 0) {
  2003. zend_argument_value_error(3, "is out of range");
  2004. RETURN_THROWS();
  2005. }
  2006. if (trimmarker) {
  2007. marker.val = (unsigned char *)trimmarker;
  2008. marker.len = trimmarker_len;
  2009. }
  2010. ret = mbfl_strimwidth(&string, &marker, &result, from, width);
  2011. ZEND_ASSERT(ret != NULL);
  2012. // TODO: avoid reallocation ???
  2013. RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
  2014. efree(ret->val);
  2015. }
  2016. /* }}} */
  2017. /* See mbfl_no_encoding definition for list of unsupported encodings */
  2018. static inline bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)
  2019. {
  2020. return ((no_enc >= mbfl_no_encoding_invalid && no_enc <= mbfl_no_encoding_qprint)
  2021. || (no_enc >= mbfl_no_encoding_utf7 && no_enc <= mbfl_no_encoding_utf7imap)
  2022. || (no_enc >= mbfl_no_encoding_jis && no_enc <= mbfl_no_encoding_2022jpms)
  2023. || (no_enc >= mbfl_no_encoding_cp50220 && no_enc <= mbfl_no_encoding_cp50222));
  2024. }
  2025. /* See mbfl_no_encoding definition for list of UTF-8 encodings */
  2026. static inline bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
  2027. {
  2028. return (no_enc >= mbfl_no_encoding_utf8 && no_enc <= mbfl_no_encoding_utf8_sb);
  2029. }
  2030. MBSTRING_API char *php_mb_convert_encoding_ex(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding *from_encoding, size_t *output_len)
  2031. {
  2032. mbfl_string string, result, *ret;
  2033. mbfl_buffer_converter *convd;
  2034. char *output = NULL;
  2035. if (output_len) {
  2036. *output_len = 0;
  2037. }
  2038. /* initialize string */
  2039. string.encoding = from_encoding;
  2040. string.val = (unsigned char *)input;
  2041. string.len = length;
  2042. /* initialize converter */
  2043. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
  2044. /* If this assertion fails this means some memory allocation failure which is a bug */
  2045. ZEND_ASSERT(convd != NULL);
  2046. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  2047. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  2048. /* do it */
  2049. mbfl_string_init(&result);
  2050. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  2051. if (ret) {
  2052. if (output_len) {
  2053. *output_len = ret->len;
  2054. }
  2055. output = (char *)ret->val;
  2056. }
  2057. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
  2058. mbfl_buffer_converter_delete(convd);
  2059. return output;
  2060. }
  2061. /* }}} */
  2062. /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
  2063. MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding **from_encodings, size_t num_from_encodings, size_t *output_len)
  2064. {
  2065. const mbfl_encoding *from_encoding;
  2066. if (output_len) {
  2067. *output_len = 0;
  2068. }
  2069. /* pre-conversion encoding */
  2070. ZEND_ASSERT(num_from_encodings >= 1);
  2071. if (num_from_encodings == 1) {
  2072. from_encoding = *from_encodings;
  2073. } else {
  2074. /* auto detect */
  2075. mbfl_string string;
  2076. mbfl_string_init(&string);
  2077. string.val = (unsigned char *)input;
  2078. string.len = length;
  2079. from_encoding = mbfl_identify_encoding(
  2080. &string, from_encodings, num_from_encodings, MBSTRG(strict_detection));
  2081. if (!from_encoding) {
  2082. php_error_docref(NULL, E_WARNING, "Unable to detect character encoding");
  2083. return NULL;
  2084. }
  2085. }
  2086. return php_mb_convert_encoding_ex(input, length, to_encoding, from_encoding, output_len);
  2087. }
  2088. /* }}} */
  2089. MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, const mbfl_encoding *to_encoding, const mbfl_encoding **from_encodings, size_t num_from_encodings)
  2090. {
  2091. HashTable *output, *chash;
  2092. zend_long idx;
  2093. zend_string *key;
  2094. zval *entry, entry_tmp;
  2095. size_t ckey_len, cval_len;
  2096. char *ckey, *cval;
  2097. if (!input) {
  2098. return NULL;
  2099. }
  2100. if (GC_IS_RECURSIVE(input)) {
  2101. GC_UNPROTECT_RECURSION(input);
  2102. php_error_docref(NULL, E_WARNING, "Cannot convert recursively referenced values");
  2103. return NULL;
  2104. }
  2105. GC_TRY_PROTECT_RECURSION(input);
  2106. output = zend_new_array(zend_hash_num_elements(input));
  2107. ZEND_HASH_FOREACH_KEY_VAL(input, idx, key, entry) {
  2108. /* convert key */
  2109. if (key) {
  2110. ckey = php_mb_convert_encoding(
  2111. ZSTR_VAL(key), ZSTR_LEN(key),
  2112. to_encoding, from_encodings, num_from_encodings, &ckey_len);
  2113. key = zend_string_init(ckey, ckey_len, 0);
  2114. efree(ckey);
  2115. }
  2116. /* convert value */
  2117. ZEND_ASSERT(entry);
  2118. try_again:
  2119. switch(Z_TYPE_P(entry)) {
  2120. case IS_STRING:
  2121. cval = php_mb_convert_encoding(
  2122. Z_STRVAL_P(entry), Z_STRLEN_P(entry),
  2123. to_encoding, from_encodings, num_from_encodings, &cval_len);
  2124. ZVAL_STRINGL(&entry_tmp, cval, cval_len);
  2125. efree(cval);
  2126. break;
  2127. case IS_NULL:
  2128. case IS_TRUE:
  2129. case IS_FALSE:
  2130. case IS_LONG:
  2131. case IS_DOUBLE:
  2132. ZVAL_COPY(&entry_tmp, entry);
  2133. break;
  2134. case IS_ARRAY:
  2135. chash = php_mb_convert_encoding_recursive(
  2136. Z_ARRVAL_P(entry), to_encoding, from_encodings, num_from_encodings);
  2137. if (chash) {
  2138. ZVAL_ARR(&entry_tmp, chash);
  2139. } else {
  2140. ZVAL_EMPTY_ARRAY(&entry_tmp);
  2141. }
  2142. break;
  2143. case IS_REFERENCE:
  2144. entry = Z_REFVAL_P(entry);
  2145. goto try_again;
  2146. case IS_OBJECT:
  2147. default:
  2148. if (key) {
  2149. zend_string_release(key);
  2150. }
  2151. php_error_docref(NULL, E_WARNING, "Object is not supported");
  2152. continue;
  2153. }
  2154. if (key) {
  2155. zend_hash_add(output, key, &entry_tmp);
  2156. zend_string_release(key);
  2157. } else {
  2158. zend_hash_index_add(output, idx, &entry_tmp);
  2159. }
  2160. } ZEND_HASH_FOREACH_END();
  2161. GC_TRY_UNPROTECT_RECURSION(input);
  2162. return output;
  2163. }
  2164. /* }}} */
  2165. static void remove_non_encodings_from_elist(const mbfl_encoding **elist, size_t *size)
  2166. {
  2167. /* mbstring supports some 'text encodings' which aren't really text encodings
  2168. * at all, but really 'byte encodings', like Base64, QPrint, and so on.
  2169. * These should never be returned by `mb_detect_encoding`. */
  2170. int shift = 0;
  2171. for (int i = 0; i < *size; i++) {
  2172. const mbfl_encoding *encoding = elist[i];
  2173. if (encoding->no_encoding <= mbfl_no_encoding_charset_min) {
  2174. shift++; /* Remove this encoding from the list */
  2175. } else if (shift) {
  2176. elist[i - shift] = encoding;
  2177. }
  2178. }
  2179. *size -= shift;
  2180. }
  2181. /* {{{ Returns converted string in desired encoding */
  2182. PHP_FUNCTION(mb_convert_encoding)
  2183. {
  2184. zend_string *to_encoding_name;
  2185. zend_string *input_str, *from_encodings_str = NULL;
  2186. HashTable *input_ht, *from_encodings_ht = NULL;
  2187. const mbfl_encoding **from_encodings;
  2188. size_t num_from_encodings;
  2189. bool free_from_encodings;
  2190. ZEND_PARSE_PARAMETERS_START(2, 3)
  2191. Z_PARAM_ARRAY_HT_OR_STR(input_ht, input_str)
  2192. Z_PARAM_STR(to_encoding_name)
  2193. Z_PARAM_OPTIONAL
  2194. Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(from_encodings_ht, from_encodings_str)
  2195. ZEND_PARSE_PARAMETERS_END();
  2196. const mbfl_encoding *to_encoding = php_mb_get_encoding(to_encoding_name, 2);
  2197. if (!to_encoding) {
  2198. RETURN_THROWS();
  2199. }
  2200. if (from_encodings_ht) {
  2201. if (php_mb_parse_encoding_array(from_encodings_ht, &from_encodings, &num_from_encodings, 3) == FAILURE) {
  2202. RETURN_THROWS();
  2203. }
  2204. free_from_encodings = 1;
  2205. } else if (from_encodings_str) {
  2206. if (php_mb_parse_encoding_list(ZSTR_VAL(from_encodings_str), ZSTR_LEN(from_encodings_str),
  2207. &from_encodings, &num_from_encodings,
  2208. /* persistent */ 0, /* arg_num */ 3, /* allow_pass_encoding */ 0) == FAILURE) {
  2209. RETURN_THROWS();
  2210. }
  2211. free_from_encodings = 1;
  2212. } else {
  2213. from_encodings = &MBSTRG(current_internal_encoding);
  2214. num_from_encodings = 1;
  2215. free_from_encodings = 0;
  2216. }
  2217. if (num_from_encodings > 1) {
  2218. remove_non_encodings_from_elist(from_encodings, &num_from_encodings);
  2219. }
  2220. if (!num_from_encodings) {
  2221. efree(ZEND_VOIDP(from_encodings));
  2222. zend_argument_value_error(3, "must specify at least one encoding");
  2223. RETURN_THROWS();
  2224. }
  2225. if (input_str) {
  2226. /* new encoding */
  2227. size_t size;
  2228. char *ret = php_mb_convert_encoding(ZSTR_VAL(input_str), ZSTR_LEN(input_str),
  2229. to_encoding, from_encodings, num_from_encodings, &size);
  2230. if (ret != NULL) {
  2231. // TODO: avoid reallocation ???
  2232. RETVAL_STRINGL(ret, size); /* the string is already strdup()'ed */
  2233. efree(ret);
  2234. } else {
  2235. RETVAL_FALSE;
  2236. }
  2237. } else {
  2238. HashTable *tmp;
  2239. tmp = php_mb_convert_encoding_recursive(
  2240. input_ht, to_encoding, from_encodings, num_from_encodings);
  2241. RETVAL_ARR(tmp);
  2242. }
  2243. if (free_from_encodings) {
  2244. efree(ZEND_VOIDP(from_encodings));
  2245. }
  2246. }
  2247. /* }}} */
  2248. static char *mbstring_convert_case(
  2249. int case_mode, const char *str, size_t str_len, size_t *ret_len,
  2250. const mbfl_encoding *enc) {
  2251. return php_unicode_convert_case(
  2252. case_mode, str, str_len, ret_len, enc,
  2253. MBSTRG(current_filter_illegal_mode), MBSTRG(current_filter_illegal_substchar));
  2254. }
  2255. /* {{{ Returns a case-folded version of source_string */
  2256. PHP_FUNCTION(mb_convert_case)
  2257. {
  2258. zend_string *from_encoding = NULL;
  2259. char *str;
  2260. size_t str_len, ret_len;
  2261. zend_long case_mode = 0;
  2262. ZEND_PARSE_PARAMETERS_START(2, 3)
  2263. Z_PARAM_STRING(str, str_len)
  2264. Z_PARAM_LONG(case_mode)
  2265. Z_PARAM_OPTIONAL
  2266. Z_PARAM_STR_OR_NULL(from_encoding)
  2267. ZEND_PARSE_PARAMETERS_END();
  2268. const mbfl_encoding *enc = php_mb_get_encoding(from_encoding, 3);
  2269. if (!enc) {
  2270. RETURN_THROWS();
  2271. }
  2272. if (case_mode < 0 || case_mode > PHP_UNICODE_CASE_MODE_MAX) {
  2273. zend_argument_value_error(2, "must be one of the MB_CASE_* constants");
  2274. RETURN_THROWS();
  2275. }
  2276. char *newstr = mbstring_convert_case(case_mode, str, str_len, &ret_len, enc);
  2277. /* If newstr is NULL something went wrong in mbfl and this is a bug */
  2278. ZEND_ASSERT(newstr != NULL);
  2279. // TODO: avoid reallocation ???
  2280. RETVAL_STRINGL(newstr, ret_len);
  2281. efree(newstr);
  2282. }
  2283. /* }}} */
  2284. /* {{{ Returns a upper cased version of source_string */
  2285. PHP_FUNCTION(mb_strtoupper)
  2286. {
  2287. zend_string *from_encoding = NULL;
  2288. char *str;
  2289. size_t str_len, ret_len;
  2290. ZEND_PARSE_PARAMETERS_START(1, 2)
  2291. Z_PARAM_STRING(str, str_len)
  2292. Z_PARAM_OPTIONAL
  2293. Z_PARAM_STR_OR_NULL(from_encoding)
  2294. ZEND_PARSE_PARAMETERS_END();
  2295. const mbfl_encoding *enc = php_mb_get_encoding(from_encoding, 2);
  2296. if (!enc) {
  2297. RETURN_THROWS();
  2298. }
  2299. char *newstr = mbstring_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc);
  2300. /* If newstr is NULL something went wrong in mbfl and this is a bug */
  2301. ZEND_ASSERT(newstr != NULL);
  2302. // TODO: avoid reallocation ???
  2303. RETVAL_STRINGL(newstr, ret_len);
  2304. efree(newstr);
  2305. }
  2306. /* }}} */
  2307. /* {{{ Returns a lower cased version of source_string */
  2308. PHP_FUNCTION(mb_strtolower)
  2309. {
  2310. zend_string *from_encoding = NULL;
  2311. char *str;
  2312. size_t str_len;
  2313. char *newstr;
  2314. size_t ret_len;
  2315. const mbfl_encoding *enc;
  2316. ZEND_PARSE_PARAMETERS_START(1, 2)
  2317. Z_PARAM_STRING(str, str_len)
  2318. Z_PARAM_OPTIONAL
  2319. Z_PARAM_STR_OR_NULL(from_encoding)
  2320. ZEND_PARSE_PARAMETERS_END();
  2321. enc = php_mb_get_encoding(from_encoding, 2);
  2322. if (!enc) {
  2323. RETURN_THROWS();
  2324. }
  2325. newstr = mbstring_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc);
  2326. /* If newstr is NULL something went wrong in mbfl and this is a bug */
  2327. ZEND_ASSERT(newstr != NULL);
  2328. // TODO: avoid reallocation ???
  2329. RETVAL_STRINGL(newstr, ret_len);
  2330. efree(newstr);
  2331. }
  2332. /* }}} */
  2333. static const mbfl_encoding **duplicate_elist(const mbfl_encoding **elist, size_t size)
  2334. {
  2335. const mbfl_encoding **new_elist = safe_emalloc(size, sizeof(mbfl_encoding*), 0);
  2336. memcpy(ZEND_VOIDP(new_elist), elist, size * sizeof(mbfl_encoding*));
  2337. return new_elist;
  2338. }
  2339. /* {{{ Encodings of the given string is returned (as a string) */
  2340. PHP_FUNCTION(mb_detect_encoding)
  2341. {
  2342. char *str;
  2343. size_t str_len;
  2344. zend_string *encoding_str = NULL;
  2345. HashTable *encoding_ht = NULL;
  2346. bool strict = 0;
  2347. mbfl_string string;
  2348. const mbfl_encoding *ret;
  2349. const mbfl_encoding **elist;
  2350. size_t size;
  2351. ZEND_PARSE_PARAMETERS_START(1, 3)
  2352. Z_PARAM_STRING(str, str_len)
  2353. Z_PARAM_OPTIONAL
  2354. Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(encoding_ht, encoding_str)
  2355. Z_PARAM_BOOL(strict)
  2356. ZEND_PARSE_PARAMETERS_END();
  2357. /* make encoding list */
  2358. if (encoding_ht) {
  2359. if (FAILURE == php_mb_parse_encoding_array(encoding_ht, &elist, &size, 2)) {
  2360. RETURN_THROWS();
  2361. }
  2362. } else if (encoding_str) {
  2363. if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(encoding_str), ZSTR_LEN(encoding_str), &elist, &size, /* persistent */ 0, /* arg_num */ 2, /* allow_pass_encoding */ 0)) {
  2364. RETURN_THROWS();
  2365. }
  2366. } else {
  2367. elist = duplicate_elist(MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size));
  2368. size = MBSTRG(current_detect_order_list_size);
  2369. }
  2370. if (size == 0) {
  2371. efree(ZEND_VOIDP(elist));
  2372. zend_argument_value_error(2, "must specify at least one encoding");
  2373. RETURN_THROWS();
  2374. }
  2375. remove_non_encodings_from_elist(elist, &size);
  2376. if (size == 0) {
  2377. efree(ZEND_VOIDP(elist));
  2378. RETURN_FALSE;
  2379. }
  2380. if (ZEND_NUM_ARGS() < 3) {
  2381. strict = MBSTRG(strict_detection);
  2382. }
  2383. if (strict && size == 1) {
  2384. /* If there is only a single candidate encoding, mb_check_encoding is faster */
  2385. ret = (php_mb_check_encoding(str, str_len, *elist)) ? *elist : NULL;
  2386. } else {
  2387. mbfl_string_init(&string);
  2388. string.val = (unsigned char *)str;
  2389. string.len = str_len;
  2390. ret = mbfl_identify_encoding(&string, elist, size, strict);
  2391. }
  2392. efree(ZEND_VOIDP(elist));
  2393. if (ret == NULL) {
  2394. RETURN_FALSE;
  2395. }
  2396. RETVAL_STRING((char *)ret->name);
  2397. }
  2398. /* }}} */
  2399. /* {{{ Returns an array of all supported entity encodings */
  2400. PHP_FUNCTION(mb_list_encodings)
  2401. {
  2402. ZEND_PARSE_PARAMETERS_NONE();
  2403. array_init(return_value);
  2404. for (const mbfl_encoding **encodings = mbfl_get_supported_encodings(); *encodings; encodings++) {
  2405. add_next_index_string(return_value, (*encodings)->name);
  2406. }
  2407. }
  2408. /* }}} */
  2409. /* {{{ Returns an array of the aliases of a given encoding name */
  2410. PHP_FUNCTION(mb_encoding_aliases)
  2411. {
  2412. const mbfl_encoding *encoding;
  2413. zend_string *encoding_name = NULL;
  2414. ZEND_PARSE_PARAMETERS_START(1, 1)
  2415. Z_PARAM_STR(encoding_name)
  2416. ZEND_PARSE_PARAMETERS_END();
  2417. encoding = php_mb_get_encoding(encoding_name, 1);
  2418. if (!encoding) {
  2419. RETURN_THROWS();
  2420. }
  2421. array_init(return_value);
  2422. if (encoding->aliases != NULL) {
  2423. for (const char **alias = encoding->aliases; *alias; ++alias) {
  2424. add_next_index_string(return_value, (char *)*alias);
  2425. }
  2426. }
  2427. }
  2428. /* }}} */
  2429. /* {{{ Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
  2430. PHP_FUNCTION(mb_encode_mimeheader)
  2431. {
  2432. const mbfl_encoding *charset, *transenc;
  2433. mbfl_string string, result, *ret;
  2434. zend_string *charset_name = NULL;
  2435. char *trans_enc_name = NULL, *string_val;
  2436. size_t trans_enc_name_len;
  2437. char *linefeed = "\r\n";
  2438. size_t linefeed_len;
  2439. zend_long indent = 0;
  2440. string.encoding = MBSTRG(current_internal_encoding);
  2441. ZEND_PARSE_PARAMETERS_START(1, 5)
  2442. Z_PARAM_STRING(string_val, string.len)
  2443. Z_PARAM_OPTIONAL
  2444. Z_PARAM_STR(charset_name)
  2445. Z_PARAM_STRING(trans_enc_name, trans_enc_name_len)
  2446. Z_PARAM_STRING(linefeed, linefeed_len)
  2447. Z_PARAM_LONG(indent)
  2448. ZEND_PARSE_PARAMETERS_END();
  2449. string.val = (unsigned char*)string_val;
  2450. charset = &mbfl_encoding_pass;
  2451. transenc = &mbfl_encoding_base64;
  2452. if (charset_name != NULL) {
  2453. charset = php_mb_get_encoding(charset_name, 2);
  2454. if (!charset) {
  2455. RETURN_THROWS();
  2456. }
  2457. } else {
  2458. const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
  2459. if (lang != NULL) {
  2460. charset = mbfl_no2encoding(lang->mail_charset);
  2461. transenc = mbfl_no2encoding(lang->mail_header_encoding);
  2462. }
  2463. }
  2464. if (trans_enc_name != NULL) {
  2465. if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
  2466. transenc = &mbfl_encoding_base64;
  2467. } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
  2468. transenc = &mbfl_encoding_qprint;
  2469. }
  2470. }
  2471. mbfl_string_init(&result);
  2472. ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
  2473. ZEND_ASSERT(ret != NULL);
  2474. // TODO: avoid reallocation ???
  2475. RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
  2476. efree(ret->val);
  2477. }
  2478. /* }}} */
  2479. /* {{{ Decodes the MIME "encoded-word" in the string */
  2480. PHP_FUNCTION(mb_decode_mimeheader)
  2481. {
  2482. char *string_val;
  2483. mbfl_string string, result, *ret;
  2484. string.encoding = MBSTRG(current_internal_encoding);
  2485. ZEND_PARSE_PARAMETERS_START(1, 1)
  2486. Z_PARAM_STRING(string_val, string.len)
  2487. ZEND_PARSE_PARAMETERS_END();
  2488. string.val = (unsigned char*)string_val;
  2489. mbfl_string_init(&result);
  2490. ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
  2491. ZEND_ASSERT(ret != NULL);
  2492. // TODO: avoid reallocation ???
  2493. RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
  2494. efree(ret->val);
  2495. }
  2496. /* }}} */
  2497. /* {{{ Conversion between full-width character and half-width character (Japanese) */
  2498. PHP_FUNCTION(mb_convert_kana)
  2499. {
  2500. int opt;
  2501. mbfl_string string, result, *ret;
  2502. char *optstr = NULL, *string_val;
  2503. size_t optstr_len;
  2504. zend_string *encname = NULL;
  2505. ZEND_PARSE_PARAMETERS_START(1, 3)
  2506. Z_PARAM_STRING(string_val, string.len)
  2507. Z_PARAM_OPTIONAL
  2508. Z_PARAM_STRING(optstr, optstr_len)
  2509. Z_PARAM_STR_OR_NULL(encname)
  2510. ZEND_PARSE_PARAMETERS_END();
  2511. string.val = (unsigned char*)string_val;
  2512. /* "Zen" is 全, or "full"; "Han" is 半, or "half"
  2513. * This refers to "fullwidth" or "halfwidth" variants of characters used for writing Japanese */
  2514. if (optstr != NULL) {
  2515. char *p = optstr, *e = p + optstr_len;
  2516. opt = 0;
  2517. while (p < e) {
  2518. switch (*p++) {
  2519. case 'A':
  2520. opt |= MBFL_FILT_TL_HAN2ZEN_ALL;
  2521. break;
  2522. case 'a':
  2523. opt |= MBFL_FILT_TL_ZEN2HAN_ALL;
  2524. break;
  2525. case 'R':
  2526. opt |= MBFL_FILT_TL_HAN2ZEN_ALPHA;
  2527. break;
  2528. case 'r':
  2529. opt |= MBFL_FILT_TL_ZEN2HAN_ALPHA;
  2530. break;
  2531. case 'N':
  2532. opt |= MBFL_FILT_TL_HAN2ZEN_NUMERIC;
  2533. break;
  2534. case 'n':
  2535. opt |= MBFL_FILT_TL_ZEN2HAN_NUMERIC;
  2536. break;
  2537. case 'S':
  2538. opt |= MBFL_FILT_TL_HAN2ZEN_SPACE;
  2539. break;
  2540. case 's':
  2541. opt |= MBFL_FILT_TL_ZEN2HAN_SPACE;
  2542. break;
  2543. case 'K':
  2544. opt |= MBFL_FILT_TL_HAN2ZEN_KATAKANA;
  2545. break;
  2546. case 'k':
  2547. opt |= MBFL_FILT_TL_ZEN2HAN_KATAKANA;
  2548. break;
  2549. case 'H':
  2550. opt |= MBFL_FILT_TL_HAN2ZEN_HIRAGANA;
  2551. break;
  2552. case 'h':
  2553. opt |= MBFL_FILT_TL_ZEN2HAN_HIRAGANA;
  2554. break;
  2555. case 'V':
  2556. opt |= MBFL_FILT_TL_HAN2ZEN_GLUE;
  2557. break;
  2558. case 'C':
  2559. opt |= MBFL_FILT_TL_ZEN2HAN_HIRA2KANA;
  2560. break;
  2561. case 'c':
  2562. opt |= MBFL_FILT_TL_ZEN2HAN_KANA2HIRA;
  2563. break;
  2564. case 'M':
  2565. /* TODO: figure out what 'M' and 'm' are for, and rename the constant
  2566. * to something meaningful */
  2567. opt |= MBFL_FILT_TL_HAN2ZEN_COMPAT1;
  2568. break;
  2569. case 'm':
  2570. opt |= MBFL_FILT_TL_ZEN2HAN_COMPAT1;
  2571. break;
  2572. }
  2573. }
  2574. } else {
  2575. opt = MBFL_FILT_TL_HAN2ZEN_KATAKANA | MBFL_FILT_TL_HAN2ZEN_GLUE;
  2576. }
  2577. /* encoding */
  2578. string.encoding = php_mb_get_encoding(encname, 3);
  2579. if (!string.encoding) {
  2580. RETURN_THROWS();
  2581. }
  2582. ret = mbfl_ja_jp_hantozen(&string, &result, opt);
  2583. ZEND_ASSERT(ret != NULL);
  2584. // TODO: avoid reallocation ???
  2585. RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
  2586. efree(ret->val);
  2587. }
  2588. /* }}} */
  2589. static int mb_recursive_encoder_detector_feed(mbfl_encoding_detector *identd, zval *var, int *recursion_error) /* {{{ */
  2590. {
  2591. mbfl_string string;
  2592. HashTable *ht;
  2593. zval *entry;
  2594. ZVAL_DEREF(var);
  2595. if (Z_TYPE_P(var) == IS_STRING) {
  2596. string.val = (unsigned char *)Z_STRVAL_P(var);
  2597. string.len = Z_STRLEN_P(var);
  2598. if (mbfl_encoding_detector_feed(identd, &string)) {
  2599. return 1; /* complete detecting */
  2600. }
  2601. } else if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
  2602. if (Z_REFCOUNTED_P(var)) {
  2603. if (Z_IS_RECURSIVE_P(var)) {
  2604. *recursion_error = 1;
  2605. return 0;
  2606. }
  2607. Z_PROTECT_RECURSION_P(var);
  2608. }
  2609. ht = HASH_OF(var);
  2610. if (ht != NULL) {
  2611. ZEND_HASH_FOREACH_VAL_IND(ht, entry) {
  2612. if (mb_recursive_encoder_detector_feed(identd, entry, recursion_error)) {
  2613. if (Z_REFCOUNTED_P(var)) {
  2614. Z_UNPROTECT_RECURSION_P(var);
  2615. }
  2616. return 1;
  2617. } else if (*recursion_error) {
  2618. if (Z_REFCOUNTED_P(var)) {
  2619. Z_UNPROTECT_RECURSION_P(var);
  2620. }
  2621. return 0;
  2622. }
  2623. } ZEND_HASH_FOREACH_END();
  2624. }
  2625. if (Z_REFCOUNTED_P(var)) {
  2626. Z_UNPROTECT_RECURSION_P(var);
  2627. }
  2628. }
  2629. return 0;
  2630. } /* }}} */
  2631. static int mb_recursive_convert_variable(mbfl_buffer_converter *convd, zval *var) /* {{{ */
  2632. {
  2633. mbfl_string string, result, *ret;
  2634. HashTable *ht;
  2635. zval *entry, *orig_var;
  2636. orig_var = var;
  2637. ZVAL_DEREF(var);
  2638. if (Z_TYPE_P(var) == IS_STRING) {
  2639. string.val = (unsigned char *)Z_STRVAL_P(var);
  2640. string.len = Z_STRLEN_P(var);
  2641. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  2642. if (ret != NULL) {
  2643. zval_ptr_dtor(orig_var);
  2644. // TODO: avoid reallocation ???
  2645. ZVAL_STRINGL(orig_var, (char *)ret->val, ret->len);
  2646. efree(ret->val);
  2647. }
  2648. } else if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
  2649. if (Z_TYPE_P(var) == IS_ARRAY) {
  2650. SEPARATE_ARRAY(var);
  2651. }
  2652. if (Z_REFCOUNTED_P(var)) {
  2653. if (Z_IS_RECURSIVE_P(var)) {
  2654. return 1;
  2655. }
  2656. Z_PROTECT_RECURSION_P(var);
  2657. }
  2658. ht = HASH_OF(var);
  2659. if (ht != NULL) {
  2660. ZEND_HASH_FOREACH_VAL_IND(ht, entry) {
  2661. if (mb_recursive_convert_variable(convd, entry)) {
  2662. if (Z_REFCOUNTED_P(var)) {
  2663. Z_UNPROTECT_RECURSION_P(var);
  2664. }
  2665. return 1;
  2666. }
  2667. } ZEND_HASH_FOREACH_END();
  2668. }
  2669. if (Z_REFCOUNTED_P(var)) {
  2670. Z_UNPROTECT_RECURSION_P(var);
  2671. }
  2672. }
  2673. return 0;
  2674. } /* }}} */
  2675. /* {{{ Converts the string resource in variables to desired encoding */
  2676. PHP_FUNCTION(mb_convert_variables)
  2677. {
  2678. zval *args;
  2679. zend_string *to_enc_str;
  2680. zend_string *from_enc_str;
  2681. HashTable *from_enc_ht;
  2682. mbfl_string string, result;
  2683. const mbfl_encoding *from_encoding, *to_encoding;
  2684. mbfl_encoding_detector *identd;
  2685. mbfl_buffer_converter *convd;
  2686. int n, argc;
  2687. size_t elistsz;
  2688. const mbfl_encoding **elist;
  2689. int recursion_error = 0;
  2690. ZEND_PARSE_PARAMETERS_START(3, -1)
  2691. Z_PARAM_STR(to_enc_str)
  2692. Z_PARAM_ARRAY_HT_OR_STR(from_enc_ht, from_enc_str)
  2693. Z_PARAM_VARIADIC('+', args, argc)
  2694. ZEND_PARSE_PARAMETERS_END();
  2695. /* new encoding */
  2696. to_encoding = php_mb_get_encoding(to_enc_str, 1);
  2697. if (!to_encoding) {
  2698. RETURN_THROWS();
  2699. }
  2700. /* initialize string */
  2701. from_encoding = MBSTRG(current_internal_encoding);
  2702. mbfl_string_init_set(&string, from_encoding);
  2703. mbfl_string_init(&result);
  2704. /* pre-conversion encoding */
  2705. if (from_enc_ht) {
  2706. if (php_mb_parse_encoding_array(from_enc_ht, &elist, &elistsz, 2) == FAILURE) {
  2707. RETURN_THROWS();
  2708. }
  2709. } else {
  2710. if (php_mb_parse_encoding_list(ZSTR_VAL(from_enc_str), ZSTR_LEN(from_enc_str), &elist, &elistsz, /* persistent */ 0, /* arg_num */ 2, /* allow_pass_encoding */ 0) == FAILURE) {
  2711. RETURN_THROWS();
  2712. }
  2713. }
  2714. if (elistsz == 0) {
  2715. efree(ZEND_VOIDP(elist));
  2716. zend_argument_value_error(2, "must specify at least one encoding");
  2717. RETURN_THROWS();
  2718. }
  2719. if (elistsz == 1) {
  2720. from_encoding = *elist;
  2721. } else {
  2722. /* auto detect */
  2723. from_encoding = NULL;
  2724. identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection));
  2725. if (identd != NULL) {
  2726. n = 0;
  2727. while (n < argc) {
  2728. if (mb_recursive_encoder_detector_feed(identd, &args[n], &recursion_error)) {
  2729. break;
  2730. }
  2731. n++;
  2732. }
  2733. from_encoding = mbfl_encoding_detector_judge(identd);
  2734. mbfl_encoding_detector_delete(identd);
  2735. if (recursion_error) {
  2736. efree(ZEND_VOIDP(elist));
  2737. php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
  2738. RETURN_FALSE;
  2739. }
  2740. }
  2741. if (!from_encoding) {
  2742. php_error_docref(NULL, E_WARNING, "Unable to detect encoding");
  2743. efree(ZEND_VOIDP(elist));
  2744. RETURN_FALSE;
  2745. }
  2746. }
  2747. efree(ZEND_VOIDP(elist));
  2748. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
  2749. /* If this assertion fails this means some memory allocation failure which is a bug */
  2750. ZEND_ASSERT(convd != NULL);
  2751. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  2752. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  2753. /* convert */
  2754. n = 0;
  2755. while (n < argc) {
  2756. zval *zv = &args[n];
  2757. ZVAL_DEREF(zv);
  2758. recursion_error = mb_recursive_convert_variable(convd, zv);
  2759. if (recursion_error) {
  2760. break;
  2761. }
  2762. n++;
  2763. }
  2764. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
  2765. mbfl_buffer_converter_delete(convd);
  2766. if (recursion_error) {
  2767. php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
  2768. RETURN_FALSE;
  2769. }
  2770. RETURN_STRING(from_encoding->name);
  2771. }
  2772. /* }}} */
  2773. /* HTML numeric entities */
  2774. /* Convert PHP array to data structure required by mbfl_html_numeric_entity */
  2775. static int *make_conversion_map(HashTable *target_hash, int *convmap_size)
  2776. {
  2777. zval *hash_entry;
  2778. int n_elems = zend_hash_num_elements(target_hash);
  2779. if (n_elems % 4 != 0) {
  2780. zend_argument_value_error(2, "must have a multiple of 4 elements");
  2781. return NULL;
  2782. }
  2783. int *convmap = (int *)safe_emalloc(n_elems, sizeof(int), 0);
  2784. int *mapelm = convmap;
  2785. ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
  2786. *mapelm++ = zval_get_long(hash_entry);
  2787. } ZEND_HASH_FOREACH_END();
  2788. *convmap_size = n_elems / 4;
  2789. return convmap;
  2790. }
  2791. /* {{{ Converts specified characters to HTML numeric entities */
  2792. PHP_FUNCTION(mb_encode_numericentity)
  2793. {
  2794. char *str = NULL;
  2795. zend_string *encoding = NULL;
  2796. int mapsize;
  2797. HashTable *target_hash;
  2798. bool is_hex = 0;
  2799. mbfl_string string, result, *ret;
  2800. ZEND_PARSE_PARAMETERS_START(2, 4)
  2801. Z_PARAM_STRING(str, string.len)
  2802. Z_PARAM_ARRAY_HT(target_hash)
  2803. Z_PARAM_OPTIONAL
  2804. Z_PARAM_STR_OR_NULL(encoding)
  2805. Z_PARAM_BOOL(is_hex)
  2806. ZEND_PARSE_PARAMETERS_END();
  2807. string.val = (unsigned char *)str;
  2808. string.encoding = php_mb_get_encoding(encoding, 3);
  2809. if (!string.encoding) {
  2810. RETURN_THROWS();
  2811. }
  2812. int *convmap = make_conversion_map(target_hash, &mapsize);
  2813. if (convmap == NULL) {
  2814. RETURN_THROWS();
  2815. }
  2816. ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, is_hex ? 2 : 0);
  2817. ZEND_ASSERT(ret != NULL);
  2818. // TODO: avoid reallocation ???
  2819. RETVAL_STRINGL((char *)ret->val, ret->len);
  2820. efree(ret->val);
  2821. efree(convmap);
  2822. }
  2823. /* }}} */
  2824. /* {{{ Converts HTML numeric entities to character code */
  2825. PHP_FUNCTION(mb_decode_numericentity)
  2826. {
  2827. char *str = NULL;
  2828. zend_string *encoding = NULL;
  2829. int mapsize;
  2830. HashTable *target_hash;
  2831. mbfl_string string, result, *ret;
  2832. ZEND_PARSE_PARAMETERS_START(2, 3)
  2833. Z_PARAM_STRING(str, string.len)
  2834. Z_PARAM_ARRAY_HT(target_hash)
  2835. Z_PARAM_OPTIONAL
  2836. Z_PARAM_STR_OR_NULL(encoding)
  2837. ZEND_PARSE_PARAMETERS_END();
  2838. string.val = (unsigned char *)str;
  2839. string.encoding = php_mb_get_encoding(encoding, 3);
  2840. if (!string.encoding) {
  2841. RETURN_THROWS();
  2842. }
  2843. int *convmap = make_conversion_map(target_hash, &mapsize);
  2844. if (convmap == NULL) {
  2845. RETURN_THROWS();
  2846. }
  2847. ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, 1);
  2848. ZEND_ASSERT(ret != NULL);
  2849. // TODO: avoid reallocation ???
  2850. RETVAL_STRINGL((char *)ret->val, ret->len);
  2851. efree(ret->val);
  2852. efree((void *)convmap);
  2853. }
  2854. /* }}} */
  2855. /* {{{ Sends an email message with MIME scheme */
  /* If str[pos] starts a folded-header separator (CR LF followed by a space or
   * tab), advance pos to the last whitespace character of that separator and
   * `continue` the enclosing loop, leaving the separator untouched.
   * Must be expanded inside a loop body; `pos` must be a modifiable lvalue.
   * Arguments are parenthesized so that expressions such as `buf + 1` expand
   * correctly. */
  #define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \
      if ((str)[(pos)] == '\r' && (str)[(pos) + 1] == '\n' && ((str)[(pos) + 2] == ' ' || (str)[(pos) + 2] == '\t')) { \
          (pos) += 2; \
          while ((str)[(pos) + 1] == ' ' || (str)[(pos) + 1] == '\t') { \
              (pos)++; \
          } \
          continue; \
      }
  /* Line terminator used when assembling mail headers. */
  #define CRLF "\r\n"
  2865. static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
  2866. {
  2867. const char *ps;
  2868. size_t icnt;
  2869. int state = 0;
  2870. int crlf_state = -1;
  2871. char *token = NULL;
  2872. size_t token_pos = 0;
  2873. zend_string *fld_name, *fld_val;
  2874. ps = str;
  2875. icnt = str_len;
  2876. fld_name = fld_val = NULL;
  2877. /*
  2878. * C o n t e n t - T y p e : t e x t / h t m l \r\n
  2879. * ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
  2880. * state 0 1 2 3
  2881. *
  2882. * C o n t e n t - T y p e : t e x t / h t m l \r\n
  2883. * ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
  2884. * crlf_state -1 0 1 -1
  2885. *
  2886. */
  2887. while (icnt > 0) {
  2888. switch (*ps) {
  2889. case ':':
  2890. if (crlf_state == 1) {
  2891. token_pos++;
  2892. }
  2893. if (state == 0 || state == 1) {
  2894. if(token && token_pos > 0) {
  2895. fld_name = zend_string_init(token, token_pos, 0);
  2896. }
  2897. state = 2;
  2898. } else {
  2899. token_pos++;
  2900. }
  2901. crlf_state = 0;
  2902. break;
  2903. case '\n':
  2904. if (crlf_state == -1) {
  2905. goto out;
  2906. }
  2907. crlf_state = -1;
  2908. break;
  2909. case '\r':
  2910. if (crlf_state == 1) {
  2911. token_pos++;
  2912. } else {
  2913. crlf_state = 1;
  2914. }
  2915. break;
  2916. case ' ': case '\t':
  2917. if (crlf_state == -1) {
  2918. if (state == 3) {
  2919. /* continuing from the previous line */
  2920. state = 4;
  2921. } else {
  2922. /* simply skipping this new line */
  2923. state = 5;
  2924. }
  2925. } else {
  2926. if (crlf_state == 1) {
  2927. token_pos++;
  2928. }
  2929. if (state == 1 || state == 3) {
  2930. token_pos++;
  2931. }
  2932. }
  2933. crlf_state = 0;
  2934. break;
  2935. default:
  2936. switch (state) {
  2937. case 0:
  2938. token = (char*)ps;
  2939. token_pos = 0;
  2940. state = 1;
  2941. break;
  2942. case 2:
  2943. if (crlf_state != -1) {
  2944. token = (char*)ps;
  2945. token_pos = 0;
  2946. state = 3;
  2947. break;
  2948. }
  2949. ZEND_FALLTHROUGH;
  2950. case 3:
  2951. if (crlf_state == -1) {
  2952. if(token && token_pos > 0) {
  2953. fld_val = zend_string_init(token, token_pos, 0);
  2954. }
  2955. if (fld_name != NULL && fld_val != NULL) {
  2956. zval val;
  2957. zend_str_tolower(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
  2958. ZVAL_STR(&val, fld_val);
  2959. zend_hash_update(ht, fld_name, &val);
  2960. zend_string_release_ex(fld_name, 0);
  2961. }
  2962. fld_name = fld_val = NULL;
  2963. token = (char*)ps;
  2964. token_pos = 0;
  2965. state = 1;
  2966. }
  2967. break;
  2968. case 4:
  2969. token_pos++;
  2970. state = 3;
  2971. break;
  2972. }
  2973. if (crlf_state == 1) {
  2974. token_pos++;
  2975. }
  2976. token_pos++;
  2977. crlf_state = 0;
  2978. break;
  2979. }
  2980. ps++, icnt--;
  2981. }
  2982. out:
  2983. if (state == 2) {
  2984. token = "";
  2985. token_pos = 0;
  2986. state = 3;
  2987. }
  2988. if (state == 3) {
  2989. if(token && token_pos > 0) {
  2990. fld_val = zend_string_init(token, token_pos, 0);
  2991. }
  2992. if (fld_name != NULL && fld_val != NULL) {
  2993. zval val;
  2994. zend_str_tolower(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
  2995. ZVAL_STR(&val, fld_val);
  2996. zend_hash_update(ht, fld_name, &val);
  2997. zend_string_release_ex(fld_name, 0);
  2998. }
  2999. }
  3000. return state;
  3001. }
  3002. PHP_FUNCTION(mb_send_mail)
  3003. {
  3004. char *to;
  3005. size_t to_len;
  3006. char *message;
  3007. size_t message_len;
  3008. char *subject;
  3009. size_t subject_len;
  3010. zend_string *extra_cmd = NULL;
  3011. HashTable *headers_ht = NULL;
  3012. zend_string *str_headers = NULL;
  3013. size_t n, i;
  3014. char *to_r = NULL;
  3015. char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
  3016. struct {
  3017. int cnt_type:1;
  3018. int cnt_trans_enc:1;
  3019. } suppressed_hdrs = { 0, 0 };
  3020. char *message_buf = NULL, *subject_buf = NULL, *p;
  3021. mbfl_string orig_str, conv_str;
  3022. mbfl_string *pstr; /* pointer to mbfl string for return value */
  3023. enum mbfl_no_encoding;
  3024. const mbfl_encoding *tran_cs, /* transfer text charset */
  3025. *head_enc, /* header transfer encoding */
  3026. *body_enc; /* body transfer encoding */
  3027. mbfl_memory_device device; /* automatic allocateable buffer for additional header */
  3028. const mbfl_language *lang;
  3029. int err = 0;
  3030. HashTable ht_headers;
  3031. zval *s;
  3032. extern void mbfl_memory_device_unput(mbfl_memory_device *device);
  3033. /* initialize */
  3034. mbfl_memory_device_init(&device, 0, 0);
  3035. mbfl_string_init(&orig_str);
  3036. mbfl_string_init(&conv_str);
  3037. /* character-set, transfer-encoding */
  3038. tran_cs = &mbfl_encoding_utf8;
  3039. head_enc = &mbfl_encoding_base64;
  3040. body_enc = &mbfl_encoding_base64;
  3041. lang = mbfl_no2language(MBSTRG(language));
  3042. if (lang != NULL) {
  3043. tran_cs = mbfl_no2encoding(lang->mail_charset);
  3044. head_enc = mbfl_no2encoding(lang->mail_header_encoding);
  3045. body_enc = mbfl_no2encoding(lang->mail_body_encoding);
  3046. }
  3047. ZEND_PARSE_PARAMETERS_START(3, 5)
  3048. Z_PARAM_PATH(to, to_len)
  3049. Z_PARAM_PATH(subject, subject_len)
  3050. Z_PARAM_PATH(message, message_len)
  3051. Z_PARAM_OPTIONAL
  3052. Z_PARAM_ARRAY_HT_OR_STR(headers_ht, str_headers)
  3053. Z_PARAM_PATH_STR_OR_NULL(extra_cmd)
  3054. ZEND_PARSE_PARAMETERS_END();
  3055. if (str_headers) {
  3056. if (strlen(ZSTR_VAL(str_headers)) != ZSTR_LEN(str_headers)) {
  3057. zend_argument_value_error(4, "must not contain any null bytes");
  3058. RETURN_THROWS();
  3059. }
  3060. str_headers = php_trim(str_headers, NULL, 0, 2);
  3061. } else if (headers_ht) {
  3062. str_headers = php_mail_build_headers(headers_ht);
  3063. if (EG(exception)) {
  3064. RETURN_THROWS();
  3065. }
  3066. }
  3067. zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0);
  3068. if (str_headers != NULL) {
  3069. _php_mbstr_parse_mail_headers(&ht_headers, ZSTR_VAL(str_headers), ZSTR_LEN(str_headers));
  3070. }
  3071. if ((s = zend_hash_str_find(&ht_headers, "content-type", sizeof("content-type") - 1))) {
  3072. char *tmp;
  3073. char *param_name;
  3074. char *charset = NULL;
  3075. ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
  3076. p = strchr(Z_STRVAL_P(s), ';');
  3077. if (p != NULL) {
  3078. /* skipping the padded spaces */
  3079. do {
  3080. ++p;
  3081. } while (*p == ' ' || *p == '\t');
  3082. if (*p != '\0') {
  3083. if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
  3084. if (strcasecmp(param_name, "charset") == 0) {
  3085. const mbfl_encoding *_tran_cs = tran_cs;
  3086. charset = php_strtok_r(NULL, "= \"", &tmp);
  3087. if (charset != NULL) {
  3088. _tran_cs = mbfl_name2encoding(charset);
  3089. }
  3090. if (!_tran_cs) {
  3091. php_error_docref(NULL, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
  3092. _tran_cs = &mbfl_encoding_ascii;
  3093. }
  3094. tran_cs = _tran_cs;
  3095. }
  3096. }
  3097. }
  3098. }
  3099. suppressed_hdrs.cnt_type = 1;
  3100. }
  3101. if ((s = zend_hash_str_find(&ht_headers, "content-transfer-encoding", sizeof("content-transfer-encoding") - 1))) {
  3102. const mbfl_encoding *_body_enc;
  3103. ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
  3104. _body_enc = mbfl_name2encoding(Z_STRVAL_P(s));
  3105. switch (_body_enc ? _body_enc->no_encoding : mbfl_no_encoding_invalid) {
  3106. case mbfl_no_encoding_base64:
  3107. case mbfl_no_encoding_7bit:
  3108. case mbfl_no_encoding_8bit:
  3109. body_enc = _body_enc;
  3110. break;
  3111. default:
  3112. php_error_docref(NULL, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", Z_STRVAL_P(s));
  3113. body_enc = &mbfl_encoding_8bit;
  3114. break;
  3115. }
  3116. suppressed_hdrs.cnt_trans_enc = 1;
  3117. }
  3118. /* To: */
  3119. if (to_len > 0) {
  3120. to_r = estrndup(to, to_len);
  3121. for (; to_len; to_len--) {
  3122. if (!isspace((unsigned char) to_r[to_len - 1])) {
  3123. break;
  3124. }
  3125. to_r[to_len - 1] = '\0';
  3126. }
  3127. for (i = 0; to_r[i]; i++) {
  3128. if (iscntrl((unsigned char) to_r[i])) {
  3129. /* According to RFC 822, section 3.1.1 long headers may be separated into
  3130. * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
  3131. * To prevent these separators from being replaced with a space, we use the
  3132. * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
  3133. */
  3134. SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
  3135. to_r[i] = ' ';
  3136. }
  3137. }
  3138. } else {
  3139. to_r = to;
  3140. }
  3141. /* Subject: */
  3142. orig_str.val = (unsigned char *)subject;
  3143. orig_str.len = subject_len;
  3144. orig_str.encoding = MBSTRG(current_internal_encoding);
  3145. if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid
  3146. || orig_str.encoding->no_encoding == mbfl_no_encoding_pass) {
  3147. orig_str.encoding = mbfl_identify_encoding(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
  3148. }
  3149. pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, CRLF, sizeof("Subject: [PHP-jp nnnnnnnn]" CRLF) - 1);
  3150. if (pstr != NULL) {
  3151. subject_buf = subject = (char *)pstr->val;
  3152. }
  3153. /* message body */
  3154. orig_str.val = (unsigned char *)message;
  3155. orig_str.len = message_len;
  3156. orig_str.encoding = MBSTRG(current_internal_encoding);
  3157. if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid
  3158. || orig_str.encoding->no_encoding == mbfl_no_encoding_pass) {
  3159. orig_str.encoding = mbfl_identify_encoding(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
  3160. }
  3161. pstr = NULL;
  3162. {
  3163. mbfl_string tmpstr;
  3164. if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
  3165. tmpstr.encoding = &mbfl_encoding_8bit;
  3166. pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
  3167. efree(tmpstr.val);
  3168. }
  3169. }
  3170. if (pstr != NULL) {
  3171. message_buf = message = (char *)pstr->val;
  3172. }
  3173. /* other headers */
  3174. #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
  3175. #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
  3176. #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
  3177. #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
  3178. if (str_headers != NULL) {
  3179. p = ZSTR_VAL(str_headers);
  3180. n = ZSTR_LEN(str_headers);
  3181. mbfl_memory_device_strncat(&device, p, n);
  3182. if (n > 0 && p[n - 1] != '\n') {
  3183. mbfl_memory_device_strncat(&device, CRLF, sizeof(CRLF)-1);
  3184. }
  3185. zend_string_release_ex(str_headers, 0);
  3186. }
  3187. if (!zend_hash_str_exists(&ht_headers, "mime-version", sizeof("mime-version") - 1)) {
  3188. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
  3189. mbfl_memory_device_strncat(&device, CRLF, sizeof(CRLF)-1);
  3190. }
  3191. if (!suppressed_hdrs.cnt_type) {
  3192. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
  3193. p = (char *)mbfl_encoding_preferred_mime_name(tran_cs);
  3194. if (p != NULL) {
  3195. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
  3196. mbfl_memory_device_strcat(&device, p);
  3197. }
  3198. mbfl_memory_device_strncat(&device, CRLF, sizeof(CRLF)-1);
  3199. }
  3200. if (!suppressed_hdrs.cnt_trans_enc) {
  3201. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
  3202. p = (char *)mbfl_encoding_preferred_mime_name(body_enc);
  3203. if (p == NULL) {
  3204. p = "7bit";
  3205. }
  3206. mbfl_memory_device_strcat(&device, p);
  3207. mbfl_memory_device_strncat(&device, CRLF, sizeof(CRLF)-1);
  3208. }
  3209. mbfl_memory_device_unput(&device);
  3210. mbfl_memory_device_unput(&device);
  3211. mbfl_memory_device_output('\0', &device);
  3212. str_headers = zend_string_init((char *)device.buffer, strlen((char *)device.buffer), 0);
  3213. if (force_extra_parameters) {
  3214. extra_cmd = php_escape_shell_cmd(force_extra_parameters);
  3215. } else if (extra_cmd) {
  3216. extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd));
  3217. }
  3218. if (!err && php_mail(to_r, subject, message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) {
  3219. RETVAL_TRUE;
  3220. } else {
  3221. RETVAL_FALSE;
  3222. }
  3223. if (extra_cmd) {
  3224. zend_string_release_ex(extra_cmd, 0);
  3225. }
  3226. if (to_r != to) {
  3227. efree(to_r);
  3228. }
  3229. if (subject_buf) {
  3230. efree((void *)subject_buf);
  3231. }
  3232. if (message_buf) {
  3233. efree((void *)message_buf);
  3234. }
  3235. mbfl_memory_device_clear(&device);
  3236. zend_hash_destroy(&ht_headers);
  3237. if (str_headers) {
  3238. zend_string_release_ex(str_headers, 0);
  3239. }
  3240. }
  3241. #undef SKIP_LONG_HEADER_SEP_MBSTRING
  3242. #undef CRLF
  3243. #undef MAIL_ASCIIZ_CHECK_MBSTRING
  3244. #undef PHP_MBSTR_MAIL_MIME_HEADER1
  3245. #undef PHP_MBSTR_MAIL_MIME_HEADER2
  3246. #undef PHP_MBSTR_MAIL_MIME_HEADER3
  3247. #undef PHP_MBSTR_MAIL_MIME_HEADER4
  3248. /* }}} */
  3249. /* {{{ Returns the current settings of mbstring */
  3250. PHP_FUNCTION(mb_get_info)
  3251. {
  3252. zend_string *type = NULL;
  3253. size_t n;
  3254. char *name;
  3255. zval row;
  3256. const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
  3257. const mbfl_encoding **entry;
  3258. ZEND_PARSE_PARAMETERS_START(0, 1)
  3259. Z_PARAM_OPTIONAL
  3260. Z_PARAM_STR(type)
  3261. ZEND_PARSE_PARAMETERS_END();
  3262. if (!type || zend_string_equals_literal_ci(type, "all")) {
  3263. array_init(return_value);
  3264. if (MBSTRG(current_internal_encoding)) {
  3265. add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name);
  3266. }
  3267. if (MBSTRG(http_input_identify)) {
  3268. add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name);
  3269. }
  3270. if (MBSTRG(current_http_output_encoding)) {
  3271. add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name);
  3272. }
  3273. if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
  3274. add_assoc_string(return_value, "http_output_conv_mimetypes", name);
  3275. }
  3276. if (lang != NULL) {
  3277. if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
  3278. add_assoc_string(return_value, "mail_charset", name);
  3279. }
  3280. if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
  3281. add_assoc_string(return_value, "mail_header_encoding", name);
  3282. }
  3283. if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
  3284. add_assoc_string(return_value, "mail_body_encoding", name);
  3285. }
  3286. }
  3287. add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
  3288. if (MBSTRG(encoding_translation)) {
  3289. add_assoc_string(return_value, "encoding_translation", "On");
  3290. } else {
  3291. add_assoc_string(return_value, "encoding_translation", "Off");
  3292. }
  3293. if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
  3294. add_assoc_string(return_value, "language", name);
  3295. }
  3296. n = MBSTRG(current_detect_order_list_size);
  3297. entry = MBSTRG(current_detect_order_list);
  3298. if (n > 0) {
  3299. size_t i;
  3300. array_init(&row);
  3301. for (i = 0; i < n; i++) {
  3302. add_next_index_string(&row, (*entry)->name);
  3303. entry++;
  3304. }
  3305. add_assoc_zval(return_value, "detect_order", &row);
  3306. }
  3307. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
  3308. add_assoc_string(return_value, "substitute_character", "none");
  3309. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
  3310. add_assoc_string(return_value, "substitute_character", "long");
  3311. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
  3312. add_assoc_string(return_value, "substitute_character", "entity");
  3313. } else {
  3314. add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
  3315. }
  3316. if (MBSTRG(strict_detection)) {
  3317. add_assoc_string(return_value, "strict_detection", "On");
  3318. } else {
  3319. add_assoc_string(return_value, "strict_detection", "Off");
  3320. }
  3321. } else if (zend_string_equals_literal_ci(type, "internal_encoding")) {
  3322. if (MBSTRG(current_internal_encoding)) {
  3323. RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name);
  3324. }
  3325. } else if (zend_string_equals_literal_ci(type, "http_input")) {
  3326. if (MBSTRG(http_input_identify)) {
  3327. RETVAL_STRING((char *)MBSTRG(http_input_identify)->name);
  3328. }
  3329. } else if (zend_string_equals_literal_ci(type, "http_output")) {
  3330. if (MBSTRG(current_http_output_encoding)) {
  3331. RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name);
  3332. }
  3333. } else if (zend_string_equals_literal_ci(type, "http_output_conv_mimetypes")) {
  3334. if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
  3335. RETVAL_STRING(name);
  3336. }
  3337. } else if (zend_string_equals_literal_ci(type, "mail_charset")) {
  3338. if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
  3339. RETVAL_STRING(name);
  3340. }
  3341. } else if (zend_string_equals_literal_ci(type, "mail_header_encoding")) {
  3342. if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
  3343. RETVAL_STRING(name);
  3344. }
  3345. } else if (zend_string_equals_literal_ci(type, "mail_body_encoding")) {
  3346. if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
  3347. RETVAL_STRING(name);
  3348. }
  3349. } else if (zend_string_equals_literal_ci(type, "illegal_chars")) {
  3350. RETVAL_LONG(MBSTRG(illegalchars));
  3351. } else if (zend_string_equals_literal_ci(type, "encoding_translation")) {
  3352. if (MBSTRG(encoding_translation)) {
  3353. RETVAL_STRING("On");
  3354. } else {
  3355. RETVAL_STRING("Off");
  3356. }
  3357. } else if (zend_string_equals_literal_ci(type, "language")) {
  3358. if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
  3359. RETVAL_STRING(name);
  3360. }
  3361. } else if (zend_string_equals_literal_ci(type, "detect_order")) {
  3362. n = MBSTRG(current_detect_order_list_size);
  3363. entry = MBSTRG(current_detect_order_list);
  3364. if (n > 0) {
  3365. size_t i;
  3366. array_init(return_value);
  3367. for (i = 0; i < n; i++) {
  3368. add_next_index_string(return_value, (*entry)->name);
  3369. entry++;
  3370. }
  3371. }
  3372. } else if (zend_string_equals_literal_ci(type, "substitute_character")) {
  3373. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
  3374. RETVAL_STRING("none");
  3375. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
  3376. RETVAL_STRING("long");
  3377. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
  3378. RETVAL_STRING("entity");
  3379. } else {
  3380. RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
  3381. }
  3382. } else if (zend_string_equals_literal_ci(type, "strict_detection")) {
  3383. if (MBSTRG(strict_detection)) {
  3384. RETVAL_STRING("On");
  3385. } else {
  3386. RETVAL_STRING("Off");
  3387. }
  3388. } else {
  3389. // TODO Convert to ValueError
  3390. RETURN_FALSE;
  3391. }
  3392. }
  3393. /* }}} */
  3394. static int mbfl_filt_check_errors(int c, void* data)
  3395. {
  3396. if (c == MBFL_BAD_INPUT) {
  3397. (*((mbfl_convert_filter**)data))->num_illegalchar++;
  3398. }
  3399. return 0;
  3400. }
  3401. MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const mbfl_encoding *encoding)
  3402. {
  3403. mbfl_convert_filter *filter = mbfl_convert_filter_new(encoding, &mbfl_encoding_wchar, mbfl_filt_check_errors, NULL, &filter);
  3404. while (length--) {
  3405. unsigned char c = *input++;
  3406. (filter->filter_function)(c, filter);
  3407. if (filter->num_illegalchar) {
  3408. mbfl_convert_filter_delete(filter);
  3409. return 0;
  3410. }
  3411. }
  3412. (filter->filter_flush)(filter);
  3413. int result = !filter->num_illegalchar;
  3414. mbfl_convert_filter_delete(filter);
  3415. return result;
  3416. }
  3417. static int php_mb_check_encoding_recursive(HashTable *vars, const mbfl_encoding *encoding)
  3418. {
  3419. zend_long idx;
  3420. zend_string *key;
  3421. zval *entry;
  3422. int valid = 1;
  3423. (void)(idx); /* Suppress spurious compiler warning that `idx` is not used */
  3424. if (GC_IS_RECURSIVE(vars)) {
  3425. php_error_docref(NULL, E_WARNING, "Cannot not handle circular references");
  3426. return 0;
  3427. }
  3428. GC_TRY_PROTECT_RECURSION(vars);
  3429. ZEND_HASH_FOREACH_KEY_VAL(vars, idx, key, entry) {
  3430. ZVAL_DEREF(entry);
  3431. if (key) {
  3432. if (!php_mb_check_encoding(ZSTR_VAL(key), ZSTR_LEN(key), encoding)) {
  3433. valid = 0;
  3434. break;
  3435. }
  3436. }
  3437. switch (Z_TYPE_P(entry)) {
  3438. case IS_STRING:
  3439. if (!php_mb_check_encoding(Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding)) {
  3440. valid = 0;
  3441. break;
  3442. }
  3443. break;
  3444. case IS_ARRAY:
  3445. if (!php_mb_check_encoding_recursive(Z_ARRVAL_P(entry), encoding)) {
  3446. valid = 0;
  3447. break;
  3448. }
  3449. break;
  3450. case IS_LONG:
  3451. case IS_DOUBLE:
  3452. case IS_NULL:
  3453. case IS_TRUE:
  3454. case IS_FALSE:
  3455. break;
  3456. default:
  3457. /* Other types are error. */
  3458. valid = 0;
  3459. break;
  3460. }
  3461. } ZEND_HASH_FOREACH_END();
  3462. GC_TRY_UNPROTECT_RECURSION(vars);
  3463. return valid;
  3464. }
  3465. /* {{{ Check if the string is valid for the specified encoding */
  3466. PHP_FUNCTION(mb_check_encoding)
  3467. {
  3468. zend_string *input_str = NULL, *enc = NULL;
  3469. HashTable *input_ht = NULL;
  3470. const mbfl_encoding *encoding;
  3471. ZEND_PARSE_PARAMETERS_START(0, 2)
  3472. Z_PARAM_OPTIONAL
  3473. Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(input_ht, input_str)
  3474. Z_PARAM_STR_OR_NULL(enc)
  3475. ZEND_PARSE_PARAMETERS_END();
  3476. encoding = php_mb_get_encoding(enc, 2);
  3477. if (!encoding) {
  3478. RETURN_THROWS();
  3479. }
  3480. if (input_ht) {
  3481. RETURN_BOOL(php_mb_check_encoding_recursive(input_ht, encoding));
  3482. } else if (input_str) {
  3483. RETURN_BOOL(php_mb_check_encoding(ZSTR_VAL(input_str), ZSTR_LEN(input_str), encoding));
  3484. } else {
  3485. php_error_docref(NULL, E_DEPRECATED,
  3486. "Calling mb_check_encoding() without argument is deprecated");
  3487. /* FIXME: Actually check all inputs, except $_FILES file content. */
  3488. RETURN_BOOL(MBSTRG(illegalchars) == 0);
  3489. }
  3490. }
  3491. /* }}} */
  3492. static inline zend_long php_mb_ord(const char *str, size_t str_len, zend_string *enc_name,
  3493. const uint32_t enc_name_arg_num)
  3494. {
  3495. const mbfl_encoding *enc;
  3496. enum mbfl_no_encoding no_enc;
  3497. ZEND_ASSERT(str_len > 0);
  3498. enc = php_mb_get_encoding(enc_name, enc_name_arg_num);
  3499. if (!enc) {
  3500. return -2;
  3501. }
  3502. no_enc = enc->no_encoding;
  3503. if (php_mb_is_unsupported_no_encoding(no_enc)) {
  3504. zend_value_error("mb_ord() does not support the \"%s\" encoding", enc->name);
  3505. return -2;
  3506. }
  3507. {
  3508. mbfl_wchar_device dev;
  3509. mbfl_convert_filter *filter;
  3510. zend_long cp;
  3511. mbfl_wchar_device_init(&dev);
  3512. filter = mbfl_convert_filter_new(enc, &mbfl_encoding_wchar, mbfl_wchar_device_output, 0, &dev);
  3513. /* If this assertion fails this means some memory allocation failure which is a bug */
  3514. ZEND_ASSERT(filter != NULL);
  3515. mbfl_convert_filter_feed_string(filter, (unsigned char*)str, str_len);
  3516. mbfl_convert_filter_flush(filter);
  3517. if (dev.pos < 1 || filter->num_illegalchar || dev.buffer[0] == MBFL_BAD_INPUT) {
  3518. cp = -1;
  3519. } else {
  3520. cp = dev.buffer[0];
  3521. }
  3522. mbfl_convert_filter_delete(filter);
  3523. mbfl_wchar_device_clear(&dev);
  3524. return cp;
  3525. }
  3526. }
  3527. /* {{{ */
  3528. PHP_FUNCTION(mb_ord)
  3529. {
  3530. char *str;
  3531. size_t str_len;
  3532. zend_string *enc = NULL;
  3533. zend_long cp;
  3534. ZEND_PARSE_PARAMETERS_START(1, 2)
  3535. Z_PARAM_STRING(str, str_len)
  3536. Z_PARAM_OPTIONAL
  3537. Z_PARAM_STR_OR_NULL(enc)
  3538. ZEND_PARSE_PARAMETERS_END();
  3539. if (str_len == 0) {
  3540. zend_argument_value_error(1, "must not be empty");
  3541. RETURN_THROWS();
  3542. }
  3543. cp = php_mb_ord(str, str_len, enc, 2);
  3544. if (0 > cp) {
  3545. if (cp == -2) {
  3546. RETURN_THROWS();
  3547. }
  3548. RETURN_FALSE;
  3549. }
  3550. RETURN_LONG(cp);
  3551. }
  3552. /* }}} */
  3553. static inline zend_string *php_mb_chr(zend_long cp, zend_string *enc_name, uint32_t enc_name_arg_num)
  3554. {
  3555. const mbfl_encoding *enc;
  3556. enum mbfl_no_encoding no_enc;
  3557. zend_string *ret;
  3558. char* buf;
  3559. size_t buf_len;
  3560. enc = php_mb_get_encoding(enc_name, enc_name_arg_num);
  3561. if (!enc) {
  3562. return NULL;
  3563. }
  3564. no_enc = enc->no_encoding;
  3565. if (php_mb_is_unsupported_no_encoding(no_enc)) {
  3566. zend_value_error("mb_chr() does not support the \"%s\" encoding", enc->name);
  3567. return NULL;
  3568. }
  3569. if (cp < 0 || cp > 0x10ffff) {
  3570. return NULL;
  3571. }
  3572. if (php_mb_is_no_encoding_utf8(no_enc)) {
  3573. if (cp > 0xd7ff && 0xe000 > cp) {
  3574. return NULL;
  3575. }
  3576. if (cp < 0x80) {
  3577. ret = ZSTR_CHAR(cp);
  3578. } else if (cp < 0x800) {
  3579. ret = zend_string_alloc(2, 0);
  3580. ZSTR_VAL(ret)[0] = 0xc0 | (cp >> 6);
  3581. ZSTR_VAL(ret)[1] = 0x80 | (cp & 0x3f);
  3582. ZSTR_VAL(ret)[2] = 0;
  3583. } else if (cp < 0x10000) {
  3584. ret = zend_string_alloc(3, 0);
  3585. ZSTR_VAL(ret)[0] = 0xe0 | (cp >> 12);
  3586. ZSTR_VAL(ret)[1] = 0x80 | ((cp >> 6) & 0x3f);
  3587. ZSTR_VAL(ret)[2] = 0x80 | (cp & 0x3f);
  3588. ZSTR_VAL(ret)[3] = 0;
  3589. } else {
  3590. ret = zend_string_alloc(4, 0);
  3591. ZSTR_VAL(ret)[0] = 0xf0 | (cp >> 18);
  3592. ZSTR_VAL(ret)[1] = 0x80 | ((cp >> 12) & 0x3f);
  3593. ZSTR_VAL(ret)[2] = 0x80 | ((cp >> 6) & 0x3f);
  3594. ZSTR_VAL(ret)[3] = 0x80 | (cp & 0x3f);
  3595. ZSTR_VAL(ret)[4] = 0;
  3596. }
  3597. return ret;
  3598. }
  3599. buf_len = 4;
  3600. buf = (char *) emalloc(buf_len + 1);
  3601. buf[0] = (cp >> 24) & 0xff;
  3602. buf[1] = (cp >> 16) & 0xff;
  3603. buf[2] = (cp >> 8) & 0xff;
  3604. buf[3] = cp & 0xff;
  3605. buf[4] = 0;
  3606. char *ret_str;
  3607. size_t ret_len;
  3608. long orig_illegalchars = MBSTRG(illegalchars);
  3609. MBSTRG(illegalchars) = 0;
  3610. ret_str = php_mb_convert_encoding_ex(buf, buf_len, enc, &mbfl_encoding_ucs4be, &ret_len);
  3611. if (MBSTRG(illegalchars) != 0) {
  3612. efree(buf);
  3613. efree(ret_str);
  3614. MBSTRG(illegalchars) = orig_illegalchars;
  3615. return NULL;
  3616. }
  3617. ret = zend_string_init(ret_str, ret_len, 0);
  3618. efree(ret_str);
  3619. MBSTRG(illegalchars) = orig_illegalchars;
  3620. efree(buf);
  3621. return ret;
  3622. }
  3623. /* {{{ */
  3624. PHP_FUNCTION(mb_chr)
  3625. {
  3626. zend_long cp;
  3627. zend_string *enc = NULL;
  3628. ZEND_PARSE_PARAMETERS_START(1, 2)
  3629. Z_PARAM_LONG(cp)
  3630. Z_PARAM_OPTIONAL
  3631. Z_PARAM_STR_OR_NULL(enc)
  3632. ZEND_PARSE_PARAMETERS_END();
  3633. zend_string* ret = php_mb_chr(cp, enc, 2);
  3634. if (ret == NULL) {
  3635. RETURN_FALSE;
  3636. }
  3637. RETURN_STR(ret);
  3638. }
  3639. /* }}} */
  3640. /* {{{ */
  3641. PHP_FUNCTION(mb_scrub)
  3642. {
  3643. char* str;
  3644. size_t str_len;
  3645. zend_string *enc_name = NULL;
  3646. ZEND_PARSE_PARAMETERS_START(1, 2)
  3647. Z_PARAM_STRING(str, str_len)
  3648. Z_PARAM_OPTIONAL
  3649. Z_PARAM_STR_OR_NULL(enc_name)
  3650. ZEND_PARSE_PARAMETERS_END();
  3651. const mbfl_encoding *enc = php_mb_get_encoding(enc_name, 2);
  3652. if (!enc) {
  3653. RETURN_THROWS();
  3654. }
  3655. size_t ret_len;
  3656. char *ret = php_mb_convert_encoding_ex(str, str_len, enc, enc, &ret_len);
  3657. RETVAL_STRINGL(ret, ret_len);
  3658. efree(ret);
  3659. }
  3660. /* }}} */
  3661. /* {{{ php_mb_populate_current_detect_order_list */
  3662. static void php_mb_populate_current_detect_order_list(void)
  3663. {
  3664. const mbfl_encoding **entry = 0;
  3665. size_t nentries;
  3666. if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
  3667. nentries = MBSTRG(detect_order_list_size);
  3668. entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
  3669. memcpy(ZEND_VOIDP(entry), MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
  3670. } else {
  3671. const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
  3672. size_t i;
  3673. nentries = MBSTRG(default_detect_order_list_size);
  3674. entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
  3675. for (i = 0; i < nentries; i++) {
  3676. entry[i] = mbfl_no2encoding(src[i]);
  3677. }
  3678. }
  3679. MBSTRG(current_detect_order_list) = entry;
  3680. MBSTRG(current_detect_order_list_size) = nentries;
  3681. }
  3682. /* }}} */
  3683. /* {{{ static int php_mb_encoding_translation() */
  3684. static int php_mb_encoding_translation(void)
  3685. {
  3686. return MBSTRG(encoding_translation);
  3687. }
  3688. /* }}} */
  3689. /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
  3690. MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
  3691. {
  3692. if (enc) {
  3693. if (enc->mblen_table) {
  3694. if (s) {
  3695. return enc->mblen_table[*(unsigned char *)s];
  3696. }
  3697. } else if (enc->flag & MBFL_ENCTYPE_WCS2) {
  3698. return 2;
  3699. } else if (enc->flag & MBFL_ENCTYPE_WCS4) {
  3700. return 4;
  3701. }
  3702. }
  3703. return 1;
  3704. }
  3705. /* }}} */
  3706. /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
  3707. MBSTRING_API size_t php_mb_mbchar_bytes(const char *s)
  3708. {
  3709. return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
  3710. }
  3711. /* }}} */
  3712. /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
  3713. MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
  3714. {
  3715. const char *p = s;
  3716. char *last=NULL;
  3717. if (nbytes == (size_t)-1) {
  3718. size_t nb = 0;
  3719. while (*p != '\0') {
  3720. if (nb == 0) {
  3721. if ((unsigned char)*p == (unsigned char)c) {
  3722. last = (char *)p;
  3723. }
  3724. nb = php_mb_mbchar_bytes_ex(p, enc);
  3725. if (nb == 0) {
  3726. return NULL; /* something is going wrong! */
  3727. }
  3728. }
  3729. --nb;
  3730. ++p;
  3731. }
  3732. } else {
  3733. size_t bcnt = nbytes;
  3734. size_t nbytes_char;
  3735. while (bcnt > 0) {
  3736. if ((unsigned char)*p == (unsigned char)c) {
  3737. last = (char *)p;
  3738. }
  3739. nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
  3740. if (bcnt < nbytes_char) {
  3741. return NULL;
  3742. }
  3743. p += nbytes_char;
  3744. bcnt -= nbytes_char;
  3745. }
  3746. }
  3747. return last;
  3748. }
  3749. /* }}} */
  3750. /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
  3751. MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes)
  3752. {
  3753. return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
  3754. }
  3755. /* }}} */
  3756. /* {{{ MBSTRING_API size_t php_mb_stripos() */
  3757. MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t old_haystack_len, const char *old_needle, size_t old_needle_len, zend_long offset, const mbfl_encoding *enc)
  3758. {
  3759. size_t n = (size_t) -1;
  3760. mbfl_string haystack, needle;
  3761. mbfl_string_init_set(&haystack, enc);
  3762. mbfl_string_init_set(&needle, enc);
  3763. do {
  3764. /* We're using simple case-folding here, because we'd have to deal with remapping of
  3765. * offsets otherwise. */
  3766. size_t len = 0;
  3767. haystack.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_haystack, old_haystack_len, &len, enc);
  3768. haystack.len = len;
  3769. if (!haystack.val) {
  3770. break;
  3771. }
  3772. if (haystack.len == 0) {
  3773. break;
  3774. }
  3775. needle.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_needle, old_needle_len, &len, enc);
  3776. needle.len = len;
  3777. if (!needle.val) {
  3778. break;
  3779. }
  3780. n = mbfl_strpos(&haystack, &needle, offset, mode);
  3781. } while(0);
  3782. if (haystack.val) {
  3783. efree(haystack.val);
  3784. }
  3785. if (needle.val) {
  3786. efree(needle.val);
  3787. }
  3788. return n;
  3789. }
  3790. /* }}} */
  3791. static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size) /* {{{ */
  3792. {
  3793. *list = (const zend_encoding **)MBSTRG(http_input_list);
  3794. *list_size = MBSTRG(http_input_list_size);
  3795. }
  3796. /* }}} */
  3797. static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding) /* {{{ */
  3798. {
  3799. MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
  3800. }
  3801. /* }}} */