/* zend_language_scanner.l */
  /*
     +----------------------------------------------------------------------+
     | Zend Engine                                                          |
     +----------------------------------------------------------------------+
     | Copyright (c) Zend Technologies Ltd. (http://www.zend.com)           |
     +----------------------------------------------------------------------+
     | This source file is subject to version 2.00 of the Zend license,     |
     | that is bundled with this package in the file LICENSE, and is        |
     | available through the world-wide-web at the following url:           |
     | http://www.zend.com/license/2_00.txt.                                |
     | If you did not receive a copy of the Zend license and are unable to  |
     | obtain it through the world-wide-web, please send a note to          |
     | license@zend.com so we can mail you a copy immediately.              |
     +----------------------------------------------------------------------+
     | Authors: Marcus Boerger <helly@php.net>                              |
     |          Nuno Lopes <nlopess@php.net>                                |
     |          Scott MacVicar <scottmac@php.net>                           |
     | Flex version authors:                                                |
     |          Andi Gutmans <andi@php.net>                                 |
     |          Zeev Suraski <zeev@php.net>                                 |
     +----------------------------------------------------------------------+
  */
  /*
   * Scanner tracing hook. With the condition left at 0 the macro expands to
   * nothing (its arguments are not evaluated); switch it to 1 to print each
   * state/character pair handed to YYDEBUG.
   */
  #if 0
  # define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
  #else
  # define YYDEBUG(s, c)
  #endif
  28. #include "zend_language_scanner_defs.h"
  29. #include <errno.h>
  30. #include "zend.h"
  31. #ifdef ZEND_WIN32
  32. # include <Winuser.h>
  33. #endif
  34. #include "zend_alloc.h"
  35. #include <zend_language_parser.h>
  36. #include "zend_compile.h"
  37. #include "zend_language_scanner.h"
  38. #include "zend_highlight.h"
  39. #include "zend_constants.h"
  40. #include "zend_variables.h"
  41. #include "zend_operators.h"
  42. #include "zend_API.h"
  43. #include "zend_strtod.h"
  44. #include "zend_exceptions.h"
  45. #include "zend_virtual_cwd.h"
  /*
   * Glue between the generated scanner and the scanner globals: the character
   * type, the refill check and the cursor/limit/marker/condition accessors.
   * Every macro argument is parenthesised so that compound expressions expand
   * as a single operand.
   */
  #define YYCTYPE   unsigned char
  /* Bail out of the scanning function (return 0) once cursor + n reaches the
   * limit plus the ZEND_MMAP_AHEAD slack. */
  #define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
  #define YYCURSOR  SCNG(yy_cursor)
  #define YYLIMIT   SCNG(yy_limit)
  #define YYMARKER  SCNG(yy_marker)

  #define YYGETCONDITION()  SCNG(yy_state)
  #define YYSETCONDITION(s) SCNG(yy_state) = (s)

  #define STATE(name)  yyc##name

  /* emulate flex constructs */
  #define BEGIN(state) YYSETCONDITION(STATE(state))
  #define YYSTATE      YYGETCONDITION()
  #define yytext       ((char*)SCNG(yy_text))
  #define yyleng       SCNG(yy_leng)
  /* Keep only the first x characters of the current token: the cursor is moved
   * back to yytext + x and yyleng is set to x. */
  #define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + (x); \
                            yyleng   = (unsigned int)(x); } while(0)
  #define yymore()     goto yymore_restart
  /* perform sanity check. If this message is triggered you should
     increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
  /*!max:re2c */
  #if ZEND_MMAP_AHEAD < YYMAXFILL
  # error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
  #endif
  68. #include <stdarg.h>
  69. #ifdef HAVE_UNISTD_H
  70. # include <unistd.h>
  71. #endif
  72. /* Globals Macros */
  73. #define SCNG LANG_SCNG
  74. #ifdef ZTS
  75. ZEND_API ts_rsrc_id language_scanner_globals_id;
  76. ZEND_API size_t language_scanner_globals_offset;
  77. #else
  78. ZEND_API zend_php_scanner_globals language_scanner_globals;
  79. #endif
  /*
   * Advance CG(zend_lineno) once per line break found in the l bytes starting
   * at s. "\n" and a lone "\r" each count as one break; the "\r" of a "\r\n"
   * pair is skipped so the pair counts once.
   *
   * NOTE(review): for a '\r' in the last position the byte at s[l] is read,
   * so the buffer must stay readable one byte past the range - confirm at the
   * call sites.
   */
  #define HANDLE_NEWLINES(s, l) \
  do { \
      char *hn_cur_ = (s), *hn_end_ = hn_cur_ + (l); \
      \
      while (hn_cur_ < hn_end_) { \
          if (*hn_cur_ == '\n' || (*hn_cur_ == '\r' && (*(hn_cur_ + 1) != '\n'))) { \
              CG(zend_lineno)++; \
          } \
          hn_cur_++; \
      } \
  } while (0)
  /*
   * Advance CG(zend_lineno) when the single character c is '\n' or '\r'.
   * The argument is parenthesised so that compound expressions are compared
   * as a whole; note that it is still evaluated up to twice.
   */
  #define HANDLE_NEWLINE(c) \
  { \
      if ((c) == '\n' || (c) == '\r') { \
          CG(zend_lineno)++; \
      } \
  }
  /* To save initial string length after scanning to first variable */
  #define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) SCNG(scanned_string_len) = (len)
  #define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    SCNG(scanned_string_len)

  /*
   * Character classes for labels: ASCII letters, '_' and any value >= 0x80
   * may start a label; successors additionally allow ASCII digits.
   * NOTE(review): the ">= 0x80" test only matches high bytes when c is an
   * unsigned value - confirm callers do not pass plain (signed) char.
   */
  #define IS_LABEL_START(c) \
      (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_' || (c) >= 0x80)
  #define IS_LABEL_SUCCESSOR(c) \
      (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || ((c) >= '0' && (c) <= '9') || (c) == '_' || (c) >= 0x80)

  /* Octal and hexadecimal digit tests (hex accepts both letter cases). */
  #define ZEND_IS_OCT(c)  ((c)>='0' && (c)<='7')
  #define ZEND_IS_HEX(c)  (((c)>='0' && (c)<='9') || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F'))
  /*
   * Remove every '_' from the NUL-terminated string str, in place.
   *
   * str: buffer to compact; it is re-terminated after the last kept character.
   * len: in/out length counter, decremented once for each underscore removed.
   */
  static void strip_underscores(char *str, size_t *len)
  {
      const char *in = str;
      char *out = str;

      for (; *in != '\0'; in++) {
          if (*in == '_') {
              --(*len);
              continue;
          }
          *out++ = *in;
      }
      *out = '\0';
  }
  118. static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
  119. {
  120. const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
  121. ZEND_ASSERT(internal_encoding);
  122. return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding));
  123. }
  124. static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
  125. {
  126. return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding));
  127. }
  128. static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
  129. {
  130. return zend_multibyte_encoding_converter(to, to_length, from, from_length,
  131. LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8);
  132. }
  133. static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
  134. {
  135. const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
  136. ZEND_ASSERT(internal_encoding);
  137. return zend_multibyte_encoding_converter(to, to_length, from, from_length,
  138. internal_encoding, zend_multibyte_encoding_utf8);
  139. }
  140. static void _yy_push_state(int new_state)
  141. {
  142. zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION());
  143. YYSETCONDITION(new_state);
  144. }
  145. #define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
  146. static void yy_pop_state(void)
  147. {
  148. int *stack_state = zend_stack_top(&SCNG(state_stack));
  149. YYSETCONDITION(*stack_state);
  150. zend_stack_del_top(&SCNG(state_stack));
  151. }
  152. static void yy_scan_buffer(char *str, size_t len)
  153. {
  154. YYCURSOR = (YYCTYPE*)str;
  155. YYLIMIT = YYCURSOR + len;
  156. if (!SCNG(yy_start)) {
  157. SCNG(yy_start) = YYCURSOR;
  158. }
  159. }
  160. void startup_scanner(void)
  161. {
  162. CG(parse_error) = 0;
  163. CG(doc_comment) = NULL;
  164. CG(extra_fn_flags) = 0;
  165. zend_stack_init(&SCNG(state_stack), sizeof(int));
  166. zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location));
  167. zend_ptr_stack_init(&SCNG(heredoc_label_stack));
  168. SCNG(heredoc_scan_ahead) = 0;
  169. }
  170. static void heredoc_label_dtor(zend_heredoc_label *heredoc_label) {
  171. efree(heredoc_label->label);
  172. }
  173. void shutdown_scanner(void)
  174. {
  175. CG(parse_error) = 0;
  176. RESET_DOC_COMMENT();
  177. zend_stack_destroy(&SCNG(state_stack));
  178. zend_stack_destroy(&SCNG(nest_location_stack));
  179. zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
  180. zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
  181. SCNG(heredoc_scan_ahead) = 0;
  182. SCNG(on_event) = NULL;
  183. }
  184. ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
  185. {
  186. lex_state->yy_leng = SCNG(yy_leng);
  187. lex_state->yy_start = SCNG(yy_start);
  188. lex_state->yy_text = SCNG(yy_text);
  189. lex_state->yy_cursor = SCNG(yy_cursor);
  190. lex_state->yy_marker = SCNG(yy_marker);
  191. lex_state->yy_limit = SCNG(yy_limit);
  192. lex_state->state_stack = SCNG(state_stack);
  193. zend_stack_init(&SCNG(state_stack), sizeof(int));
  194. lex_state->nest_location_stack = SCNG(nest_location_stack);
  195. zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location));
  196. lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
  197. zend_ptr_stack_init(&SCNG(heredoc_label_stack));
  198. lex_state->in = SCNG(yy_in);
  199. lex_state->yy_state = YYSTATE;
  200. lex_state->filename = CG(compiled_filename);
  201. lex_state->lineno = CG(zend_lineno);
  202. CG(compiled_filename) = NULL;
  203. lex_state->script_org = SCNG(script_org);
  204. lex_state->script_org_size = SCNG(script_org_size);
  205. lex_state->script_filtered = SCNG(script_filtered);
  206. lex_state->script_filtered_size = SCNG(script_filtered_size);
  207. lex_state->input_filter = SCNG(input_filter);
  208. lex_state->output_filter = SCNG(output_filter);
  209. lex_state->script_encoding = SCNG(script_encoding);
  210. lex_state->on_event = SCNG(on_event);
  211. lex_state->on_event_context = SCNG(on_event_context);
  212. lex_state->ast = CG(ast);
  213. lex_state->ast_arena = CG(ast_arena);
  214. }
  215. ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
  216. {
  217. SCNG(yy_leng) = lex_state->yy_leng;
  218. SCNG(yy_start) = lex_state->yy_start;
  219. SCNG(yy_text) = lex_state->yy_text;
  220. SCNG(yy_cursor) = lex_state->yy_cursor;
  221. SCNG(yy_marker) = lex_state->yy_marker;
  222. SCNG(yy_limit) = lex_state->yy_limit;
  223. zend_stack_destroy(&SCNG(state_stack));
  224. SCNG(state_stack) = lex_state->state_stack;
  225. zend_stack_destroy(&SCNG(nest_location_stack));
  226. SCNG(nest_location_stack) = lex_state->nest_location_stack;
  227. zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
  228. zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
  229. SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
  230. SCNG(yy_in) = lex_state->in;
  231. YYSETCONDITION(lex_state->yy_state);
  232. CG(zend_lineno) = lex_state->lineno;
  233. zend_restore_compiled_filename(lex_state->filename);
  234. if (SCNG(script_filtered)) {
  235. efree(SCNG(script_filtered));
  236. SCNG(script_filtered) = NULL;
  237. }
  238. SCNG(script_org) = lex_state->script_org;
  239. SCNG(script_org_size) = lex_state->script_org_size;
  240. SCNG(script_filtered) = lex_state->script_filtered;
  241. SCNG(script_filtered_size) = lex_state->script_filtered_size;
  242. SCNG(input_filter) = lex_state->input_filter;
  243. SCNG(output_filter) = lex_state->output_filter;
  244. SCNG(script_encoding) = lex_state->script_encoding;
  245. SCNG(on_event) = lex_state->on_event;
  246. SCNG(on_event_context) = lex_state->on_event_context;
  247. CG(ast) = lex_state->ast;
  248. CG(ast_arena) = lex_state->ast_arena;
  249. RESET_DOC_COMMENT();
  250. }
  251. ZEND_API zend_result zend_lex_tstring(zval *zv, unsigned char *ident)
  252. {
  253. unsigned char *end = ident;
  254. while ((*end >= 'a' && *end <= 'z') || (*end >= 'A' && *end <= 'Z') || *end == '_') {
  255. end++;
  256. }
  257. size_t length = end - ident;
  258. if (length == 0) {
  259. ZEND_ASSERT(ident[0] == '<' && ident[1] == '?' && ident[2] == '=');
  260. zend_throw_exception(zend_ce_parse_error, "Cannot use \"<?=\" as an identifier", 0);
  261. return FAILURE;
  262. }
  263. if (SCNG(on_event)) {
  264. SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, (char *) ident, length, SCNG(on_event_context));
  265. }
  266. ZVAL_STRINGL(zv, (char *) ident, length);
  267. return SUCCESS;
  268. }
  /* Byte order marks, as string literals (the terminating NUL is not part of
   * the mark, hence the sizeof(...)-1 at the points of use). */
  #define BOM_UTF32_BE    "\x00\x00\xfe\xff"
  #define BOM_UTF32_LE    "\xff\xfe\x00\x00"
  #define BOM_UTF16_BE    "\xfe\xff"
  #define BOM_UTF16_LE    "\xff\xfe"
  #define BOM_UTF8        "\xef\xbb\xbf"
  274. static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size)
  275. {
  276. const unsigned char *p;
  277. int wchar_size = 2;
  278. int le = 0;
  279. /* utf-16 or utf-32? */
  280. p = script;
  281. assert(p >= script);
  282. while ((size_t)(p-script) < script_size) {
  283. p = memchr(p, 0, script_size-(p-script)-2);
  284. if (!p) {
  285. break;
  286. }
  287. if (*(p+1) == '\0' && *(p+2) == '\0') {
  288. wchar_size = 4;
  289. break;
  290. }
  291. /* searching for UTF-32 specific byte orders, so this will do */
  292. p += 4;
  293. }
  294. /* BE or LE? */
  295. p = script;
  296. assert(p >= script);
  297. while ((size_t)(p-script) < script_size) {
  298. if (*p == '\0' && *(p+wchar_size-1) != '\0') {
  299. /* BE */
  300. le = 0;
  301. break;
  302. } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
  303. /* LE* */
  304. le = 1;
  305. break;
  306. }
  307. p += wchar_size;
  308. }
  309. if (wchar_size == 2) {
  310. return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
  311. } else {
  312. return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
  313. }
  314. return NULL;
  315. }
  316. static const zend_encoding* zend_multibyte_detect_unicode(void)
  317. {
  318. const zend_encoding *script_encoding = NULL;
  319. int bom_size;
  320. unsigned char *pos1, *pos2;
  321. if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
  322. return NULL;
  323. }
  324. /* check out BOM */
  325. if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
  326. script_encoding = zend_multibyte_encoding_utf32be;
  327. bom_size = sizeof(BOM_UTF32_BE)-1;
  328. } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
  329. script_encoding = zend_multibyte_encoding_utf32le;
  330. bom_size = sizeof(BOM_UTF32_LE)-1;
  331. } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
  332. script_encoding = zend_multibyte_encoding_utf16be;
  333. bom_size = sizeof(BOM_UTF16_BE)-1;
  334. } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
  335. script_encoding = zend_multibyte_encoding_utf16le;
  336. bom_size = sizeof(BOM_UTF16_LE)-1;
  337. } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
  338. script_encoding = zend_multibyte_encoding_utf8;
  339. bom_size = sizeof(BOM_UTF8)-1;
  340. }
  341. if (script_encoding) {
  342. /* remove BOM */
  343. LANG_SCNG(script_org) += bom_size;
  344. LANG_SCNG(script_org_size) -= bom_size;
  345. return script_encoding;
  346. }
  347. /* script contains NULL bytes -> auto-detection */
  348. if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
  349. /* check if the NULL byte is after the __HALT_COMPILER(); */
  350. pos2 = LANG_SCNG(script_org);
  351. while ((size_t)(pos1 - pos2) >= sizeof("__HALT_COMPILER();")-1) {
  352. pos2 = memchr(pos2, '_', pos1 - pos2);
  353. if (!pos2) break;
  354. pos2++;
  355. if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
  356. pos2 += sizeof("_HALT_COMPILER")-1;
  357. while (*pos2 == ' ' ||
  358. *pos2 == '\t' ||
  359. *pos2 == '\r' ||
  360. *pos2 == '\n') {
  361. pos2++;
  362. }
  363. if (*pos2 == '(') {
  364. pos2++;
  365. while (*pos2 == ' ' ||
  366. *pos2 == '\t' ||
  367. *pos2 == '\r' ||
  368. *pos2 == '\n') {
  369. pos2++;
  370. }
  371. if (*pos2 == ')') {
  372. pos2++;
  373. while (*pos2 == ' ' ||
  374. *pos2 == '\t' ||
  375. *pos2 == '\r' ||
  376. *pos2 == '\n') {
  377. pos2++;
  378. }
  379. if (*pos2 == ';') {
  380. return NULL;
  381. }
  382. }
  383. }
  384. }
  385. }
  386. /* make best effort if BOM is missing */
  387. return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size));
  388. }
  389. return NULL;
  390. }
  391. static const zend_encoding* zend_multibyte_find_script_encoding(void)
  392. {
  393. const zend_encoding *script_encoding;
  394. if (CG(detect_unicode)) {
  395. /* check out bom(byte order mark) and see if containing wchars */
  396. script_encoding = zend_multibyte_detect_unicode();
  397. if (script_encoding != NULL) {
  398. /* bom or wchar detection is prior to 'script_encoding' option */
  399. return script_encoding;
  400. }
  401. }
  402. /* if no script_encoding specified, just leave alone */
  403. if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
  404. return NULL;
  405. }
  406. /* if multiple encodings specified, detect automagically */
  407. if (CG(script_encoding_list_size) > 1) {
  408. return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size));
  409. }
  410. return CG(script_encoding_list)[0];
  411. }
  412. ZEND_API zend_result zend_multibyte_set_filter(const zend_encoding *onetime_encoding)
  413. {
  414. const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
  415. const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding();
  416. if (!script_encoding) {
  417. return FAILURE;
  418. }
  419. /* judge input/output filter */
  420. LANG_SCNG(script_encoding) = script_encoding;
  421. LANG_SCNG(input_filter) = NULL;
  422. LANG_SCNG(output_filter) = NULL;
  423. if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
  424. if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
  425. /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
  426. LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
  427. LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
  428. } else {
  429. LANG_SCNG(input_filter) = NULL;
  430. LANG_SCNG(output_filter) = NULL;
  431. }
  432. return SUCCESS;
  433. }
  434. if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
  435. LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
  436. LANG_SCNG(output_filter) = NULL;
  437. } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
  438. LANG_SCNG(input_filter) = NULL;
  439. LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
  440. } else {
  441. /* both script and internal encodings are incompatible w/ flex */
  442. LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
  443. LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
  444. }
  445. return SUCCESS;
  446. }
  447. ZEND_API zend_result open_file_for_scanning(zend_file_handle *file_handle)
  448. {
  449. char *buf;
  450. size_t size;
  451. zend_string *compiled_filename;
  452. if (zend_stream_fixup(file_handle, &buf, &size) == FAILURE) {
  453. /* Still add it to open_files to make destroy_file_handle work */
  454. zend_llist_add_element(&CG(open_files), file_handle);
  455. file_handle->in_list = 1;
  456. return FAILURE;
  457. }
  458. ZEND_ASSERT(!EG(exception) && "stream_fixup() should have failed");
  459. zend_llist_add_element(&CG(open_files), file_handle);
  460. file_handle->in_list = 1;
  461. /* Reset the scanner for scanning the new file */
  462. SCNG(yy_in) = file_handle;
  463. SCNG(yy_start) = NULL;
  464. if (size != (size_t)-1) {
  465. if (CG(multibyte)) {
  466. SCNG(script_org) = (unsigned char*)buf;
  467. SCNG(script_org_size) = size;
  468. SCNG(script_filtered) = NULL;
  469. zend_multibyte_set_filter(NULL);
  470. if (SCNG(input_filter)) {
  471. if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
  472. zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
  473. "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
  474. }
  475. buf = (char*)SCNG(script_filtered);
  476. size = SCNG(script_filtered_size);
  477. }
  478. }
  479. SCNG(yy_start) = (unsigned char *)buf;
  480. yy_scan_buffer(buf, size);
  481. } else {
  482. zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
  483. }
  484. if (CG(skip_shebang)) {
  485. BEGIN(SHEBANG);
  486. } else {
  487. BEGIN(INITIAL);
  488. }
  489. if (file_handle->opened_path) {
  490. compiled_filename = zend_string_copy(file_handle->opened_path);
  491. } else {
  492. compiled_filename = zend_string_copy(file_handle->filename);
  493. }
  494. zend_set_compiled_filename(compiled_filename);
  495. zend_string_release_ex(compiled_filename, 0);
  496. RESET_DOC_COMMENT();
  497. CG(zend_lineno) = 1;
  498. CG(increment_lineno) = 0;
  499. return SUCCESS;
  500. }
  501. static zend_op_array *zend_compile(int type)
  502. {
  503. zend_op_array *op_array = NULL;
  504. bool original_in_compilation = CG(in_compilation);
  505. CG(in_compilation) = 1;
  506. CG(ast) = NULL;
  507. CG(ast_arena) = zend_arena_create(1024 * 32);
  508. if (!zendparse()) {
  509. int last_lineno = CG(zend_lineno);
  510. zend_file_context original_file_context;
  511. zend_oparray_context original_oparray_context;
  512. zend_op_array *original_active_op_array = CG(active_op_array);
  513. op_array = emalloc(sizeof(zend_op_array));
  514. init_op_array(op_array, type, INITIAL_OP_ARRAY_SIZE);
  515. CG(active_op_array) = op_array;
  516. /* Use heap to not waste arena memory */
  517. op_array->fn_flags |= ZEND_ACC_HEAP_RT_CACHE;
  518. if (zend_ast_process) {
  519. zend_ast_process(CG(ast));
  520. }
  521. zend_file_context_begin(&original_file_context);
  522. zend_oparray_context_begin(&original_oparray_context);
  523. zend_compile_top_stmt(CG(ast));
  524. CG(zend_lineno) = last_lineno;
  525. zend_emit_final_return(type == ZEND_USER_FUNCTION);
  526. op_array->line_start = 1;
  527. op_array->line_end = last_lineno;
  528. zend_init_static_variables_map_ptr(op_array);
  529. pass_two(op_array);
  530. zend_oparray_context_end(&original_oparray_context);
  531. zend_file_context_end(&original_file_context);
  532. CG(active_op_array) = original_active_op_array;
  533. }
  534. zend_ast_destroy(CG(ast));
  535. zend_arena_destroy(CG(ast_arena));
  536. CG(in_compilation) = original_in_compilation;
  537. return op_array;
  538. }
  539. ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type)
  540. {
  541. zend_lex_state original_lex_state;
  542. zend_op_array *op_array = NULL;
  543. zend_save_lexical_state(&original_lex_state);
  544. if (open_file_for_scanning(file_handle)==FAILURE) {
  545. if (!EG(exception)) {
  546. if (type==ZEND_REQUIRE) {
  547. zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, ZSTR_VAL(file_handle->filename));
  548. } else {
  549. zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, ZSTR_VAL(file_handle->filename));
  550. }
  551. }
  552. } else {
  553. op_array = zend_compile(ZEND_USER_FUNCTION);
  554. }
  555. zend_restore_lexical_state(&original_lex_state);
  556. return op_array;
  557. }
  558. ZEND_API zend_ast *zend_compile_string_to_ast(
  559. zend_string *code, zend_arena **ast_arena, zend_string *filename) {
  560. zval code_zv;
  561. bool original_in_compilation;
  562. zend_lex_state original_lex_state;
  563. zend_ast *ast;
  564. ZVAL_STR_COPY(&code_zv, code);
  565. original_in_compilation = CG(in_compilation);
  566. CG(in_compilation) = 1;
  567. zend_save_lexical_state(&original_lex_state);
  568. zend_prepare_string_for_scanning(&code_zv, filename);
  569. CG(ast) = NULL;
  570. CG(ast_arena) = zend_arena_create(1024 * 32);
  571. LANG_SCNG(yy_state) = yycINITIAL;
  572. if (zendparse() != 0) {
  573. zend_ast_destroy(CG(ast));
  574. zend_arena_destroy(CG(ast_arena));
  575. CG(ast) = NULL;
  576. }
  577. /* restore_lexical_state changes CG(ast) and CG(ast_arena) */
  578. ast = CG(ast);
  579. *ast_arena = CG(ast_arena);
  580. zend_restore_lexical_state(&original_lex_state);
  581. CG(in_compilation) = original_in_compilation;
  582. zval_ptr_dtor_str(&code_zv);
  583. return ast;
  584. }
  585. zend_op_array *compile_filename(int type, zend_string *filename)
  586. {
  587. zend_file_handle file_handle;
  588. zend_op_array *retval;
  589. zend_string *opened_path = NULL;
  590. zend_stream_init_filename_ex(&file_handle, filename);
  591. retval = zend_compile_file(&file_handle, type);
  592. if (retval && file_handle.handle.stream.handle) {
  593. if (!file_handle.opened_path) {
  594. file_handle.opened_path = opened_path = zend_string_copy(filename);
  595. }
  596. zend_hash_add_empty_element(&EG(included_files), file_handle.opened_path);
  597. if (opened_path) {
  598. zend_string_release_ex(opened_path, 0);
  599. }
  600. }
  601. zend_destroy_file_handle(&file_handle);
  602. return retval;
  603. }
  604. ZEND_API void zend_prepare_string_for_scanning(zval *str, zend_string *filename)
  605. {
  606. char *buf;
  607. size_t size, old_len;
  608. /* enforce ZEND_MMAP_AHEAD trailing NULLs for flex... */
  609. old_len = Z_STRLEN_P(str);
  610. Z_STR_P(str) = zend_string_extend(Z_STR_P(str), old_len + ZEND_MMAP_AHEAD, 0);
  611. Z_TYPE_INFO_P(str) = IS_STRING_EX;
  612. memset(Z_STRVAL_P(str) + old_len, 0, ZEND_MMAP_AHEAD + 1);
  613. SCNG(yy_in) = NULL;
  614. SCNG(yy_start) = NULL;
  615. buf = Z_STRVAL_P(str);
  616. size = old_len;
  617. if (CG(multibyte)) {
  618. SCNG(script_org) = (unsigned char*)buf;
  619. SCNG(script_org_size) = size;
  620. SCNG(script_filtered) = NULL;
  621. zend_multibyte_set_filter(zend_multibyte_get_internal_encoding());
  622. if (SCNG(input_filter)) {
  623. if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
  624. zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
  625. "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
  626. }
  627. buf = (char*)SCNG(script_filtered);
  628. size = SCNG(script_filtered_size);
  629. }
  630. }
  631. yy_scan_buffer(buf, size);
  632. zend_set_compiled_filename(filename);
  633. CG(zend_lineno) = 1;
  634. CG(increment_lineno) = 0;
  635. RESET_DOC_COMMENT();
  636. }
  637. ZEND_API size_t zend_get_scanned_file_offset(void)
  638. {
  639. size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
  640. if (SCNG(input_filter)) {
  641. size_t original_offset = offset, length = 0;
  642. do {
  643. unsigned char *p = NULL;
  644. if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset)) {
  645. return (size_t)-1;
  646. }
  647. efree(p);
  648. if (length > original_offset) {
  649. offset--;
  650. } else if (length < original_offset) {
  651. offset++;
  652. }
  653. } while (original_offset != length);
  654. }
  655. return offset;
  656. }
  657. zend_op_array *compile_string(zend_string *source_string, const char *filename)
  658. {
  659. zend_lex_state original_lex_state;
  660. zend_op_array *op_array = NULL;
  661. zval tmp;
  662. zend_string *filename_str;
  663. if (ZSTR_LEN(source_string) == 0) {
  664. return NULL;
  665. }
  666. ZVAL_STR_COPY(&tmp, source_string);
  667. zend_save_lexical_state(&original_lex_state);
  668. filename_str = zend_string_init(filename, strlen(filename), 0);
  669. zend_prepare_string_for_scanning(&tmp, filename_str);
  670. zend_string_release(filename_str);
  671. BEGIN(ST_IN_SCRIPTING);
  672. op_array = zend_compile(ZEND_EVAL_CODE);
  673. zend_restore_lexical_state(&original_lex_state);
  674. zval_ptr_dtor(&tmp);
  675. return op_array;
  676. }
  677. zend_result highlight_file(const char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini)
  678. {
  679. zend_lex_state original_lex_state;
  680. zend_file_handle file_handle;
  681. zend_stream_init_filename(&file_handle, filename);
  682. zend_save_lexical_state(&original_lex_state);
  683. if (open_file_for_scanning(&file_handle)==FAILURE) {
  684. zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename);
  685. zend_destroy_file_handle(&file_handle);
  686. zend_restore_lexical_state(&original_lex_state);
  687. return FAILURE;
  688. }
  689. zend_highlight(syntax_highlighter_ini);
  690. if (SCNG(script_filtered)) {
  691. efree(SCNG(script_filtered));
  692. SCNG(script_filtered) = NULL;
  693. }
  694. zend_destroy_file_handle(&file_handle);
  695. zend_restore_lexical_state(&original_lex_state);
  696. return SUCCESS;
  697. }
  698. void highlight_string(zend_string *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, const char *filename)
  699. {
  700. zend_lex_state original_lex_state;
  701. zval str_zv;
  702. zend_string *filename_str = zend_string_init(filename, strlen(filename), 0);
  703. ZVAL_STR_COPY(&str_zv, str);
  704. zend_save_lexical_state(&original_lex_state);
  705. zend_prepare_string_for_scanning(&str_zv, filename_str);
  706. zend_string_release(filename_str);
  707. BEGIN(INITIAL);
  708. zend_highlight(syntax_highlighter_ini);
  709. if (SCNG(script_filtered)) {
  710. efree(SCNG(script_filtered));
  711. SCNG(script_filtered) = NULL;
  712. }
  713. zend_restore_lexical_state(&original_lex_state);
  714. zval_ptr_dtor(&str_zv);
  715. }
  716. ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding)
  717. {
  718. size_t length;
  719. unsigned char *new_yy_start;
  720. /* convert and set */
  721. if (!SCNG(input_filter)) {
  722. if (SCNG(script_filtered)) {
  723. efree(SCNG(script_filtered));
  724. SCNG(script_filtered) = NULL;
  725. }
  726. SCNG(script_filtered_size) = 0;
  727. length = SCNG(script_org_size);
  728. new_yy_start = SCNG(script_org);
  729. } else {
  730. if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size))) {
  731. zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
  732. "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
  733. }
  734. if (SCNG(script_filtered)) {
  735. efree(SCNG(script_filtered));
  736. }
  737. SCNG(script_filtered) = new_yy_start;
  738. SCNG(script_filtered_size) = length;
  739. }
  740. SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
  741. SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
  742. SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
  743. SCNG(yy_limit) = new_yy_start + length;
  744. SCNG(yy_start) = new_yy_start;
  745. }
  /*
   * zend_copy_value(zendlval, yytext, yyleng)
   *
   * Store the `yyleng` bytes starting at `yytext` into `zendlval` as a string:
   *   - with an output filter installed, the text is passed through
   *     SCNG(output_filter) and the filter's buffer is copied, then freed;
   *   - a one-byte text uses the interned single-character string;
   *   - anything else is copied with ZVAL_STRINGL.
   *
   * NOTE(review): this expands to a bare if/else chain (no do { } while (0)
   * wrapper) and uses `yytext`/`yyleng` without parentheses in places, so it
   * should only be used as a full statement with simple or already
   * parenthesized arguments.
   */
  746. // TODO: avoid reallocation ???
  747. # define zend_copy_value(zendlval, yytext, yyleng) \
  748. if (SCNG(output_filter)) { \
  749. size_t sz = 0; \
  750. char *s = NULL; \
  751. SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng); \
  752. ZVAL_STRINGL(zendlval, s, sz); \
  753. efree(s); \
  754. } else if (yyleng == 1) { \
  755. ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*(yytext))); \
  756. } else { \
  757. ZVAL_STRINGL(zendlval, yytext, yyleng); \
  758. }
  /*
   * Build the string value of a quoted literal in `zendlval`.
   *
   * The `len` bytes at `str` are copied into `zendlval` and backslash escape
   * sequences are then decoded in place. CG(zend_lineno) is advanced for every
   * line break seen in the text. If an output filter is installed, the final
   * string is passed through it.
   *
   * `quote_type` is the quote character of the enclosing literal: of the
   * escapes \" and \`, only the one equal to `quote_type` loses its backslash.
   *
   * Returns SUCCESS, or FAILURE after throwing a parse error for an invalid
   * or too large \u{...} escape (in which case `zendlval` is left UNDEF).
   */
  759. static zend_result zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type)
  760. {
  761. char *s, *t;
  762. char *end;
  /* Zero or one byte cannot hold an escape sequence: take the short path */
  763. if (len <= 1) {
  764. if (len < 1) {
  765. ZVAL_EMPTY_STRING(zendlval);
  766. } else {
  767. zend_uchar c = (zend_uchar)*str;
  768. if (c == '\n' || c == '\r') {
  769. CG(zend_lineno)++;
  770. }
  771. ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR(c));
  772. }
  773. goto skip_escape_conversion;
  774. }
  775. ZVAL_STRINGL(zendlval, str, len);
  776. /* convert escape sequences */
  777. s = Z_STRVAL_P(zendlval);
  778. end = s+Z_STRLEN_P(zendlval);
  /* Fast scan up to the first backslash; only line breaks need counting here.
   * A "\r\n" pair is counted once, when the '\n' is reached. */
  779. while (1) {
  780. if (UNEXPECTED(*s=='\\')) {
  781. break;
  782. }
  783. if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
  784. CG(zend_lineno)++;
  785. }
  786. s++;
  787. if (s == end) {
  788. goto skip_escape_conversion;
  789. }
  790. }
  /* From here on `s` is the read position and `t` the write position within
   * the same buffer; every escape is written at or before where it was read. */
  791. t = s;
  792. while (s<end) {
  793. if (*s=='\\') {
  794. s++;
  /* A backslash that is the very last byte is kept literally */
  795. if (s >= end) {
  796. *t++ = '\\';
  797. break;
  798. }
  799. switch(*s) {
  800. case 'n':
  801. *t++ = '\n';
  802. break;
  803. case 'r':
  804. *t++ = '\r';
  805. break;
  806. case 't':
  807. *t++ = '\t';
  808. break;
  809. case 'f':
  810. *t++ = '\f';
  811. break;
  812. case 'v':
  813. *t++ = '\v';
  814. break;
  815. case 'e':
  816. #ifdef ZEND_WIN32
  817. *t++ = VK_ESCAPE;
  818. #else
  819. *t++ = '\e';
  820. #endif
  821. break;
  /* A quote character is only unescaped when it matches the enclosing quote;
   * otherwise the backslash is preserved along with it */
  822. case '"':
  823. case '`':
  824. if (*s != quote_type) {
  825. *t++ = '\\';
  826. *t++ = *s;
  827. break;
  828. }
  829. ZEND_FALLTHROUGH;
  830. case '\\':
  831. case '$':
  832. *t++ = *s;
  833. break;
  /* Hex escape: one or two hex digits; without any digit the sequence is kept as written */
  834. case 'x':
  835. case 'X':
  836. if (ZEND_IS_HEX(*(s+1))) {
  837. char hex_buf[3] = { 0, 0, 0 };
  838. hex_buf[0] = *(++s);
  839. if (ZEND_IS_HEX(*(s+1))) {
  840. hex_buf[1] = *(++s);
  841. }
  842. *t++ = (char) ZEND_STRTOL(hex_buf, NULL, 16);
  843. } else {
  844. *t++ = '\\';
  845. *t++ = *s;
  846. }
  847. break;
  848. /* UTF-8 codepoint escape, format: /\\u\{\x+\}/ */
  849. case 'u':
  850. {
  851. /* cache where we started so we can parse after validating */
  852. char *start = s + 1;
  /* Number of bytes validated so far: '{', each hex digit and '}'.
   * Note that this local shadows the `len` parameter. */
  853. size_t len = 0;
  854. bool valid = 1;
  855. unsigned long codepoint;
  856. if (*start != '{') {
  857. /* we silently let this pass to avoid breaking code
  858. * with JSON in string literals (e.g. "\"\u202e\"")
  859. */
  860. *t++ = '\\';
  861. *t++ = 'u';
  862. break;
  863. } else {
  864. /* on the other hand, invalid \u{blah} errors */
  865. s++;
  866. len++;
  867. s++;
  868. while (*s != '}') {
  869. if (!ZEND_IS_HEX(*s)) {
  870. valid = 0;
  871. break;
  872. } else {
  873. len++;
  874. }
  875. s++;
  876. }
  877. if (*s == '}') {
  878. valid = 1;
  879. len++;
  880. }
  881. }
  882. /* \u{} is invalid */
  883. if (len <= 2) {
  884. valid = 0;
  885. }
  886. if (!valid) {
  887. zend_throw_exception(zend_ce_parse_error,
  888. "Invalid UTF-8 codepoint escape sequence", 0);
  889. zval_ptr_dtor(zendlval);
  890. ZVAL_UNDEF(zendlval);
  891. return FAILURE;
  892. }
  /* start points at '{', so the digits begin one byte later */
  893. errno = 0;
  894. codepoint = strtoul(start + 1, NULL, 16);
  895. /* per RFC 3629, UTF-8 can only represent 21 bits */
  896. if (codepoint > 0x10FFFF || errno) {
  897. zend_throw_exception(zend_ce_parse_error,
  898. "Invalid UTF-8 codepoint escape sequence: Codepoint too large", 0);
  899. zval_ptr_dtor(zendlval);
  900. ZVAL_UNDEF(zendlval);
  901. return FAILURE;
  902. }
  903. /* based on https://en.wikipedia.org/wiki/UTF-8#Sample_code */
  904. if (codepoint < 0x80) {
  905. *t++ = codepoint;
  906. } else if (codepoint <= 0x7FF) {
  907. *t++ = (codepoint >> 6) + 0xC0;
  908. *t++ = (codepoint & 0x3F) + 0x80;
  909. } else if (codepoint <= 0xFFFF) {
  910. *t++ = (codepoint >> 12) + 0xE0;
  911. *t++ = ((codepoint >> 6) & 0x3F) + 0x80;
  912. *t++ = (codepoint & 0x3F) + 0x80;
  913. } else if (codepoint <= 0x10FFFF) {
  914. *t++ = (codepoint >> 18) + 0xF0;
  915. *t++ = ((codepoint >> 12) & 0x3F) + 0x80;
  916. *t++ = ((codepoint >> 6) & 0x3F) + 0x80;
  917. *t++ = (codepoint & 0x3F) + 0x80;
  918. }
  919. }
  920. break;
  921. default:
  922. /* check for an octal */
  923. if (ZEND_IS_OCT(*s)) {
  924. char octal_buf[4] = { 0, 0, 0, 0 };
  925. octal_buf[0] = *s;
  926. if (ZEND_IS_OCT(*(s+1))) {
  927. octal_buf[1] = *(++s);
  928. if (ZEND_IS_OCT(*(s+1))) {
  929. octal_buf[2] = *(++s);
  930. }
  931. }
  /* The warning is suppressed while SCNG(heredoc_scan_ahead) is set */
  932. if (octal_buf[2] && (octal_buf[0] > '3') && !SCNG(heredoc_scan_ahead)) {
  933. /* 3 octit values must not overflow 0xFF (\377) */
  934. zend_error(E_COMPILE_WARNING, "Octal escape sequence overflow \\%s is greater than \\377", octal_buf);
  935. }
  936. *t++ = (char) ZEND_STRTOL(octal_buf, NULL, 8);
  937. } else {
  /* Unknown escape: keep the backslash and the character as written */
  938. *t++ = '\\';
  939. *t++ = *s;
  940. }
  941. break;
  942. }
  943. } else {
  944. *t++ = *s;
  945. }
  /* Count a line break at the byte just consumed ("\r\n" counts once) */
  946. if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
  947. CG(zend_lineno)++;
  948. }
  949. s++;
  950. }
  /* Terminate the decoded text and record its new length */
  951. *t = 0;
  952. Z_STRLEN_P(zendlval) = t - Z_STRVAL_P(zendlval);
  953. skip_escape_conversion:
  /* Replace the value with its output-filtered form when a filter is installed */
  954. if (SCNG(output_filter)) {
  955. size_t sz = 0;
  956. unsigned char *str;
  957. // TODO: avoid reallocation ???
  958. s = Z_STRVAL_P(zendlval);
  959. SCNG(output_filter)(&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
  960. zval_ptr_dtor(zendlval);
  961. ZVAL_STRINGL(zendlval, (char *) str, sz);
  962. efree(str);
  963. }
  964. return SUCCESS;
  965. }
  /* Markers for the kind of whitespace used to indent a heredoc/nowdoc body.
   * NOTE(review): not referenced in this part of the file; the values 1 and 2
   * look like combinable bit flags — confirm against the heredoc rules. */
  966. #define HEREDOC_USING_SPACES 1
  967. #define HEREDOC_USING_TABS 2
  /*
   * Find the first line break in the range [str, end).
   *
   * Returns a pointer to the first '\r' or '\n' and stores the length of the
   * line break in *newline_len: 2 for a "\r\n" pair lying fully inside the
   * range, 1 otherwise. When the range contains no line break, returns NULL
   * and stores 0.
   */
  static const char *next_newline(const char *str, const char *end, size_t *newline_len) {
      const char *pos = str;

      while (pos < end) {
          switch (*pos) {
              case '\n':
                  *newline_len = 1;
                  return pos;
              case '\r':
                  /* Treat "\r\n" as a single break only if the '\n' is in range */
                  if (pos + 1 < end && pos[1] == '\n') {
                      *newline_len = 2;
                  } else {
                      *newline_len = 1;
                  }
                  return pos;
              default:
                  pos++;
                  break;
          }
      }

      *newline_len = 0;
      return NULL;
  }
  /*
   * Remove `indentation` leading whitespace characters from the lines of the
   * string in `zendlval`, compacting the string in place.
   *
   * indentation       number of leading whitespace characters to strip per line
   * using_spaces      true if the indentation must consist of spaces, false if
   *                   it must consist of tabs; the other kind is an error
   * newline_at_start  true if the string begins at the start of a line; when
   *                   false, everything up to and including the first line
   *                   break is left untouched
   * newline_at_end    true if the end of the string should be treated as the
   *                   end of a line when no further line break is found
   *
   * Returns 1 on success. On an indentation error a parse error is thrown
   * (after advancing CG(zend_lineno) by the number of lines already handled),
   * `zendlval` is destroyed and set to UNDEF, and 0 is returned.
   */
  981. static bool strip_multiline_string_indentation(
  982. zval *zendlval, int indentation, bool using_spaces,
  983. bool newline_at_start, bool newline_at_end)
  984. {
  /* `str` is the read position, `copy` the write position in the same buffer */
  985. const char *str = Z_STRVAL_P(zendlval), *end = str + Z_STRLEN_P(zendlval);
  986. char *copy = Z_STRVAL_P(zendlval);
  987. int newline_count = 0;
  988. size_t newline_len;
  989. const char *nl;
  990. if (!newline_at_start) {
  /* The text before the first line break is not at a line start: skip it.
   * With no line break at all there is nothing to strip. */
  991. nl = next_newline(str, end, &newline_len);
  992. if (!nl) {
  993. return 1;
  994. }
  995. str = nl + newline_len;
  996. copy = (char *) nl + newline_len;
  997. newline_count++;
  998. } else {
  /* Any non-NULL value, so that the loop below is entered */
  999. nl = str;
  1000. }
  1001. /* <= intentional */
  1002. while (str <= end && nl) {
  1003. size_t skip;
  1004. nl = next_newline(str, end, &newline_len);
  1005. if (!nl && newline_at_end) {
  1006. nl = end;
  1007. }
  1008. /* Try to skip indentation */
  1009. for (skip = 0; skip < indentation; skip++, str++) {
  1010. if (str == nl) {
  1011. /* Don't require full indentation on whitespace-only lines */
  1012. break;
  1013. }
  1014. if (str == end || (*str != ' ' && *str != '\t')) {
  1015. CG(zend_lineno) += newline_count;
  1016. zend_throw_exception_ex(zend_ce_parse_error, 0,
  1017. "Invalid body indentation level (expecting an indentation level of at least %d)", indentation);
  1018. goto error;
  1019. }
  1020. if ((!using_spaces && *str == ' ') || (using_spaces && *str == '\t')) {
  1021. CG(zend_lineno) += newline_count;
  1022. zend_throw_exception(zend_ce_parse_error,
  1023. "Invalid indentation - tabs and spaces cannot be mixed", 0);
  1024. goto error;
  1025. }
  1026. }
  1027. if (str == end) {
  1028. break;
  1029. }
  /* Move the rest of the line (including its line break, if any) down over
   * the stripped indentation; newline_len is 0 when no line break was found */
  1030. size_t len = nl ? (nl - str + newline_len) : (end - str);
  1031. memmove(copy, str, len);
  1032. str += len;
  1033. copy += len;
  1034. newline_count++;
  1035. }
  /* Terminate the compacted string and record its new length */
  1036. *copy = '\0';
  1037. Z_STRLEN_P(zendlval) = copy - Z_STRVAL_P(zendlval);
  1038. return 1;
  1039. error:
  1040. zval_ptr_dtor_str(zendlval);
  1041. ZVAL_UNDEF(zendlval);
  1042. return 0;
  1043. }
  1044. static void copy_heredoc_label_stack(void *void_heredoc_label)
  1045. {
  1046. zend_heredoc_label *heredoc_label = void_heredoc_label;
  1047. zend_heredoc_label *new_heredoc_label = emalloc(sizeof(zend_heredoc_label));
  1048. *new_heredoc_label = *heredoc_label;
  1049. new_heredoc_label->label = estrndup(heredoc_label->label, heredoc_label->length);
  1050. zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) new_heredoc_label);
  1051. }
  1052. /* Check that { }, [ ], ( ) are nested correctly */
  1053. static void report_bad_nesting(char opening, int opening_lineno, char closing)
  1054. {
  1055. char buf[256];
  1056. size_t used = 0;
  1057. used = snprintf(buf, sizeof(buf), "Unclosed '%c'", opening);
  1058. if (opening_lineno != CG(zend_lineno)) {
  1059. used += snprintf(buf + used, sizeof(buf) - used, " on line %d", opening_lineno);
  1060. }
  1061. if (closing) { /* 'closing' will be 0 if at end of file */
  1062. used += snprintf(buf + used, sizeof(buf) - used, " does not match '%c'", closing);
  1063. }
  1064. zend_throw_exception(zend_ce_parse_error, buf, 0);
  1065. }
  1066. static void enter_nesting(char opening)
  1067. {
  1068. zend_nest_location nest_loc = {opening, CG(zend_lineno)};
  1069. zend_stack_push(&SCNG(nest_location_stack), &nest_loc);
  1070. }
  1071. static zend_result exit_nesting(char closing)
  1072. {
  1073. if (zend_stack_is_empty(&SCNG(nest_location_stack))) {
  1074. zend_throw_exception_ex(zend_ce_parse_error, 0, "Unmatched '%c'", closing);
  1075. return FAILURE;
  1076. }
  1077. zend_nest_location *nest_loc = zend_stack_top(&SCNG(nest_location_stack));
  1078. char opening = nest_loc->text;
  1079. if ((opening == '{' && closing != '}') ||
  1080. (opening == '[' && closing != ']') ||
  1081. (opening == '(' && closing != ')')) {
  1082. report_bad_nesting(opening, nest_loc->lineno, closing);
  1083. return FAILURE;
  1084. }
  1085. zend_stack_del_top(&SCNG(nest_location_stack));
  1086. return SUCCESS;
  1087. }
  1088. static zend_result check_nesting_at_end(void)
  1089. {
  1090. if (!zend_stack_is_empty(&SCNG(nest_location_stack))) {
  1091. zend_nest_location *nest_loc = zend_stack_top(&SCNG(nest_location_stack));
  1092. report_bad_nesting(nest_loc->text, nest_loc->lineno, 0);
  1093. return FAILURE;
  1094. }
  1095. return SUCCESS;
  1096. }
  /*
   * Token-return helpers used by the scanner rules in lex_scan().
   *
   * Each RETURN_* macro stores the token id in the local variable `token`
   * and jumps to a label. The labels (emit_token, emit_token_with_val,
   * emit_token_with_str, emit_token_with_ident, skip_token) are not defined
   * in this part of the file — presumably near the end of lex_scan(); confirm.
   */

  /* True when the `elem` argument of lex_scan() is non-NULL.
   * NOTE(review): presumably `elem` is only supplied when the parser drives
   * the scanner — confirm against the callers. */
  1097. #define PARSER_MODE() \
  1098. EXPECTED(elem != NULL)
  /* Emit `_token` with no associated value */
  1099. #define RETURN_TOKEN(_token) do { \
  1100. token = _token; \
  1101. goto emit_token; \
  1102. } while (0)
  /* Emit `_token` via the emit_token_with_val label */
  1103. #define RETURN_TOKEN_WITH_VAL(_token) do { \
  1104. token = _token; \
  1105. goto emit_token_with_val; \
  1106. } while (0)
  /* Emit `_token` via the emit_token_with_str label, passing `_offset` in the local `offset` */
  1107. #define RETURN_TOKEN_WITH_STR(_token, _offset) do { \
  1108. token = _token; \
  1109. offset = _offset; \
  1110. goto emit_token_with_str; \
  1111. } while (0)
  /* Emit `_token` via the emit_token_with_ident label */
  1112. #define RETURN_TOKEN_WITH_IDENT(_token) do { \
  1113. token = _token; \
  1114. goto emit_token_with_ident; \
  1115. } while (0)
  /* In parser mode jump to skip_token instead of emitting `_token`; otherwise emit it */
  1116. #define RETURN_OR_SKIP_TOKEN(_token) do { \
  1117. token = _token; \
  1118. if (PARSER_MODE()) { \
  1119. goto skip_token; \
  1120. } \
  1121. goto emit_token; \
  1122. } while (0)
  /* Close a bracket: `_token` doubles as the closing character passed to
   * exit_nesting(). The zend_result is tested for truthiness, so this relies
   * on FAILURE being non-zero and SUCCESS being zero (presumably — confirm).
   * T_ERROR is only emitted in parser mode; otherwise the token is emitted
   * even when the nesting check failed. */
  1123. #define RETURN_EXIT_NESTING_TOKEN(_token) do { \
  1124. if (exit_nesting(_token) && PARSER_MODE()) { \
  1125. RETURN_TOKEN(T_ERROR); \
  1126. } else { \
  1127. RETURN_TOKEN(_token); \
  1128. } \
  1129. } while(0)
  /* Emit END, or T_ERROR in parser mode when check_nesting_at_end() reports
   * an unclosed bracket (same truthiness convention as above) */
  1130. #define RETURN_END_TOKEN do { \
  1131. if (check_nesting_at_end() && PARSER_MODE()) { \
  1132. RETURN_TOKEN(T_ERROR); \
  1133. } else { \
  1134. RETURN_TOKEN(END); \
  1135. } \
  1136. } while (0)
  1137. int ZEND_FASTCALL lex_scan(zval *zendlval, zend_parser_stack_elem *elem)
  1138. {
  1139. int token;
  1140. int offset;
  1141. int start_line = CG(zend_lineno);
  1142. ZVAL_UNDEF(zendlval);
  1143. restart:
  1144. SCNG(yy_text) = YYCURSOR;
  1145. /*!re2c
  1146. re2c:yyfill:check = 0;
  1147. LNUM [0-9]+(_[0-9]+)*
  1148. DNUM ({LNUM}?"."{LNUM})|({LNUM}"."{LNUM}?)
  1149. EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
  1150. HNUM "0x"[0-9a-fA-F]+(_[0-9a-fA-F]+)*
  1151. BNUM "0b"[01]+(_[01]+)*
  1152. ONUM "0o"[0-7]+(_[0-7]+)*
  1153. LABEL [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
  1154. WHITESPACE [ \n\r\t]+
  1155. TABS_AND_SPACES [ \t]*
  1156. TOKENS [;:,.|^&+-/*=%!~$<>?@]
  1157. ANY_CHAR [^]
  1158. NEWLINE ("\r"|"\n"|"\r\n")
  1159. /* compute yyleng before each rule */
  1160. <!*> := yyleng = YYCURSOR - SCNG(yy_text);
  1161. <ST_IN_SCRIPTING>"exit" {
  1162. RETURN_TOKEN_WITH_IDENT(T_EXIT);
  1163. }
  1164. <ST_IN_SCRIPTING>"die" {
  1165. RETURN_TOKEN_WITH_IDENT(T_EXIT);
  1166. }
  1167. <ST_IN_SCRIPTING>"fn" {
  1168. RETURN_TOKEN_WITH_IDENT(T_FN);
  1169. }
  1170. <ST_IN_SCRIPTING>"function" {
  1171. RETURN_TOKEN_WITH_IDENT(T_FUNCTION);
  1172. }
  1173. <ST_IN_SCRIPTING>"const" {
  1174. RETURN_TOKEN_WITH_IDENT(T_CONST);
  1175. }
  1176. <ST_IN_SCRIPTING>"return" {
  1177. RETURN_TOKEN_WITH_IDENT(T_RETURN);
  1178. }
  1179. <ST_IN_SCRIPTING>"#[" {
  1180. enter_nesting('[');
  1181. RETURN_TOKEN(T_ATTRIBUTE);
  1182. }
  1183. <ST_IN_SCRIPTING>"yield"{WHITESPACE}"from"[^a-zA-Z0-9_\x80-\xff] {
  1184. yyless(yyleng - 1);
  1185. HANDLE_NEWLINES(yytext, yyleng);
  1186. RETURN_TOKEN_WITH_IDENT(T_YIELD_FROM);
  1187. }
  1188. <ST_IN_SCRIPTING>"yield" {
  1189. RETURN_TOKEN_WITH_IDENT(T_YIELD);
  1190. }
  1191. <ST_IN_SCRIPTING>"try" {
  1192. RETURN_TOKEN_WITH_IDENT(T_TRY);
  1193. }
  1194. <ST_IN_SCRIPTING>"catch" {
  1195. RETURN_TOKEN_WITH_IDENT(T_CATCH);
  1196. }
  1197. <ST_IN_SCRIPTING>"finally" {
  1198. RETURN_TOKEN_WITH_IDENT(T_FINALLY);
  1199. }
  1200. <ST_IN_SCRIPTING>"throw" {
  1201. RETURN_TOKEN_WITH_IDENT(T_THROW);
  1202. }
  1203. <ST_IN_SCRIPTING>"if" {
  1204. RETURN_TOKEN_WITH_IDENT(T_IF);
  1205. }
  1206. <ST_IN_SCRIPTING>"elseif" {
  1207. RETURN_TOKEN_WITH_IDENT(T_ELSEIF);
  1208. }
  1209. <ST_IN_SCRIPTING>"endif" {
  1210. RETURN_TOKEN_WITH_IDENT(T_ENDIF);
  1211. }
  1212. <ST_IN_SCRIPTING>"else" {
  1213. RETURN_TOKEN_WITH_IDENT(T_ELSE);
  1214. }
  1215. <ST_IN_SCRIPTING>"while" {
  1216. RETURN_TOKEN_WITH_IDENT(T_WHILE);
  1217. }
  1218. <ST_IN_SCRIPTING>"endwhile" {
  1219. RETURN_TOKEN_WITH_IDENT(T_ENDWHILE);
  1220. }
  1221. <ST_IN_SCRIPTING>"do" {
  1222. RETURN_TOKEN_WITH_IDENT(T_DO);
  1223. }
  1224. <ST_IN_SCRIPTING>"for" {
  1225. RETURN_TOKEN_WITH_IDENT(T_FOR);
  1226. }
  1227. <ST_IN_SCRIPTING>"endfor" {
  1228. RETURN_TOKEN_WITH_IDENT(T_ENDFOR);
  1229. }
  1230. <ST_IN_SCRIPTING>"foreach" {
  1231. RETURN_TOKEN_WITH_IDENT(T_FOREACH);
  1232. }
  1233. <ST_IN_SCRIPTING>"endforeach" {
  1234. RETURN_TOKEN_WITH_IDENT(T_ENDFOREACH);
  1235. }
  1236. <ST_IN_SCRIPTING>"declare" {
  1237. RETURN_TOKEN_WITH_IDENT(T_DECLARE);
  1238. }
  1239. <ST_IN_SCRIPTING>"enddeclare" {
  1240. RETURN_TOKEN_WITH_IDENT(T_ENDDECLARE);
  1241. }
  1242. <ST_IN_SCRIPTING>"instanceof" {
  1243. RETURN_TOKEN_WITH_IDENT(T_INSTANCEOF);
  1244. }
  1245. <ST_IN_SCRIPTING>"as" {
  1246. RETURN_TOKEN_WITH_IDENT(T_AS);
  1247. }
  1248. <ST_IN_SCRIPTING>"switch" {
  1249. RETURN_TOKEN_WITH_IDENT(T_SWITCH);
  1250. }
  1251. <ST_IN_SCRIPTING>"match" {
  1252. RETURN_TOKEN_WITH_IDENT(T_MATCH);
  1253. }
  1254. <ST_IN_SCRIPTING>"endswitch" {
  1255. RETURN_TOKEN_WITH_IDENT(T_ENDSWITCH);
  1256. }
  1257. <ST_IN_SCRIPTING>"case" {
  1258. RETURN_TOKEN_WITH_IDENT(T_CASE);
  1259. }
  1260. <ST_IN_SCRIPTING>"default" {
  1261. RETURN_TOKEN_WITH_IDENT(T_DEFAULT);
  1262. }
  1263. <ST_IN_SCRIPTING>"break" {
  1264. RETURN_TOKEN_WITH_IDENT(T_BREAK);
  1265. }
  1266. <ST_IN_SCRIPTING>"continue" {
  1267. RETURN_TOKEN_WITH_IDENT(T_CONTINUE);
  1268. }
  1269. <ST_IN_SCRIPTING>"goto" {
  1270. RETURN_TOKEN_WITH_IDENT(T_GOTO);
  1271. }
  1272. <ST_IN_SCRIPTING>"echo" {
  1273. RETURN_TOKEN_WITH_IDENT(T_ECHO);
  1274. }
  1275. <ST_IN_SCRIPTING>"print" {
  1276. RETURN_TOKEN_WITH_IDENT(T_PRINT);
  1277. }
  1278. <ST_IN_SCRIPTING>"class" {
  1279. RETURN_TOKEN_WITH_IDENT(T_CLASS);
  1280. }
  1281. <ST_IN_SCRIPTING>"interface" {
  1282. RETURN_TOKEN_WITH_IDENT(T_INTERFACE);
  1283. }
  1284. <ST_IN_SCRIPTING>"trait" {
  1285. RETURN_TOKEN_WITH_IDENT(T_TRAIT);
  1286. }
  1287. /*
  1288. * The enum keyword must be followed by whitespace and another identifier.
  1289. * This avoids the BC break of using enum in classes, namespaces, functions and constants.
  1290. */
  1291. <ST_IN_SCRIPTING>"enum"{WHITESPACE}("extends"|"implements") {
  1292. yyless(4);
  1293. RETURN_TOKEN_WITH_STR(T_STRING, 0);
  1294. }
  1295. <ST_IN_SCRIPTING>"enum"{WHITESPACE}[a-zA-Z_\x80-\xff] {
  1296. yyless(4);
  1297. RETURN_TOKEN_WITH_IDENT(T_ENUM);
  1298. }
  1299. <ST_IN_SCRIPTING>"extends" {
  1300. RETURN_TOKEN_WITH_IDENT(T_EXTENDS);
  1301. }
  1302. <ST_IN_SCRIPTING>"implements" {
  1303. RETURN_TOKEN_WITH_IDENT(T_IMPLEMENTS);
  1304. }
  1305. <ST_IN_SCRIPTING>"->" {
  1306. yy_push_state(ST_LOOKING_FOR_PROPERTY);
  1307. RETURN_TOKEN(T_OBJECT_OPERATOR);
  1308. }
  1309. <ST_IN_SCRIPTING>"?->" {
  1310. yy_push_state(ST_LOOKING_FOR_PROPERTY);
  1311. RETURN_TOKEN(T_NULLSAFE_OBJECT_OPERATOR);
  1312. }
  1313. <ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
  1314. goto return_whitespace;
  1315. }
  1316. <ST_LOOKING_FOR_PROPERTY>"->" {
  1317. RETURN_TOKEN(T_OBJECT_OPERATOR);
  1318. }
  1319. <ST_LOOKING_FOR_PROPERTY>"?->" {
  1320. RETURN_TOKEN(T_NULLSAFE_OBJECT_OPERATOR);
  1321. }
  1322. <ST_LOOKING_FOR_PROPERTY>{LABEL} {
  1323. yy_pop_state();
  1324. RETURN_TOKEN_WITH_STR(T_STRING, 0);
  1325. }
  1326. <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
  1327. yyless(0);
  1328. yy_pop_state();
  1329. goto restart;
  1330. }
  1331. <ST_IN_SCRIPTING>"::" {
  1332. RETURN_TOKEN(T_PAAMAYIM_NEKUDOTAYIM);
  1333. }
  1334. <ST_IN_SCRIPTING>"..." {
  1335. RETURN_TOKEN(T_ELLIPSIS);
  1336. }
  1337. <ST_IN_SCRIPTING>"??" {
  1338. RETURN_TOKEN(T_COALESCE);
  1339. }
  1340. <ST_IN_SCRIPTING>"new" {
  1341. RETURN_TOKEN_WITH_IDENT(T_NEW);
  1342. }
  1343. <ST_IN_SCRIPTING>"clone" {
  1344. RETURN_TOKEN_WITH_IDENT(T_CLONE);
  1345. }
  1346. <ST_IN_SCRIPTING>"var" {
  1347. RETURN_TOKEN_WITH_IDENT(T_VAR);
  1348. }
  1349. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
  1350. RETURN_TOKEN(T_INT_CAST);
  1351. }
  1352. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("double"|"float"){TABS_AND_SPACES}")" {
  1353. RETURN_TOKEN(T_DOUBLE_CAST);
  1354. }
  1355. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"real"{TABS_AND_SPACES}")" {
  1356. if (PARSER_MODE()) {
  1357. zend_throw_exception(zend_ce_parse_error, "The (real) cast has been removed, use (float) instead", 0);
  1358. RETURN_TOKEN(T_ERROR);
  1359. }
  1360. RETURN_TOKEN(T_DOUBLE_CAST);
  1361. }
  1362. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
  1363. RETURN_TOKEN(T_STRING_CAST);
  1364. }
  1365. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
  1366. RETURN_TOKEN(T_ARRAY_CAST);
  1367. }
  1368. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
  1369. RETURN_TOKEN(T_OBJECT_CAST);
  1370. }
  1371. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
  1372. RETURN_TOKEN(T_BOOL_CAST);
  1373. }
  1374. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
  1375. RETURN_TOKEN(T_UNSET_CAST);
  1376. }
  1377. <ST_IN_SCRIPTING>"eval" {
  1378. RETURN_TOKEN_WITH_IDENT(T_EVAL);
  1379. }
  1380. <ST_IN_SCRIPTING>"include" {
  1381. RETURN_TOKEN_WITH_IDENT(T_INCLUDE);
  1382. }
  1383. <ST_IN_SCRIPTING>"include_once" {
  1384. RETURN_TOKEN_WITH_IDENT(T_INCLUDE_ONCE);
  1385. }
  1386. <ST_IN_SCRIPTING>"require" {
  1387. RETURN_TOKEN_WITH_IDENT(T_REQUIRE);
  1388. }
  1389. <ST_IN_SCRIPTING>"require_once" {
  1390. RETURN_TOKEN_WITH_IDENT(T_REQUIRE_ONCE);
  1391. }
  1392. <ST_IN_SCRIPTING>"namespace" {
  1393. RETURN_TOKEN_WITH_IDENT(T_NAMESPACE);
  1394. }
  1395. <ST_IN_SCRIPTING>"use" {
  1396. RETURN_TOKEN_WITH_IDENT(T_USE);
  1397. }
  1398. <ST_IN_SCRIPTING>"insteadof" {
  1399. RETURN_TOKEN_WITH_IDENT(T_INSTEADOF);
  1400. }
  1401. <ST_IN_SCRIPTING>"global" {
  1402. RETURN_TOKEN_WITH_IDENT(T_GLOBAL);
  1403. }
  1404. <ST_IN_SCRIPTING>"isset" {
  1405. RETURN_TOKEN_WITH_IDENT(T_ISSET);
  1406. }
  1407. <ST_IN_SCRIPTING>"empty" {
  1408. RETURN_TOKEN_WITH_IDENT(T_EMPTY);
  1409. }
  1410. <ST_IN_SCRIPTING>"__halt_compiler" {
  1411. RETURN_TOKEN_WITH_IDENT(T_HALT_COMPILER);
  1412. }
  1413. <ST_IN_SCRIPTING>"static" {
  1414. RETURN_TOKEN_WITH_IDENT(T_STATIC);
  1415. }
  1416. <ST_IN_SCRIPTING>"abstract" {
  1417. RETURN_TOKEN_WITH_IDENT(T_ABSTRACT);
  1418. }
  1419. <ST_IN_SCRIPTING>"final" {
  1420. RETURN_TOKEN_WITH_IDENT(T_FINAL);
  1421. }
  1422. <ST_IN_SCRIPTING>"private" {
  1423. RETURN_TOKEN_WITH_IDENT(T_PRIVATE);
  1424. }
  1425. <ST_IN_SCRIPTING>"protected" {
  1426. RETURN_TOKEN_WITH_IDENT(T_PROTECTED);
  1427. }
  1428. <ST_IN_SCRIPTING>"public" {
  1429. RETURN_TOKEN_WITH_IDENT(T_PUBLIC);
  1430. }
  1431. <ST_IN_SCRIPTING>"readonly" {
  1432. RETURN_TOKEN_WITH_IDENT(T_READONLY);
  1433. }
  1434. /* Don't treat "readonly(" as a keyword, to allow using it as a function name. */
  1435. <ST_IN_SCRIPTING>"readonly"[ \n\r\t]*"(" {
  1436. yyless(strlen("readonly"));
  1437. RETURN_TOKEN_WITH_STR(T_STRING, 0);
  1438. }
  1439. <ST_IN_SCRIPTING>"unset" {
  1440. RETURN_TOKEN_WITH_IDENT(T_UNSET);
  1441. }
  /* The "=>" operator is returned with plain RETURN_TOKEN; the three words
   * that follow are returned with RETURN_TOKEN_WITH_IDENT like the other
   * keyword rules. */
  1442. <ST_IN_SCRIPTING>"=>" {
  1443. RETURN_TOKEN(T_DOUBLE_ARROW);
  1444. }
  1445. <ST_IN_SCRIPTING>"list" {
  1446. RETURN_TOKEN_WITH_IDENT(T_LIST);
  1447. }
  1448. <ST_IN_SCRIPTING>"array" {
  1449. RETURN_TOKEN_WITH_IDENT(T_ARRAY);
  1450. }
  1451. <ST_IN_SCRIPTING>"callable" {
  1452. RETURN_TOKEN_WITH_IDENT(T_CALLABLE);
  1453. }
  /* Multi-character operator rules for ST_IN_SCRIPTING. Symbolic operators
   * are returned with RETURN_TOKEN; the word operators OR, AND and XOR are
   * returned with RETURN_TOKEN_WITH_IDENT. */
  1454. <ST_IN_SCRIPTING>"++" {
  1455. RETURN_TOKEN(T_INC);
  1456. }
  1457. <ST_IN_SCRIPTING>"--" {
  1458. RETURN_TOKEN(T_DEC);
  1459. }
  /* Comparison operators. */
  1460. <ST_IN_SCRIPTING>"===" {
  1461. RETURN_TOKEN(T_IS_IDENTICAL);
  1462. }
  1463. <ST_IN_SCRIPTING>"!==" {
  1464. RETURN_TOKEN(T_IS_NOT_IDENTICAL);
  1465. }
  1466. <ST_IN_SCRIPTING>"==" {
  1467. RETURN_TOKEN(T_IS_EQUAL);
  1468. }
  /* Both spellings of "not equal" produce the same token. */
  1469. <ST_IN_SCRIPTING>"!="|"<>" {
  1470. RETURN_TOKEN(T_IS_NOT_EQUAL);
  1471. }
  1472. <ST_IN_SCRIPTING>"<=>" {
  1473. RETURN_TOKEN(T_SPACESHIP);
  1474. }
  1475. <ST_IN_SCRIPTING>"<=" {
  1476. RETURN_TOKEN(T_IS_SMALLER_OR_EQUAL);
  1477. }
  1478. <ST_IN_SCRIPTING>">=" {
  1479. RETURN_TOKEN(T_IS_GREATER_OR_EQUAL);
  1480. }
  /* Compound assignment operators, plus the power operator. */
  1481. <ST_IN_SCRIPTING>"+=" {
  1482. RETURN_TOKEN(T_PLUS_EQUAL);
  1483. }
  1484. <ST_IN_SCRIPTING>"-=" {
  1485. RETURN_TOKEN(T_MINUS_EQUAL);
  1486. }
  1487. <ST_IN_SCRIPTING>"*=" {
  1488. RETURN_TOKEN(T_MUL_EQUAL);
  1489. }
  /* Two asterisks; the pattern is written with the second one escaped. */
  1490. <ST_IN_SCRIPTING>"*\*" {
  1491. RETURN_TOKEN(T_POW);
  1492. }
  1493. <ST_IN_SCRIPTING>"*\*=" {
  1494. RETURN_TOKEN(T_POW_EQUAL);
  1495. }
  1496. <ST_IN_SCRIPTING>"/=" {
  1497. RETURN_TOKEN(T_DIV_EQUAL);
  1498. }
  1499. <ST_IN_SCRIPTING>".=" {
  1500. RETURN_TOKEN(T_CONCAT_EQUAL);
  1501. }
  1502. <ST_IN_SCRIPTING>"%=" {
  1503. RETURN_TOKEN(T_MOD_EQUAL);
  1504. }
  1505. <ST_IN_SCRIPTING>"<<=" {
  1506. RETURN_TOKEN(T_SL_EQUAL);
  1507. }
  1508. <ST_IN_SCRIPTING>">>=" {
  1509. RETURN_TOKEN(T_SR_EQUAL);
  1510. }
  1511. <ST_IN_SCRIPTING>"&=" {
  1512. RETURN_TOKEN(T_AND_EQUAL);
  1513. }
  1514. <ST_IN_SCRIPTING>"|=" {
  1515. RETURN_TOKEN(T_OR_EQUAL);
  1516. }
  1517. <ST_IN_SCRIPTING>"^=" {
  1518. RETURN_TOKEN(T_XOR_EQUAL);
  1519. }
  1520. <ST_IN_SCRIPTING>"??=" {
  1521. RETURN_TOKEN(T_COALESCE_EQUAL);
  1522. }
  /* Boolean operators in symbolic form, then in word form. */
  1523. <ST_IN_SCRIPTING>"||" {
  1524. RETURN_TOKEN(T_BOOLEAN_OR);
  1525. }
  1526. <ST_IN_SCRIPTING>"&&" {
  1527. RETURN_TOKEN(T_BOOLEAN_AND);
  1528. }
  1529. <ST_IN_SCRIPTING>"OR" {
  1530. RETURN_TOKEN_WITH_IDENT(T_LOGICAL_OR);
  1531. }
  1532. <ST_IN_SCRIPTING>"AND" {
  1533. RETURN_TOKEN_WITH_IDENT(T_LOGICAL_AND);
  1534. }
  1535. <ST_IN_SCRIPTING>"XOR" {
  1536. RETURN_TOKEN_WITH_IDENT(T_LOGICAL_XOR);
  1537. }
  /* Shift operators. */
  1538. <ST_IN_SCRIPTING>"<<" {
  1539. RETURN_TOKEN(T_SL);
  1540. }
  1541. <ST_IN_SCRIPTING>">>" {
  1542. RETURN_TOKEN(T_SR);
  1543. }
  /* A lone ampersand yields one of two tokens depending on what comes next.
   * The first rule looks past optional whitespace for a dollar sign or an
   * ellipsis; yyless(1) is then called so that, presumably, only the
   * ampersand itself stays in the token (TODO confirm yyless semantics).
   * Any other ampersand falls to the second rule. */
  1544. <ST_IN_SCRIPTING>"&"[ \t\r\n]*("$"|"...") {
  1545. yyless(1);
  1546. RETURN_TOKEN(T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG);
  1547. }
  1548. <ST_IN_SCRIPTING>"&" {
  1549. RETURN_TOKEN(T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG);
  1550. }
  /* Bracket, parenthesis and brace rules. Opening characters are recorded
   * with enter_nesting; closing characters are returned through
   * RETURN_EXIT_NESTING_TOKEN. Braces additionally push or pop a scanner
   * state. */
  1551. <ST_IN_SCRIPTING>"]"|")" {
  1552. /* Check that ] and ) match up properly with a preceding [ or ( */
  1553. RETURN_EXIT_NESTING_TOKEN(yytext[0]);
  1554. }
  1555. <ST_IN_SCRIPTING>"["|"(" {
  1556. enter_nesting(yytext[0]);
  1557. RETURN_TOKEN(yytext[0]);
  1558. }
  /* Remaining single-character tokens are returned as their own character
   * code. */
  1559. <ST_IN_SCRIPTING>{TOKENS} {
  1560. RETURN_TOKEN(yytext[0]);
  1561. }
  /* An opening brace pushes another ST_IN_SCRIPTING level so that the
   * matching closing brace can pop it again. */
  1562. <ST_IN_SCRIPTING>"{" {
  1563. yy_push_state(ST_IN_SCRIPTING);
  1564. enter_nesting('{');
  1565. RETURN_TOKEN('{');
  1566. }
  /* Dollar sign plus opening brace inside a double-quoted string, backquote
   * string or heredoc: switch to ST_LOOKING_FOR_VARNAME. */
  1567. <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
  1568. yy_push_state(ST_LOOKING_FOR_VARNAME);
  1569. enter_nesting('{');
  1570. RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
  1571. }
  1572. <ST_IN_SCRIPTING>"}" {
  1573. RESET_DOC_COMMENT();
  /* Only pop when a state was pushed; an unmatched closing brace at the
   * outermost level leaves the state stack untouched. */
  1574. if (!zend_stack_is_empty(&SCNG(state_stack))) {
  1575. yy_pop_state();
  1576. }
  1577. RETURN_EXIT_NESTING_TOKEN('}');
  1578. }
  /* Rules for ST_LOOKING_FOR_VARNAME, the state pushed after a dollar sign
   * followed by an opening brace inside a string. Both rules replace that
   * state with ST_IN_SCRIPTING. */
  /* A label directly followed by an opening bracket or a closing brace is a
   * variable name: the trailing character is handed back with yyless and the
   * label is returned as T_STRING_VARNAME. */
  1579. <ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
  1580. yyless(yyleng - 1);
  1581. yy_pop_state();
  1582. yy_push_state(ST_IN_SCRIPTING);
  1583. RETURN_TOKEN_WITH_STR(T_STRING_VARNAME, 0);
  1584. }
  /* Anything else: give the character back (yyless(0)) and rescan it in
   * ST_IN_SCRIPTING without producing a token. */
  1585. <ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
  1586. yyless(0);
  1587. yy_pop_state();
  1588. yy_push_state(ST_IN_SCRIPTING);
  1589. goto restart;
  1590. }
  1591. <ST_IN_SCRIPTING>{BNUM} {
  1592. /* The +/- 2 skips "0b" */
  1593. size_t len = yyleng - 2;
  1594. char *end, *bin = yytext + 2;
  1595. bool contains_underscores;
  1596. /* Skip any leading 0s */
  1597. while (len > 0 && (*bin == '0' || *bin == '_')) {
  1598. ++bin;
  1599. --len;
  1600. }
  1601. contains_underscores = (memchr(bin, '_', len) != NULL);
  1602. if (contains_underscores) {
  1603. bin = estrndup(bin, len);
  1604. strip_underscores(bin, &len);
  1605. }
  1606. if (len < SIZEOF_ZEND_LONG * 8) {
  1607. if (len == 0) {
  1608. ZVAL_LONG(zendlval, 0);
  1609. } else {
  1610. errno = 0;
  1611. ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
  1612. ZEND_ASSERT(!errno && end == bin + len);
  1613. }
  1614. if (contains_underscores) {
  1615. efree(bin);
  1616. }
  1617. RETURN_TOKEN_WITH_VAL(T_LNUMBER);
  1618. } else {
  1619. ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
  1620. /* errno isn't checked since we allow HUGE_VAL/INF overflow */
  1621. ZEND_ASSERT(end == bin + len);
  1622. if (contains_underscores) {
  1623. efree(bin);
  1624. }
  1625. RETURN_TOKEN_WITH_VAL(T_DNUMBER);
  1626. }
  1627. }
  1628. <ST_IN_SCRIPTING>{ONUM} {
  1629. /* The +/- 2 skips "0o" */
  1630. size_t len = yyleng - 2;
  1631. char *end, *octal = yytext + 2;
  1632. bool contains_underscores = (memchr(octal, '_', len) != NULL);
  1633. /* Skip any leading 0s */
  1634. while (len > 0 && (*octal == '0' || *octal == '_')) {
  1635. ++octal;
  1636. --len;
  1637. }
  1638. if (len == 0) {
  1639. ZVAL_LONG(zendlval, 0);
  1640. RETURN_TOKEN_WITH_VAL(T_LNUMBER);
  1641. }
  1642. if (contains_underscores) {
  1643. octal = estrndup(octal, len);
  1644. strip_underscores(octal, &len);
  1645. }
  1646. errno = 0;
  1647. ZVAL_LONG(zendlval, ZEND_STRTOL(octal, &end, 8));
  1648. ZEND_ASSERT(end == octal + len);
  1649. if (!errno) {
  1650. if (contains_underscores) {
  1651. efree(octal);
  1652. }
  1653. RETURN_TOKEN_WITH_VAL(T_LNUMBER);
  1654. }
  1655. /* Overflow */
  1656. ZEND_ASSERT(errno == ERANGE);
  1657. /* Reset errno */
  1658. errno = 0;
  1659. /* zend_oct_strtod skips leading '0' */
  1660. ZVAL_DOUBLE(zendlval, zend_oct_strtod(octal, (const char **)&end));
  1661. ZEND_ASSERT(!errno);
  1662. ZEND_ASSERT(end == octal + len);
  1663. if (contains_underscores) {
  1664. efree(octal);
  1665. }
  1666. RETURN_TOKEN_WITH_VAL(T_DNUMBER);
  1667. }
  /* Integer literal without a base prefix. A literal whose first character
   * is 0 is treated as octal. Yields T_LNUMBER, T_DNUMBER when the value
   * overflows, or T_ERROR in parser mode when an octal literal contains the
   * digit 8 or 9. */
  1668. <ST_IN_SCRIPTING>{LNUM} {
  1669. size_t len = yyleng;
  1670. char *end, *lnum = yytext;
  1671. bool is_octal = lnum[0] == '0';
  1672. bool contains_underscores = (memchr(lnum, '_', len) != NULL);
  /* Separators are removed on a private copy, which every exit path below
   * frees again. */
  1673. if (contains_underscores) {
  1674. lnum = estrndup(lnum, len);
  1675. strip_underscores(lnum, &len);
  1676. }
  1677. /* Digits 8 and 9 are illegal in octal literals. */
  1678. if (is_octal) {
  1679. size_t i;
  1680. for (i = 0; i < len; i++) {
  1681. if (lnum[i] == '8' || lnum[i] == '9') {
  1682. zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
  1683. if (PARSER_MODE()) {
  1684. if (contains_underscores) {
  1685. efree(lnum);
  1686. }
  1687. ZVAL_UNDEF(zendlval);
  1688. RETURN_TOKEN(T_ERROR);
  1689. }
  1690. /* Continue in order to determine if this is T_LNUMBER or T_DNUMBER. */
  /* Only the digits in front of the offending one are considered from
   * here on. */
  1691. len = i;
  1692. break;
  1693. }
  1694. }
  1695. }
  1696. if (len < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
  1697. errno = 0;
  1698. /* base must be passed explicitly for correct parse error on Windows */
  1699. ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, is_octal ? 8 : 10));
  1700. ZEND_ASSERT(end == lnum + len);
  1701. } else {
  /* Long enough that the conversion may overflow: try the integer
   * conversion first and switch to a double on ERANGE. */
  1702. errno = 0;
  1703. ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, 0));
  1704. if (errno == ERANGE) { /* Overflow */
  1705. errno = 0;
  1706. if (is_octal) { /* octal overflow */
  1707. ZVAL_DOUBLE(zendlval, zend_oct_strtod(lnum, (const char **)&end));
  1708. } else {
  1709. ZVAL_DOUBLE(zendlval, zend_strtod(lnum, (const char **)&end));
  1710. }
  1711. ZEND_ASSERT(end == lnum + len);
  1712. if (contains_underscores) {
  1713. efree(lnum);
  1714. }
  1715. RETURN_TOKEN_WITH_VAL(T_DNUMBER);
  1716. }
  1717. ZEND_ASSERT(end == lnum + len);
  1718. }
  1719. ZEND_ASSERT(!errno);
  1720. if (contains_underscores) {
  1721. efree(lnum);
  1722. }
  1723. RETURN_TOKEN_WITH_VAL(T_LNUMBER);
  1724. }
  1725. <ST_IN_SCRIPTING>{HNUM} {
  1726. /* The +/- 2 skips "0x" */
  1727. size_t len = yyleng - 2;
  1728. char *end, *hex = yytext + 2;
  1729. bool contains_underscores;
  1730. /* Skip any leading 0s */
  1731. while (len > 0 && (*hex == '0' || *hex == '_')) {
  1732. ++hex;
  1733. --len;
  1734. }
  1735. contains_underscores = (memchr(hex, '_', len) != NULL);
  1736. if (contains_underscores) {
  1737. hex = estrndup(hex, len);
  1738. strip_underscores(hex, &len);
  1739. }
  1740. if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
  1741. if (len == 0) {
  1742. ZVAL_LONG(zendlval, 0);
  1743. } else {
  1744. errno = 0;
  1745. ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
  1746. ZEND_ASSERT(!errno && end == hex + len);
  1747. }
  1748. if (contains_underscores) {
  1749. efree(hex);
  1750. }
  1751. RETURN_TOKEN_WITH_VAL(T_LNUMBER);
  1752. } else {
  1753. ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
  1754. /* errno isn't checked since we allow HUGE_VAL/INF overflow */
  1755. ZEND_ASSERT(end == hex + len);
  1756. if (contains_underscores) {
  1757. efree(hex);
  1758. }
  1759. RETURN_TOKEN_WITH_VAL(T_DNUMBER);
  1760. }
  1761. }
  /* Numeric array offsets in the ST_VAR_OFFSET state. Both rules return
   * T_NUM_STRING; they differ in whether the value is stored as an integer
   * or as a string. */
  1762. <ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
  /* Try the integer conversion only when the digit count is below the
   * limit, or equal to it with text that compares lower than
   * long_min_digits. */
  1763. if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
  1764. char *end;
  1765. errno = 0;
  1766. ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 10));
  1767. if (errno == ERANGE) {
  /* Out of range after all: jump into the else branch and keep the text. */
  1768. goto string;
  1769. }
  1770. ZEND_ASSERT(end == yytext + yyleng);
  1771. } else {
  1772. string:
  1773. ZVAL_STRINGL(zendlval, yytext, yyleng);
  1774. }
  1775. RETURN_TOKEN_WITH_VAL(T_NUM_STRING);
  1776. }
  1777. <ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM}|{ONUM} { /* Offset must be treated as a string */
  /* A one-character offset uses the shared single-character string instead
   * of a fresh allocation. */
  1778. if (yyleng == 1) {
  1779. ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*(yytext)));
  1780. } else {
  1781. ZVAL_STRINGL(zendlval, yytext, yyleng);
  1782. }
  1783. RETURN_TOKEN_WITH_VAL(T_NUM_STRING);
  1784. }
  1785. <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
  1786. const char *end;
  1787. size_t len = yyleng;
  1788. char *dnum = yytext;
  1789. bool contains_underscores = (memchr(dnum, '_', len) != NULL);
  1790. if (contains_underscores) {
  1791. dnum = estrndup(dnum, len);
  1792. strip_underscores(dnum, &len);
  1793. }
  1794. ZVAL_DOUBLE(zendlval, zend_strtod(dnum, &end));
  1795. /* errno isn't checked since we allow HUGE_VAL/INF overflow */
  1796. ZEND_ASSERT(end == dnum + len);
  1797. if (contains_underscores) {
  1798. efree(dnum);
  1799. }
  1800. RETURN_TOKEN_WITH_VAL(T_DNUMBER);
  1801. }
  /* Double-underscore constant names. Each rule returns its own token via
   * RETURN_TOKEN_WITH_IDENT; no value is computed here. */
  1802. <ST_IN_SCRIPTING>"__CLASS__" {
  1803. RETURN_TOKEN_WITH_IDENT(T_CLASS_C);
  1804. }
  1805. <ST_IN_SCRIPTING>"__TRAIT__" {
  1806. RETURN_TOKEN_WITH_IDENT(T_TRAIT_C);
  1807. }
  1808. <ST_IN_SCRIPTING>"__FUNCTION__" {
  1809. RETURN_TOKEN_WITH_IDENT(T_FUNC_C);
  1810. }
  1811. <ST_IN_SCRIPTING>"__METHOD__" {
  1812. RETURN_TOKEN_WITH_IDENT(T_METHOD_C);
  1813. }
  1814. <ST_IN_SCRIPTING>"__LINE__" {
  1815. RETURN_TOKEN_WITH_IDENT(T_LINE);
  1816. }
  1817. <ST_IN_SCRIPTING>"__FILE__" {
  1818. RETURN_TOKEN_WITH_IDENT(T_FILE);
  1819. }
  1820. <ST_IN_SCRIPTING>"__DIR__" {
  1821. RETURN_TOKEN_WITH_IDENT(T_DIR);
  1822. }
  1823. <ST_IN_SCRIPTING>"__NAMESPACE__" {
  1824. RETURN_TOKEN_WITH_IDENT(T_NS_C);
  1825. }
  /* SHEBANG state. A line starting with "#!" is consumed without producing
   * a token and counted as one source line; anything else is handed back
   * untouched. Either way scanning continues in the INITIAL state. */
  1826. <SHEBANG>"#!" .* {NEWLINE} {
  1827. CG(zend_lineno)++;
  1828. BEGIN(INITIAL);
  1829. goto restart;
  1830. }
  1831. <SHEBANG>{ANY_CHAR} {
  1832. yyless(0);
  1833. BEGIN(INITIAL);
  1834. goto restart;
  1835. }
  /* Opening-tag rules for the INITIAL state. A recognised tag switches the
   * scanner to ST_IN_SCRIPTING; an unrecognised one jumps to the
   * inline_char_handler label, which lives in the INITIAL catch-all rule. */
  1836. <INITIAL>"<?=" {
  1837. BEGIN(ST_IN_SCRIPTING);
  1838. if (PARSER_MODE()) {
  1839. /* We'll reject this as an identifier in zend_lex_tstring. */
  1840. RETURN_TOKEN_WITH_IDENT(T_ECHO);
  1841. }
  /* Outside parser mode the tag keeps its own token. */
  1842. RETURN_TOKEN(T_OPEN_TAG_WITH_ECHO);
  1843. }
  /* Full tag followed by a space, a tab or a newline. The last matched
   * character is passed to HANDLE_NEWLINE. */
  1844. <INITIAL>"<?php"([ \t]|{NEWLINE}) {
  1845. HANDLE_NEWLINE(yytext[yyleng-1]);
  1846. BEGIN(ST_IN_SCRIPTING);
  1847. RETURN_OR_SKIP_TOKEN(T_OPEN_TAG);
  1848. }
  /* Full tag that is not followed by whitespace. */
  1849. <INITIAL>"<?php" {
  1850. /* Allow <?php followed by end of file. */
  1851. if (YYCURSOR == YYLIMIT) {
  1852. BEGIN(ST_IN_SCRIPTING);
  1853. RETURN_OR_SKIP_TOKEN(T_OPEN_TAG);
  1854. }
  1855. /* Degenerate case: <?phpX is interpreted as <? phpX with short tags. */
  1856. if (CG(short_tags)) {
  1857. yyless(2);
  1858. BEGIN(ST_IN_SCRIPTING);
  1859. RETURN_OR_SKIP_TOKEN(T_OPEN_TAG);
  1860. }
  /* Not an opening tag: treat the text as inline content. */
  1861. goto inline_char_handler;
  1862. }
  /* Short tag: honoured only when the short_tags setting is on. */
  1863. <INITIAL>"<?" {
  1864. if (CG(short_tags)) {
  1865. BEGIN(ST_IN_SCRIPTING);
  1866. RETURN_OR_SKIP_TOKEN(T_OPEN_TAG);
  1867. } else {
  1868. goto inline_char_handler;
  1869. }
  1870. }
  /* Catch-all rule for the INITIAL state: collects text up to the next
   * opening tag or the end of input and returns it as one T_INLINE_HTML
   * token. The inline_char_handler label is also a jump target for the
   * opening-tag rules when a tag turns out not to be one. */
  1871. <INITIAL>{ANY_CHAR} {
  1872. if (YYCURSOR > YYLIMIT) {
  1873. RETURN_END_TOKEN;
  1874. }
  1875. inline_char_handler:
  1876. while (1) {
  /* Jump to just past the next less-than sign, or to the end of the
   * buffer when there is none. */
  1877. YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
  1878. YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
  1879. if (YYCURSOR >= YYLIMIT) {
  1880. break;
  1881. }
  1882. if (*YYCURSOR == '?') {
  1883. if (CG(short_tags) /* <? */
  1884. || (*(YYCURSOR + 1) == '=') /* <?= */
  1885. || (!strncasecmp((char*)YYCURSOR + 1, "php", 3) && /* <?php[ \t\r\n] */
  1886. (YYCURSOR + 4 == YYLIMIT ||
  1887. YYCURSOR[4] == ' ' || YYCURSOR[4] == '\t' ||
  1888. YYCURSOR[4] == '\n' || YYCURSOR[4] == '\r'))
  1889. ) {
  /* Step back onto the less-than sign so the tag is not part of this
   * token. */
  1890. YYCURSOR--;
  1891. break;
  1892. }
  1893. }
  1894. }
  1895. yyleng = YYCURSOR - SCNG(yy_text);
  1896. if (SCNG(output_filter)) {
  1897. size_t readsize;
  1898. char *s = NULL;
  1899. size_t sz = 0;
  1900. // TODO: avoid reallocation ???
  /* The filter writes its result to s/sz; the value is copied into the
   * token and the filter buffer is released. */
  1901. readsize = SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng);
  1902. ZVAL_STRINGL(zendlval, s, sz);
  1903. efree(s);
  /* NOTE(review): readsize looks like the number of input bytes the filter
   * consumed; when it is short of yyleng the token is cut back to it.
   * Confirm against the filter contract. */
  1904. if (readsize < yyleng) {
  1905. yyless(readsize);
  1906. }
  1907. } else if (yyleng == 1) {
  1908. ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*yytext));
  1909. } else {
  1910. ZVAL_STRINGL(zendlval, yytext, yyleng);
  1911. }
  1912. HANDLE_NEWLINES(yytext, yyleng);
  1913. RETURN_TOKEN_WITH_VAL(T_INLINE_HTML);
  1914. }
  /* Variable rules. The first three apply only inside double-quoted strings,
   * heredocs and backquote strings: each returns just the variable as
   * T_VARIABLE, hands the lookahead characters back with yyless and pushes
   * the state that will scan what follows. The last rule is the plain
   * variable without lookahead. */
  1915. /* Make sure a label character follows "->" or "?->", otherwise there is no property
  1916. * and "->"/"?->" will be taken literally
  1917. */
  1918. <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x80-\xff] {
  /* Give back the arrow (2 characters) and the first label character. */
  1919. yyless(yyleng - 3);
  1920. yy_push_state(ST_LOOKING_FOR_PROPERTY);
  1921. RETURN_TOKEN_WITH_STR(T_VARIABLE, 1);
  1922. }
  1923. <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"?->"[a-zA-Z_\x80-\xff] {
  /* Give back the three-character operator and the first label character. */
  1924. yyless(yyleng - 4);
  1925. yy_push_state(ST_LOOKING_FOR_PROPERTY);
  1926. RETURN_TOKEN_WITH_STR(T_VARIABLE, 1);
  1927. }
  1928. /* A [ always designates a variable offset, regardless of what follows
  1929. */
  1930. <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
  1931. yyless(yyleng - 1);
  1932. yy_push_state(ST_VAR_OFFSET);
  1933. RETURN_TOKEN_WITH_STR(T_VARIABLE, 1);
  1934. }
  1935. <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
  1936. RETURN_TOKEN_WITH_STR(T_VARIABLE, 1);
  1937. }
  /* Non-numeric rules of the ST_VAR_OFFSET state. */
  /* The closing bracket ends the offset and pops the state. */
  1938. <ST_VAR_OFFSET>"]" {
  1939. yy_pop_state();
  1940. RETURN_TOKEN(']');
  1941. }
  1942. <ST_VAR_OFFSET>{TOKENS}|[[(){}"`] {
  1943. /* Only '[' or '-' can be valid, but returning other tokens will allow a more explicit parse error */
  1944. RETURN_TOKEN(yytext[0]);
  1945. }
  1946. <ST_VAR_OFFSET>[ \n\r\t\\'#] {
  1947. /* Invalid rule to return a more explicit parse error with proper line number */
  /* The character is handed back and the state popped before a
   * null-valued T_ENCAPSED_AND_WHITESPACE is returned. */
  1948. yyless(0);
  1949. yy_pop_state();
  1950. ZVAL_NULL(zendlval);
  1951. RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
  1952. }
  /* Name rules. Labels joined by backslashes are returned as one token. The
   * second argument of RETURN_TOKEN_WITH_STR differs per rule: the length
   * of the namespace prefix for relative names, 1 for fully qualified
   * names and 0 otherwise. NOTE(review): that argument is presumably the
   * number of leading characters left out of the token value; confirm
   * against the macro definition. */
  1953. <ST_IN_SCRIPTING>"namespace"("\\"{LABEL})+ {
  1954. RETURN_TOKEN_WITH_STR(T_NAME_RELATIVE, sizeof("namespace\\") - 1);
  1955. }
  1956. <ST_IN_SCRIPTING>{LABEL}("\\"{LABEL})+ {
  1957. RETURN_TOKEN_WITH_STR(T_NAME_QUALIFIED, 0);
  1958. }
  1959. <ST_IN_SCRIPTING>"\\"{LABEL}("\\"{LABEL})* {
  1960. RETURN_TOKEN_WITH_STR(T_NAME_FULLY_QUALIFIED, 1);
  1961. }
  /* A backslash that does not start a name. */
  1962. <ST_IN_SCRIPTING>"\\" {
  1963. RETURN_TOKEN(T_NS_SEPARATOR);
  1964. }
  /* A bare label, also accepted inside a variable offset. */
  1965. <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
  1966. RETURN_TOKEN_WITH_STR(T_STRING, 0);
  1967. }
  /* One-line comment started by a hash sign or two slashes. The comment
   * extends to the end of the line or to a closing tag, whichever comes
   * first; neither of them is included in the token. */
  1968. <ST_IN_SCRIPTING>"#"|"//" {
  1969. while (YYCURSOR < YYLIMIT) {
  1970. switch (*YYCURSOR++) {
  1971. case '\r':
  1972. case '\n':
  /* Leave the line break for the next token. */
  1973. YYCURSOR--;
  1974. break;
  1975. case '?':
  1976. if (*YYCURSOR == '>') {
  /* Closing tag: step back onto the question mark and stop. */
  1977. YYCURSOR--;
  1978. break;
  1979. }
  1980. ZEND_FALLTHROUGH;
  1981. default:
  1982. continue;
  1983. }
  /* Reached only through a break inside the switch: end the loop. */
  1984. break;
  1985. }
  1986. yyleng = YYCURSOR - SCNG(yy_text);
  1987. RETURN_OR_SKIP_TOKEN(T_COMMENT);
  1988. }
  /* Block comment. The three-character opener followed by whitespace marks
   * a doc comment, which is additionally stored in CG(doc_comment) and
   * returned as T_DOC_COMMENT; every other block comment is T_COMMENT. */
  1989. <ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
  1990. int doc_com;
  /* A match longer than two characters can only be the doc-comment
   * alternative. */
  1991. if (yyleng > 2) {
  1992. doc_com = 1;
  1993. RESET_DOC_COMMENT();
  1994. } else {
  1995. doc_com = 0;
  1996. }
  /* Search for the terminator; the cursor stops on its final slash. */
  1997. while (YYCURSOR < YYLIMIT) {
  1998. if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
  1999. break;
  2000. }
  2001. }
  2002. if (YYCURSOR < YYLIMIT) {
  /* Include the final slash in the token. */
  2003. YYCURSOR++;
  2004. } else {
  /* End of input reached without a terminator. */
  2005. zend_throw_exception_ex(zend_ce_parse_error, 0, "Unterminated comment starting line %d", CG(zend_lineno));
  2006. if (PARSER_MODE()) {
  2007. RETURN_TOKEN(T_ERROR);
  2008. }
  2009. }
  2010. yyleng = YYCURSOR - SCNG(yy_text);
  2011. HANDLE_NEWLINES(yytext, yyleng);
  2012. if (doc_com) {
  2013. CG(doc_comment) = zend_string_init(yytext, yyleng, 0);
  2014. RETURN_OR_SKIP_TOKEN(T_DOC_COMMENT);
  2015. }
  2016. RETURN_OR_SKIP_TOKEN(T_COMMENT);
  2017. }
  /* Closing tag with an optional trailing newline. Switches back to the
   * INITIAL state. In parser mode a semicolon token is returned in its
   * place, otherwise T_CLOSE_TAG. */
  2018. <ST_IN_SCRIPTING>"?>"{NEWLINE}? {
  2019. BEGIN(INITIAL);
  /* The match ends in something other than the tag itself only when the
   * optional newline was consumed; flag it so the line gets counted. */
  2020. if (yytext[yyleng-1] != '>') {
  2021. CG(increment_lineno) = 1;
  2022. }
  2023. if (PARSER_MODE()) {
  2024. RETURN_TOKEN(';'); /* implicit ';' at php-end tag */
  2025. }
  2026. RETURN_TOKEN(T_CLOSE_TAG);
  2027. }
  /* Single-quoted string, optionally prefixed with the letter b. The rule
   * matches only the opening quote; the action scans to the closing quote
   * by hand, resolves the two supported escape sequences in place and
   * returns T_CONSTANT_ENCAPSED_STRING. An unterminated string yields a
   * null-valued T_ENCAPSED_AND_WHITESPACE instead. */
  2028. <ST_IN_SCRIPTING>b?['] {
  2029. char *s, *t;
  2030. char *end;
  2031. int bprefix = (yytext[0] != '\'') ? 1 : 0;
  /* Find the closing quote; the character after a backslash is skipped so
   * an escaped quote does not end the string. */
  2032. while (1) {
  2033. if (YYCURSOR < YYLIMIT) {
  2034. if (*YYCURSOR == '\'') {
  2035. YYCURSOR++;
  2036. yyleng = YYCURSOR - SCNG(yy_text);
  2037. break;
  2038. } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
  2039. YYCURSOR++;
  2040. }
  2041. } else {
  2042. yyleng = YYLIMIT - SCNG(yy_text);
  2043. /* Unclosed single quotes; treat similar to double quotes, but without a separate token
  2044. * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
  2045. * rule, which continued in ST_IN_SCRIPTING state after the quote */
  2046. ZVAL_NULL(zendlval);
  2047. RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
  2048. }
  2049. }
  /* yyleng-bprefix-2 is the length of the text between the quotes. Empty
   * and one-character strings need no escape processing and use shared
   * string values. */
  2050. if (yyleng-bprefix-2 <= 1) {
  2051. if (yyleng-bprefix-2 < 1) {
  2052. ZVAL_EMPTY_STRING(zendlval);
  2053. } else {
  2054. zend_uchar c = (zend_uchar)*(yytext+bprefix+1);
  2055. if (c == '\n' || c == '\r') {
  2056. CG(zend_lineno)++;
  2057. }
  2058. ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR(c));
  2059. }
  2060. goto skip_escape_conversion;
  2061. }
  2062. ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2);
  2063. /* convert escape sequences */
  2064. s = Z_STRVAL_P(zendlval);
  2065. end = s+Z_STRLEN_P(zendlval);
  /* First pass: advance to the first backslash while counting lines. A
   * carriage return directly followed by a line feed is not counted on its
   * own. If no backslash exists the copy is already final. */
  2066. while (1) {
  2067. if (UNEXPECTED(*s=='\\')) {
  2068. break;
  2069. }
  2070. if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
  2071. CG(zend_lineno)++;
  2072. }
  2073. s++;
  2074. if (s == end) {
  2075. goto skip_escape_conversion;
  2076. }
  2077. }
  /* Second pass: compact the buffer in place, s reading and t writing. A
   * backslash followed by a backslash or a quote collapses to that
   * character; any other backslash pair is kept as written. */
  2078. t = s;
  2079. while (s<end) {
  2080. if (*s=='\\') {
  2081. s++;
  2082. if (*s == '\\' || *s == '\'') {
  2083. *t++ = *s;
  2084. } else {
  2085. *t++ = '\\';
  2086. *t++ = *s;
  2087. }
  2088. } else {
  2089. *t++ = *s;
  2090. }
  2091. if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
  2092. CG(zend_lineno)++;
  2093. }
  2094. s++;
  2095. }
  /* Terminate and record the shortened length. */
  2096. *t = 0;
  2097. Z_STRLEN_P(zendlval) = t - Z_STRVAL_P(zendlval);
  2098. skip_escape_conversion:
  /* With an output filter installed the value is replaced by a new string
   * built from the filter result. */
  2099. if (SCNG(output_filter)) {
  2100. size_t sz = 0;
  2101. char *str = NULL;
  2102. zend_string *new_str;
  2103. s = Z_STRVAL_P(zendlval);
  2104. // TODO: avoid reallocation ???
  2105. SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
  2106. new_str = zend_string_init(str, sz, 0);
  /* Free the filter buffer unless the filter handed back the input. */
  2107. if (str != s) {
  2108. efree(str);
  2109. }
  2110. zend_string_release_ex(Z_STR_P(zendlval), 0);
  2111. ZVAL_STR(zendlval, new_str);
  2112. }
  2113. RETURN_TOKEN_WITH_VAL(T_CONSTANT_ENCAPSED_STRING);
  2114. }
  /* Opening double quote, optionally prefixed with the letter b. The action
   * scans ahead: when the closing quote is reached before any interpolation
   * marker, the whole string is returned as one
   * T_CONSTANT_ENCAPSED_STRING. Otherwise only the quote is returned and
   * the scanner enters ST_DOUBLE_QUOTES. */
  2115. <ST_IN_SCRIPTING>b?["] {
  2116. int bprefix = (yytext[0] != '"') ? 1 : 0;
  2117. while (YYCURSOR < YYLIMIT) {
  2118. switch (*YYCURSOR++) {
  2119. case '"':
  /* Closing quote with nothing to interpolate: convert escapes and
   * finish. A conversion failure is an error only in parser mode. */
  2120. yyleng = YYCURSOR - SCNG(yy_text);
  2121. if (EXPECTED(zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"') == SUCCESS)
  2122. || !PARSER_MODE()) {
  2123. RETURN_TOKEN_WITH_VAL(T_CONSTANT_ENCAPSED_STRING);
  2124. } else {
  2125. RETURN_TOKEN(T_ERROR);
  2126. }
  2127. case '$':
  /* A dollar sign starts interpolation only before a label start or an
   * opening brace. */
  2128. if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
  2129. break;
  2130. }
  2131. continue;
  2132. case '{':
  /* An opening brace starts interpolation only before a dollar sign. */
  2133. if (*YYCURSOR == '$') {
  2134. break;
  2135. }
  2136. continue;
  2137. case '\\':
  /* Skip the escaped character. */
  2138. if (YYCURSOR < YYLIMIT) {
  2139. YYCURSOR++;
  2140. }
  2141. ZEND_FALLTHROUGH;
  2142. default:
  2143. continue;
  2144. }
  /* Interpolation marker found: step back onto it and stop scanning. */
  2145. YYCURSOR--;
  2146. break;
  2147. }
  2148. /* Remember how much was scanned to save rescanning */
  2149. SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
  /* Rewind to just after the opening quote; only the quote is returned. */
  2150. YYCURSOR = SCNG(yy_text) + yyleng;
  2151. BEGIN(ST_DOUBLE_QUOTES);
  2152. RETURN_TOKEN('"');
  2153. }
  /* Heredoc and nowdoc opener: three less-than signs, optional blanks, a
   * label that may be wrapped in single or double quotes, and a newline.
   * The action records the label on the heredoc label stack, selects
   * ST_NOWDOC for a single-quoted label and ST_HEREDOC otherwise, and
   * returns T_START_HEREDOC. For heredocs it also scans ahead to the
   * closing label to learn the indentation of the body. */
  2154. <ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
  2155. char *s;
  2156. unsigned char *saved_cursor;
  2157. int bprefix = (yytext[0] != '<') ? 1 : 0, spacing = 0, indentation = 0;
  2158. zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
  2159. bool is_heredoc = 1;
  2160. CG(zend_lineno)++;
  /* Label length: the match minus the optional prefix, the three
   * less-than signs and the newline (one more for a carriage return and
   * line feed pair). Blanks and quotes are subtracted below. */
  2161. heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
  2162. s = yytext+bprefix+3;
  2163. while ((*s == ' ') || (*s == '\t')) {
  2164. s++;
  2165. heredoc_label->length--;
  2166. }
  2167. if (*s == '\'') {
  2168. s++;
  2169. heredoc_label->length -= 2;
  2170. is_heredoc = 0;
  2171. BEGIN(ST_NOWDOC);
  2172. } else {
  2173. if (*s == '"') {
  2174. s++;
  2175. heredoc_label->length -= 2;
  2176. }
  2177. BEGIN(ST_HEREDOC);
  2178. }
  2179. heredoc_label->label = estrndup(s, heredoc_label->length);
  2180. heredoc_label->indentation_uses_spaces = 0;
  2181. heredoc_label->indentation = 0;
  2182. saved_cursor = YYCURSOR;
  2183. zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
  /* Measure the indentation of the first body line, noting whether tabs,
   * spaces or both were used. */
  2184. while (YYCURSOR < YYLIMIT && (*YYCURSOR == ' ' || *YYCURSOR == '\t')) {
  2185. if (*YYCURSOR == '\t') {
  2186. spacing |= HEREDOC_USING_TABS;
  2187. } else {
  2188. spacing |= HEREDOC_USING_SPACES;
  2189. }
  2190. ++YYCURSOR;
  2191. ++indentation;
  2192. }
  /* Input ends inside the indentation: rewind and return the opener. */
  2193. if (YYCURSOR == YYLIMIT) {
  2194. YYCURSOR = saved_cursor;
  2195. RETURN_TOKEN(T_START_HEREDOC);
  2196. }
  2197. /* Check for ending label on the next line */
  2198. if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
  2199. if (!IS_LABEL_SUCCESSOR(YYCURSOR[heredoc_label->length])) {
  2200. if (spacing == (HEREDOC_USING_SPACES | HEREDOC_USING_TABS)) {
  2201. zend_throw_exception(zend_ce_parse_error, "Invalid indentation - tabs and spaces cannot be mixed", 0);
  2202. if (PARSER_MODE()) {
  2203. RETURN_TOKEN(T_ERROR);
  2204. }
  2205. }
  /* Empty body: go straight to ST_END_HEREDOC. */
  2206. YYCURSOR = saved_cursor;
  2207. heredoc_label->indentation = indentation;
  2208. BEGIN(ST_END_HEREDOC);
  2209. RETURN_TOKEN(T_START_HEREDOC);
  2210. }
  2211. }
  2212. YYCURSOR = saved_cursor;
  /* Scan-ahead, done for heredocs only and never while already scanning
   * ahead: save the lexer state, lex forward until the matching
   * T_END_HEREDOC (nested heredocs are counted), take the indentation
   * recorded along the way, then restore the state. */
  2213. if (is_heredoc && !SCNG(heredoc_scan_ahead)) {
  2214. zend_lex_state current_state;
  2215. zend_string *saved_doc_comment = CG(doc_comment);
  2216. int heredoc_nesting_level = 1;
  2217. int first_token = 0;
  2218. int error = 0;
  2219. zend_save_lexical_state(&current_state);
  2220. SCNG(heredoc_scan_ahead) = 1;
  2221. SCNG(heredoc_indentation) = 0;
  2222. SCNG(heredoc_indentation_uses_spaces) = 0;
  2223. LANG_SCNG(on_event) = NULL;
  2224. CG(doc_comment) = NULL;
  2225. zend_ptr_stack_reverse_apply(&current_state.heredoc_label_stack, copy_heredoc_label_stack);
  2226. zend_exception_save();
  2227. while (heredoc_nesting_level) {
  2228. zval zv;
  2229. int retval;
  2230. ZVAL_UNDEF(&zv);
  2231. retval = lex_scan(&zv, NULL);
  2232. zval_ptr_dtor_nogc(&zv);
  /* An exception raised during the scan-ahead is discarded and ends it. */
  2233. if (EG(exception)) {
  2234. zend_clear_exception();
  2235. break;
  2236. }
  2237. if (!first_token) {
  2238. first_token = retval;
  2239. }
  2240. switch (retval) {
  2241. case T_START_HEREDOC:
  2242. ++heredoc_nesting_level;
  2243. break;
  2244. case T_END_HEREDOC:
  2245. --heredoc_nesting_level;
  2246. break;
  2247. case END:
  2248. heredoc_nesting_level = 0;
  2249. }
  2250. }
  2251. zend_exception_restore();
  /* A body whose first token is an interpolation while the closing label
   * is indented is reported as under-indented. */
  2252. if (
  2253. (first_token == T_VARIABLE
  2254. || first_token == T_DOLLAR_OPEN_CURLY_BRACES
  2255. || first_token == T_CURLY_OPEN
  2256. ) && SCNG(heredoc_indentation)) {
  2257. zend_throw_exception_ex(zend_ce_parse_error, 0, "Invalid body indentation level (expecting an indentation level of at least %d)", SCNG(heredoc_indentation));
  2258. error = 1;
  2259. }
  2260. heredoc_label->indentation = SCNG(heredoc_indentation);
  2261. heredoc_label->indentation_uses_spaces = SCNG(heredoc_indentation_uses_spaces);
  2262. zend_restore_lexical_state(&current_state);
  2263. SCNG(heredoc_scan_ahead) = 0;
  2264. CG(increment_lineno) = 0;
  2265. CG(doc_comment) = saved_doc_comment;
  2266. if (PARSER_MODE() && error) {
  2267. RETURN_TOKEN(T_ERROR);
  2268. }
  2269. }
  2270. RETURN_TOKEN(T_START_HEREDOC);
  2271. }
  /* Short state-switching rules around string-like constructs. */
  /* Opening backquote: enter ST_BACKQUOTE. */
  2272. <ST_IN_SCRIPTING>[`] {
  2273. BEGIN(ST_BACKQUOTE);
  2274. RETURN_TOKEN('`');
  2275. }
  /* Closing heredoc label. The rule matches a single character; the action
   * extends the token to cover the recorded indentation plus the label,
   * frees the label record and goes back to ST_IN_SCRIPTING. */
  2276. <ST_END_HEREDOC>{ANY_CHAR} {
  2277. zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
  2278. yyleng = heredoc_label->indentation + heredoc_label->length;
  /* One character has already been consumed by the match. */
  2279. YYCURSOR += yyleng - 1;
  2280. heredoc_label_dtor(heredoc_label);
  2281. efree(heredoc_label);
  2282. BEGIN(ST_IN_SCRIPTING);
  2283. RETURN_TOKEN(T_END_HEREDOC);
  2284. }
  /* Opening brace directly followed by a dollar sign inside a string:
   * push ST_IN_SCRIPTING and hand the dollar sign back so the variable is
   * scanned next. */
  2285. <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
  2286. yy_push_state(ST_IN_SCRIPTING);
  2287. yyless(1);
  2288. enter_nesting('{');
  2289. RETURN_TOKEN(T_CURLY_OPEN);
  2290. }
  /* Closing quote of a double-quoted string. */
  2291. <ST_DOUBLE_QUOTES>["] {
  2292. BEGIN(ST_IN_SCRIPTING);
  2293. RETURN_TOKEN('"');
  2294. }
  /* Closing backquote. */
  2295. <ST_BACKQUOTE>[`] {
  2296. BEGIN(ST_IN_SCRIPTING);
  2297. RETURN_TOKEN('`');
  2298. }
  /* Literal text inside a double-quoted string. Collects characters up to
   * the closing quote or the next interpolation marker and returns them,
   * with escapes converted, as T_ENCAPSED_AND_WHITESPACE. */
  2299. <ST_DOUBLE_QUOTES>{ANY_CHAR} {
  /* Reuse the length measured when the opening quote was scanned, if one
   * was stored, instead of scanning the same text again. */
  2300. if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
  2301. YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
  2302. SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);
  2303. goto double_quotes_scan_done;
  2304. }
  2305. if (YYCURSOR > YYLIMIT) {
  2306. RETURN_END_TOKEN;
  2307. }
  /* The matched character was a backslash: skip what it escapes. */
  2308. if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
  2309. YYCURSOR++;
  2310. }
  2311. while (YYCURSOR < YYLIMIT) {
  2312. switch (*YYCURSOR++) {
  2313. case '"':
  2314. break;
  2315. case '$':
  2316. if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
  2317. break;
  2318. }
  2319. continue;
  2320. case '{':
  2321. if (*YYCURSOR == '$') {
  2322. break;
  2323. }
  2324. continue;
  2325. case '\\':
  2326. if (YYCURSOR < YYLIMIT) {
  2327. YYCURSOR++;
  2328. }
  2329. ZEND_FALLTHROUGH;
  2330. default:
  2331. continue;
  2332. }
  /* Stop character found: leave it for the next token. */
  2333. YYCURSOR--;
  2334. break;
  2335. }
  2336. double_quotes_scan_done:
  2337. yyleng = YYCURSOR - SCNG(yy_text);
  /* A failed escape conversion is an error only in parser mode. */
  2338. if (EXPECTED(zend_scan_escape_string(zendlval, yytext, yyleng, '"') == SUCCESS)
  2339. || !PARSER_MODE()) {
  2340. RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
  2341. } else {
  2342. RETURN_TOKEN(T_ERROR);
  2343. }
  2344. }
  /* Literal text inside a backquote string. Collects characters up to the
   * closing backquote or the next interpolation marker and returns them,
   * with escapes converted, as T_ENCAPSED_AND_WHITESPACE. */
  2345. <ST_BACKQUOTE>{ANY_CHAR} {
  2346. if (YYCURSOR > YYLIMIT) {
  2347. RETURN_END_TOKEN;
  2348. }
  /* The matched character was a backslash: skip what it escapes. */
  2349. if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
  2350. YYCURSOR++;
  2351. }
  2352. while (YYCURSOR < YYLIMIT) {
  2353. switch (*YYCURSOR++) {
  2354. case '`':
  2355. break;
  2356. case '$':
  2357. if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
  2358. break;
  2359. }
  2360. continue;
  2361. case '{':
  2362. if (*YYCURSOR == '$') {
  2363. break;
  2364. }
  2365. continue;
  2366. case '\\':
  2367. if (YYCURSOR < YYLIMIT) {
  2368. YYCURSOR++;
  2369. }
  2370. ZEND_FALLTHROUGH;
  2371. default:
  2372. continue;
  2373. }
  /* Stop character found: leave it for the next token. */
  2374. YYCURSOR--;
  2375. break;
  2376. }
  2377. yyleng = YYCURSOR - SCNG(yy_text);
  /* A failed escape conversion is an error only in parser mode. */
  2378. if (EXPECTED(zend_scan_escape_string(zendlval, yytext, yyleng, '`') == SUCCESS)
  2379. || !PARSER_MODE()) {
  2380. RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
  2381. } else {
  2382. RETURN_TOKEN(T_ERROR);
  2383. }
  2384. }
  /* Literal text inside a heredoc body. Collects characters up to the next
   * interpolation marker or up to the line holding the closing label, and
   * returns them as T_ENCAPSED_AND_WHITESPACE. In parser mode, outside the
   * scan-ahead, the body indentation is stripped and escapes are
   * converted. */
  2385. <ST_HEREDOC>{ANY_CHAR} {
  2386. zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
  2387. int newline = 0, indentation = 0, spacing = 0;
  2388. if (YYCURSOR > YYLIMIT) {
  2389. RETURN_END_TOKEN;
  2390. }
  /* Back up so the loop also inspects the character the rule matched. */
  2391. YYCURSOR--;
  2392. while (YYCURSOR < YYLIMIT) {
  2393. switch (*YYCURSOR++) {
  2394. case '\r':
  2395. if (*YYCURSOR == '\n') {
  2396. YYCURSOR++;
  2397. }
  2398. ZEND_FALLTHROUGH;
  2399. case '\n':
  /* Start of a new line: measure its indentation, noting whether tabs,
   * spaces or both were used. */
  2400. indentation = spacing = 0;
  2401. while (YYCURSOR < YYLIMIT && (*YYCURSOR == ' ' || *YYCURSOR == '\t')) {
  2402. if (*YYCURSOR == '\t') {
  2403. spacing |= HEREDOC_USING_TABS;
  2404. } else {
  2405. spacing |= HEREDOC_USING_SPACES;
  2406. }
  2407. ++YYCURSOR;
  2408. ++indentation;
  2409. }
  /* Input ends inside the indentation: return a null-valued token. */
  2410. if (YYCURSOR == YYLIMIT) {
  2411. yyleng = YYCURSOR - SCNG(yy_text);
  2412. HANDLE_NEWLINES(yytext, yyleng);
  2413. ZVAL_NULL(zendlval);
  2414. RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
  2415. }
  2416. /* Check for ending label on the next line */
  2417. if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
  /* The label text is only the beginning of a longer identifier: this is
   * not the closing label. */
  2418. if (IS_LABEL_SUCCESSOR(YYCURSOR[heredoc_label->length])) {
  2419. continue;
  2420. }
  2421. if (spacing == (HEREDOC_USING_SPACES | HEREDOC_USING_TABS)) {
  2422. zend_throw_exception(zend_ce_parse_error, "Invalid indentation - tabs and spaces cannot be mixed", 0);
  2423. if (PARSER_MODE()) {
  2424. RETURN_TOKEN(T_ERROR);
  2425. }
  2426. }
  2427. /* newline before label will be subtracted from returned text, but
  2428. * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
  2429. if (YYCURSOR[-indentation - 2] == '\r' && YYCURSOR[-indentation - 1] == '\n') {
  2430. newline = 2; /* Windows newline */
  2431. } else {
  2432. newline = 1;
  2433. }
  2434. CG(increment_lineno) = 1; /* For newline before label */
  /* While scanning ahead, record the indentation of the closing label for
   * the opener rule; otherwise rewind over the indentation so that it is
   * left for the ST_END_HEREDOC rule. */
  2435. if (SCNG(heredoc_scan_ahead)) {
  2436. SCNG(heredoc_indentation) = indentation;
  2437. SCNG(heredoc_indentation_uses_spaces) = (spacing == HEREDOC_USING_SPACES);
  2438. } else {
  2439. YYCURSOR -= indentation;
  2440. }
  2441. BEGIN(ST_END_HEREDOC);
  2442. goto heredoc_scan_done;
  2443. }
  2444. continue;
  2445. case '$':
  2446. if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
  2447. break;
  2448. }
  2449. continue;
  2450. case '{':
  2451. if (*YYCURSOR == '$') {
  2452. break;
  2453. }
  2454. continue;
  2455. case '\\':
  /* Skip the escaped character, but never a line break, so that the
   * newline handling above still sees it. */
  2456. if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
  2457. YYCURSOR++;
  2458. }
  2459. ZEND_FALLTHROUGH;
  2460. default:
  2461. continue;
  2462. }
  /* Interpolation marker found: leave it for the next token. */
  2463. YYCURSOR--;
  2464. break;
  2465. }
  2466. heredoc_scan_done:
  2467. yyleng = YYCURSOR - SCNG(yy_text);
  /* The value leaves out the newline in front of the closing label. */
  2468. ZVAL_STRINGL(zendlval, yytext, yyleng - newline);
  2469. if (!SCNG(heredoc_scan_ahead) && !EG(exception) && PARSER_MODE()) {
  2470. bool newline_at_start = *(yytext - 1) == '\n' || *(yytext - 1) == '\r';
  /* Keep a handle on the raw string; it is freed on both paths below once
   * escape conversion has been attempted. */
  2471. zend_string *copy = Z_STR_P(zendlval);
  2472. if (!strip_multiline_string_indentation(
  2473. zendlval, heredoc_label->indentation, heredoc_label->indentation_uses_spaces,
  2474. newline_at_start, newline != 0)) {
  2475. RETURN_TOKEN(T_ERROR);
  2476. }
  2477. if (UNEXPECTED(zend_scan_escape_string(zendlval, ZSTR_VAL(copy), ZSTR_LEN(copy), 0) != SUCCESS)) {
  2478. zend_string_efree(copy);
  2479. RETURN_TOKEN(T_ERROR);
  2480. }
  2481. zend_string_efree(copy);
  2482. } else {
  2483. HANDLE_NEWLINES(yytext, yyleng - newline);
  2484. }
  2485. RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
  2486. }
  2487. <ST_NOWDOC>{ANY_CHAR} {
        /* Scans the body of a nowdoc. Text is consumed verbatim from the current
         * position until either a line is found that starts (after optional
         * spaces/tabs) with the closing label taken from the top of the heredoc
         * label stack, or YYLIMIT is reached. Only line breaks are inspected;
         * no other character gets special treatment in this rule. The result is
         * a T_ENCAPSED_AND_WHITESPACE token whose value is placed in zendlval. */
  2488. zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
        /* newline:     length of the line break in front of the closing label
         *              (0 while no closing label has been found).
         * indentation: number of leading spaces/tabs on the line examined last.
         * spacing:     -1 until the first line break is seen; afterwards a bit
         *              mask of HEREDOC_USING_SPACES / HEREDOC_USING_TABS for the
         *              line examined last. */
  2489. int newline = 0, indentation = 0, spacing = -1;
        /* Cursor is already beyond YYLIMIT: no input is left for this rule. */
  2490. if (YYCURSOR > YYLIMIT) {
  2491. RETURN_END_TOKEN;
  2492. }
        /* Step back one position so the loop below starts with the character
         * that was (presumably) consumed by the {ANY_CHAR} match itself. */
  2493. YYCURSOR--;
  2494. while (YYCURSOR < YYLIMIT) {
  2495. switch (*YYCURSOR++) {
  2496. case '\r':
        /* Treat "\r\n" as a single line break. */
  2497. if (*YYCURSOR == '\n') {
  2498. YYCURSOR++;
  2499. }
  2500. ZEND_FALLTHROUGH;
  2501. case '\n':
        /* A new line starts: reset the per-line state, then count its leading
         * whitespace and remember which kinds (spaces, tabs) were used. */
  2502. indentation = spacing = 0;
  2503. while (YYCURSOR < YYLIMIT && (*YYCURSOR == ' ' || *YYCURSOR == '\t')) {
  2504. if (*YYCURSOR == '\t') {
  2505. spacing |= HEREDOC_USING_TABS;
  2506. } else {
  2507. spacing |= HEREDOC_USING_SPACES;
  2508. }
  2509. ++YYCURSOR;
  2510. ++indentation;
  2511. }
        /* Input ended inside the leading whitespace: return everything scanned
         * so far as the token text, with a NULL value in zendlval. */
  2512. if (YYCURSOR == YYLIMIT) {
  2513. yyleng = YYCURSOR - SCNG(yy_text);
  2514. HANDLE_NEWLINES(yytext, yyleng);
  2515. ZVAL_NULL(zendlval);
  2516. RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
  2517. }
  2518. /* Check for ending label on the next line */
        /* The length test is strict (<), so the byte following the label is
         * still below YYLIMIT when it is inspected just below. */
  2519. if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
        /* The matched bytes are followed by another label character, so this is
         * only a prefix of a longer identifier, not the closing label. */
  2520. if (IS_LABEL_SUCCESSOR(YYCURSOR[heredoc_label->length])) {
  2521. continue;
  2522. }
        /* Both bits set means the closing line mixes tabs and spaces. The parse
         * error is thrown in every mode, but only parser mode stops with
         * T_ERROR; otherwise scanning goes on with the exception pending. */
  2523. if (spacing == (HEREDOC_USING_SPACES | HEREDOC_USING_TABS)) {
  2524. zend_throw_exception(zend_ce_parse_error, "Invalid indentation - tabs and spaces cannot be mixed", 0);
  2525. if (PARSER_MODE()) {
  2526. RETURN_TOKEN(T_ERROR);
  2527. }
  2528. }
  2529. /* newline before label will be subtracted from returned text, but
  2530. * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
        /* Look behind the indentation to tell a two-byte "\r\n" from a
         * one-byte line break. */
  2531. if (YYCURSOR[-indentation - 2] == '\r' && YYCURSOR[-indentation - 1] == '\n') {
  2532. newline = 2; /* Windows newline */
  2533. } else {
  2534. newline = 1;
  2535. }
  2536. CG(increment_lineno) = 1; /* For newline before label */
        /* Rewind over the closing line's indentation so it is not part of this
         * token, and record its width on the label entry. */
  2537. YYCURSOR -= indentation;
  2538. heredoc_label->indentation = indentation;
        /* NOTE(review): BEGIN presumably switches the scanner condition so the
         * closing label is matched by the ST_END_HEREDOC rules - confirm. */
  2539. BEGIN(ST_END_HEREDOC);
  2540. goto nowdoc_scan_done;
  2541. }
        /* Line break not followed by the closing label: keep scanning, exactly
         * like for any other character. */
  2542. ZEND_FALLTHROUGH;
  2543. default:
  2544. continue;
  2545. }
  2546. }
        /* Reached via the goto above (closing label found, newline is 1 or 2)
         * or by the loop running out of input (newline is still 0). */
  2547. nowdoc_scan_done:
        /* yyleng covers the trailing line break; the string value does not. */
  2548. yyleng = YYCURSOR - SCNG(yy_text);
  2549. ZVAL_STRINGL(zendlval, yytext, yyleng - newline);
        /* Indentation is stripped only in parser mode, when no exception is
         * pending and at least one line break was seen (spacing != -1). The
         * indentation/spacing values are those of the line examined last. */
  2550. if (!EG(exception) && spacing != -1 && PARSER_MODE()) {
        /* Peeks at the byte just before the token text to learn whether the
         * body begins directly after a line break. */
  2551. bool newline_at_start = *(yytext - 1) == '\n' || *(yytext - 1) == '\r';
  2552. if (!strip_multiline_string_indentation(
  2553. zendlval, indentation, spacing == HEREDOC_USING_SPACES,
  2554. newline_at_start, newline != 0)) {
  2555. RETURN_TOKEN(T_ERROR);
  2556. }
  2557. }
        /* The final line break is left out of this call; CG(increment_lineno)
         * was set above on its behalf. */
  2558. HANDLE_NEWLINES(yytext, yyleng - newline);
  2559. RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
  2560. }
  2561. <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
        /* Rule for a single {ANY_CHAR} in the ST_IN_SCRIPTING and ST_VAR_OFFSET
         * conditions. When the cursor has moved beyond YYLIMIT the end token is
         * returned; otherwise the character is reported as T_BAD_CHARACTER.
         * NOTE(review): presumably this only fires when no other rule of these
         * conditions matched - confirm against the rule order in the file. */
  2562. if (YYCURSOR > YYLIMIT) {
  2563. RETURN_END_TOKEN;
  2564. }
  2565. RETURN_TOKEN(T_BAD_CHARACTER);
  2566. }
  2567. */
  2568. emit_token_with_str:
  2569. zend_copy_value(zendlval, (yytext + offset), (yyleng - offset));
  2570. emit_token_with_val:
  2571. if (PARSER_MODE()) {
  2572. ZEND_ASSERT(Z_TYPE_P(zendlval) != IS_UNDEF);
  2573. elem->ast = zend_ast_create_zval_with_lineno(zendlval, start_line);
  2574. }
  2575. emit_token:
  2576. if (SCNG(on_event)) {
  2577. SCNG(on_event)(ON_TOKEN, token, start_line, yytext, yyleng, SCNG(on_event_context));
  2578. }
  2579. return token;
  2580. emit_token_with_ident:
  2581. if (PARSER_MODE()) {
  2582. elem->ident = SCNG(yy_text);
  2583. }
  2584. if (SCNG(on_event)) {
  2585. SCNG(on_event)(ON_TOKEN, token, start_line, yytext, yyleng, SCNG(on_event_context));
  2586. }
  2587. return token;
  2588. return_whitespace:
  2589. HANDLE_NEWLINES(yytext, yyleng);
  2590. if (SCNG(on_event)) {
  2591. SCNG(on_event)(ON_TOKEN, T_WHITESPACE, start_line, yytext, yyleng, SCNG(on_event_context));
  2592. }
  2593. if (PARSER_MODE()) {
  2594. start_line = CG(zend_lineno);
  2595. goto restart;
  2596. } else {
  2597. return T_WHITESPACE;
  2598. }
  2599. skip_token:
  2600. if (SCNG(on_event)) {
  2601. SCNG(on_event)(ON_TOKEN, token, start_line, yytext, yyleng, SCNG(on_event_context));
  2602. }
  2603. start_line = CG(zend_lineno);
  2604. goto restart;
  2605. }