zend_language_scanner.l 66 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814
  1. /*
  2. +----------------------------------------------------------------------+
  3. | Zend Engine |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1998-2018 Zend Technologies Ltd. (http://www.zend.com) |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 2.00 of the Zend license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.zend.com/license/2_00.txt. |
  11. | If you did not receive a copy of the Zend license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@zend.com so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Authors: Marcus Boerger <helly@php.net> |
  16. | Nuno Lopes <nlopess@php.net> |
  17. | Scott MacVicar <scottmac@php.net> |
  18. | Flex version authors: |
  19. | Andi Gutmans <andi@php.net> |
  20. | Zeev Suraski <zeev@php.net> |
  21. +----------------------------------------------------------------------+
  22. */
  /* Scanner trace hook. The printf() variant is compiled out by the "#if 0";
   * as shipped, YYDEBUG expands to nothing and its arguments are not evaluated. */
  #if 0
  # define YYDEBUG(s, c) printf("state: %d char: %c\n", (s), (c))
  #else
  # define YYDEBUG(s, c)
  #endif
  28. #include "zend_language_scanner_defs.h"
  29. #include <errno.h>
  30. #include "zend.h"
  31. #ifdef ZEND_WIN32
  32. # include <Winuser.h>
  33. #endif
  34. #include "zend_alloc.h"
  35. #include <zend_language_parser.h>
  36. #include "zend_compile.h"
  37. #include "zend_language_scanner.h"
  38. #include "zend_highlight.h"
  39. #include "zend_constants.h"
  40. #include "zend_variables.h"
  41. #include "zend_operators.h"
  42. #include "zend_API.h"
  43. #include "zend_strtod.h"
  44. #include "zend_exceptions.h"
  45. #include "zend_virtual_cwd.h"
  46. #include "tsrm_config_common.h"
  /* Glue between the generated scanner and the scanner globals (SCNG). */

  /* Type of one input character as seen by the scanner. */
  #define YYCTYPE unsigned char
  /* Make the enclosing function return 0 when advancing the cursor by n would
   * reach or pass the limit plus the ZEND_MMAP_AHEAD slack. */
  #define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
  #define YYCURSOR SCNG(yy_cursor)
  #define YYLIMIT SCNG(yy_limit)
  #define YYMARKER SCNG(yy_marker)
  #define YYGETCONDITION() SCNG(yy_state)
  #define YYSETCONDITION(s) SCNG(yy_state) = (s)
  /* Map a condition name to its yyc-prefixed identifier. */
  #define STATE(name) yyc##name
  /* emulate flex constructs */
  #define BEGIN(state) YYSETCONDITION(STATE(state))
  #define YYSTATE YYGETCONDITION()
  #define yytext ((char*)SCNG(yy_text))
  #define yyleng SCNG(yy_leng)
  /* Rewind the cursor so that only the first x bytes of the current token stay
   * consumed. x is parenthesised so that any expression is a valid argument;
   * note that it is still evaluated twice. */
  #define yyless(x) do { YYCURSOR = (unsigned char*)yytext + (x); \
      yyleng = (unsigned int)(x); } while(0)
  #define yymore() goto yymore_restart
  63. /* perform sanity check. If this message is triggered you should
  64. increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
  65. /*!max:re2c */
  66. #if ZEND_MMAP_AHEAD < YYMAXFILL
  67. # error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
  68. #endif
  69. #ifdef HAVE_STDARG_H
  70. # include <stdarg.h>
  71. #endif
  72. #ifdef HAVE_UNISTD_H
  73. # include <unistd.h>
  74. #endif
  75. /* Globals Macros */
  76. #define SCNG LANG_SCNG
  77. #ifdef ZTS
  78. ZEND_API ts_rsrc_id language_scanner_globals_id;
  79. #else
  80. ZEND_API zend_php_scanner_globals language_scanner_globals;
  81. #endif
  /* Bump CG(zend_lineno) once per line break in the l bytes starting at s.
   * A "\n" and a lone "\r" each count; the "\r" of a "\r\n" pair is skipped so
   * the pair is counted once, on its "\n". The byte following a "\r" is always
   * inspected, even when the "\r" is the last byte of the range. */
  #define HANDLE_NEWLINES(s, l) \
  do { \
      char *nl_cur = (s), *nl_end = nl_cur + (l); \
      \
      for (; nl_cur < nl_end; nl_cur++) { \
          if (*nl_cur == '\n' || (*nl_cur == '\r' && nl_cur[1] != '\n')) { \
              CG(zend_lineno)++; \
          } \
      } \
  } while (0)
  /* Bump CG(zend_lineno) when the single character c is "\n" or "\r".
   * The argument is captured once in a local, so expressions with side effects
   * (e.g. *p++) or with low-precedence operators are safe to pass. The macro
   * deliberately remains a bare brace block, as before, so existing call sites
   * keep compiling whether or not they add a trailing semicolon. */
  #define HANDLE_NEWLINE(c) \
  { \
      const int zend_newline_ch_ = (c); \
      if (zend_newline_ch_ == '\n' || zend_newline_ch_ == '\r') { \
          CG(zend_lineno)++; \
      } \
  }
  /* Store / fetch the length of the leading part of a double-quoted string
   * that was scanned before its first variable. */
  #define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) SCNG(scanned_string_len) = (len)
  #define GET_DOUBLE_QUOTES_SCANNED_LENGTH() SCNG(scanned_string_len)

  /* Character classes. Each macro evaluates its argument several times, so
   * pass only side-effect-free expressions. */

  /* First character of a label: ASCII letter, '_' or any value >= 0x80. */
  #define IS_LABEL_START(c) \
      (((c) >= 'a' && (c) <= 'z') || \
       ((c) >= 'A' && (c) <= 'Z') || \
       (c) == '_' || \
       (c) >= 0x80)
  /* Any later character of a label: a label start or an ASCII digit. */
  #define IS_LABEL_SUCCESSOR(c) \
      (IS_LABEL_START(c) || ((c) >= '0' && (c) <= '9'))
  /* Octal digit. */
  #define ZEND_IS_OCT(c) \
      ((c) >= '0' && (c) <= '7')
  /* Hexadecimal digit, either case. */
  #define ZEND_IS_HEX(c) \
      (((c) >= '0' && (c) <= '9') || \
       ((c) >= 'a' && (c) <= 'f') || \
       ((c) >= 'A' && (c) <= 'F'))
  106. BEGIN_EXTERN_C()
  107. static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
  108. {
  109. const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
  110. ZEND_ASSERT(internal_encoding);
  111. return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding));
  112. }
  113. static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
  114. {
  115. return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding));
  116. }
  117. static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
  118. {
  119. return zend_multibyte_encoding_converter(to, to_length, from, from_length,
  120. LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8);
  121. }
  122. static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
  123. {
  124. const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
  125. ZEND_ASSERT(internal_encoding);
  126. return zend_multibyte_encoding_converter(to, to_length, from, from_length,
  127. internal_encoding, zend_multibyte_encoding_utf8);
  128. }
  129. static void _yy_push_state(int new_state)
  130. {
  131. zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION());
  132. YYSETCONDITION(new_state);
  133. }
  134. #define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
  135. static void yy_pop_state(void)
  136. {
  137. int *stack_state = zend_stack_top(&SCNG(state_stack));
  138. YYSETCONDITION(*stack_state);
  139. zend_stack_del_top(&SCNG(state_stack));
  140. }
  141. static void yy_scan_buffer(char *str, unsigned int len)
  142. {
  143. YYCURSOR = (YYCTYPE*)str;
  144. YYLIMIT = YYCURSOR + len;
  145. if (!SCNG(yy_start)) {
  146. SCNG(yy_start) = YYCURSOR;
  147. }
  148. }
  149. void startup_scanner(void)
  150. {
  151. CG(parse_error) = 0;
  152. CG(doc_comment) = NULL;
  153. CG(extra_fn_flags) = 0;
  154. zend_stack_init(&SCNG(state_stack), sizeof(int));
  155. zend_ptr_stack_init(&SCNG(heredoc_label_stack));
  156. SCNG(heredoc_scan_ahead) = 0;
  157. }
  158. static void heredoc_label_dtor(zend_heredoc_label *heredoc_label) {
  159. efree(heredoc_label->label);
  160. }
  161. void shutdown_scanner(void)
  162. {
  163. CG(parse_error) = 0;
  164. RESET_DOC_COMMENT();
  165. zend_stack_destroy(&SCNG(state_stack));
  166. zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
  167. zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
  168. SCNG(heredoc_scan_ahead) = 0;
  169. SCNG(on_event) = NULL;
  170. }
  171. ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
  172. {
  173. lex_state->yy_leng = SCNG(yy_leng);
  174. lex_state->yy_start = SCNG(yy_start);
  175. lex_state->yy_text = SCNG(yy_text);
  176. lex_state->yy_cursor = SCNG(yy_cursor);
  177. lex_state->yy_marker = SCNG(yy_marker);
  178. lex_state->yy_limit = SCNG(yy_limit);
  179. lex_state->state_stack = SCNG(state_stack);
  180. zend_stack_init(&SCNG(state_stack), sizeof(int));
  181. lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
  182. zend_ptr_stack_init(&SCNG(heredoc_label_stack));
  183. lex_state->in = SCNG(yy_in);
  184. lex_state->yy_state = YYSTATE;
  185. lex_state->filename = zend_get_compiled_filename();
  186. lex_state->lineno = CG(zend_lineno);
  187. lex_state->script_org = SCNG(script_org);
  188. lex_state->script_org_size = SCNG(script_org_size);
  189. lex_state->script_filtered = SCNG(script_filtered);
  190. lex_state->script_filtered_size = SCNG(script_filtered_size);
  191. lex_state->input_filter = SCNG(input_filter);
  192. lex_state->output_filter = SCNG(output_filter);
  193. lex_state->script_encoding = SCNG(script_encoding);
  194. lex_state->on_event = SCNG(on_event);
  195. lex_state->on_event_context = SCNG(on_event_context);
  196. lex_state->ast = CG(ast);
  197. lex_state->ast_arena = CG(ast_arena);
  198. }
  199. ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
  200. {
  201. SCNG(yy_leng) = lex_state->yy_leng;
  202. SCNG(yy_start) = lex_state->yy_start;
  203. SCNG(yy_text) = lex_state->yy_text;
  204. SCNG(yy_cursor) = lex_state->yy_cursor;
  205. SCNG(yy_marker) = lex_state->yy_marker;
  206. SCNG(yy_limit) = lex_state->yy_limit;
  207. zend_stack_destroy(&SCNG(state_stack));
  208. SCNG(state_stack) = lex_state->state_stack;
  209. zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
  210. zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
  211. SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
  212. SCNG(yy_in) = lex_state->in;
  213. YYSETCONDITION(lex_state->yy_state);
  214. CG(zend_lineno) = lex_state->lineno;
  215. zend_restore_compiled_filename(lex_state->filename);
  216. if (SCNG(script_filtered)) {
  217. efree(SCNG(script_filtered));
  218. SCNG(script_filtered) = NULL;
  219. }
  220. SCNG(script_org) = lex_state->script_org;
  221. SCNG(script_org_size) = lex_state->script_org_size;
  222. SCNG(script_filtered) = lex_state->script_filtered;
  223. SCNG(script_filtered_size) = lex_state->script_filtered_size;
  224. SCNG(input_filter) = lex_state->input_filter;
  225. SCNG(output_filter) = lex_state->output_filter;
  226. SCNG(script_encoding) = lex_state->script_encoding;
  227. SCNG(on_event) = lex_state->on_event;
  228. SCNG(on_event_context) = lex_state->on_event_context;
  229. CG(ast) = lex_state->ast;
  230. CG(ast_arena) = lex_state->ast_arena;
  231. RESET_DOC_COMMENT();
  232. }
  233. ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle)
  234. {
  235. zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
  236. /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
  237. file_handle->opened_path = NULL;
  238. if (file_handle->free_filename) {
  239. file_handle->filename = NULL;
  240. }
  241. }
  242. ZEND_API void zend_lex_tstring(zval *zv)
  243. {
  244. if (SCNG(on_event)) {
  245. SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, SCNG(on_event_context));
  246. }
  247. ZVAL_STRINGL(zv, (char*)SCNG(yy_text), SCNG(yy_leng));
  248. }
  /* Byte-order marks, grouped by width. Users take sizeof(BOM_x)-1 as the
   * mark's length, so each literal must stay exactly as written. */

  /* 3 bytes */
  #define BOM_UTF8 "\xef\xbb\xbf"
  /* 2 bytes */
  #define BOM_UTF16_BE "\xfe\xff"
  #define BOM_UTF16_LE "\xff\xfe"
  /* 4 bytes */
  #define BOM_UTF32_BE "\x00\x00\xfe\xff"
  #define BOM_UTF32_LE "\xff\xfe\x00\x00"
  254. static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size)
  255. {
  256. const unsigned char *p;
  257. int wchar_size = 2;
  258. int le = 0;
  259. /* utf-16 or utf-32? */
  260. p = script;
  261. assert(p >= script);
  262. while ((size_t)(p-script) < script_size) {
  263. p = memchr(p, 0, script_size-(p-script)-2);
  264. if (!p) {
  265. break;
  266. }
  267. if (*(p+1) == '\0' && *(p+2) == '\0') {
  268. wchar_size = 4;
  269. break;
  270. }
  271. /* searching for UTF-32 specific byte orders, so this will do */
  272. p += 4;
  273. }
  274. /* BE or LE? */
  275. p = script;
  276. assert(p >= script);
  277. while ((size_t)(p-script) < script_size) {
  278. if (*p == '\0' && *(p+wchar_size-1) != '\0') {
  279. /* BE */
  280. le = 0;
  281. break;
  282. } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
  283. /* LE* */
  284. le = 1;
  285. break;
  286. }
  287. p += wchar_size;
  288. }
  289. if (wchar_size == 2) {
  290. return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
  291. } else {
  292. return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
  293. }
  294. return NULL;
  295. }
  296. static const zend_encoding* zend_multibyte_detect_unicode(void)
  297. {
  298. const zend_encoding *script_encoding = NULL;
  299. int bom_size;
  300. unsigned char *pos1, *pos2;
  301. if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
  302. return NULL;
  303. }
  304. /* check out BOM */
  305. if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
  306. script_encoding = zend_multibyte_encoding_utf32be;
  307. bom_size = sizeof(BOM_UTF32_BE)-1;
  308. } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
  309. script_encoding = zend_multibyte_encoding_utf32le;
  310. bom_size = sizeof(BOM_UTF32_LE)-1;
  311. } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
  312. script_encoding = zend_multibyte_encoding_utf16be;
  313. bom_size = sizeof(BOM_UTF16_BE)-1;
  314. } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
  315. script_encoding = zend_multibyte_encoding_utf16le;
  316. bom_size = sizeof(BOM_UTF16_LE)-1;
  317. } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
  318. script_encoding = zend_multibyte_encoding_utf8;
  319. bom_size = sizeof(BOM_UTF8)-1;
  320. }
  321. if (script_encoding) {
  322. /* remove BOM */
  323. LANG_SCNG(script_org) += bom_size;
  324. LANG_SCNG(script_org_size) -= bom_size;
  325. return script_encoding;
  326. }
  327. /* script contains NULL bytes -> auto-detection */
  328. if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
  329. /* check if the NULL byte is after the __HALT_COMPILER(); */
  330. pos2 = LANG_SCNG(script_org);
  331. while ((size_t)(pos1 - pos2) >= sizeof("__HALT_COMPILER();")-1) {
  332. pos2 = memchr(pos2, '_', pos1 - pos2);
  333. if (!pos2) break;
  334. pos2++;
  335. if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
  336. pos2 += sizeof("_HALT_COMPILER")-1;
  337. while (*pos2 == ' ' ||
  338. *pos2 == '\t' ||
  339. *pos2 == '\r' ||
  340. *pos2 == '\n') {
  341. pos2++;
  342. }
  343. if (*pos2 == '(') {
  344. pos2++;
  345. while (*pos2 == ' ' ||
  346. *pos2 == '\t' ||
  347. *pos2 == '\r' ||
  348. *pos2 == '\n') {
  349. pos2++;
  350. }
  351. if (*pos2 == ')') {
  352. pos2++;
  353. while (*pos2 == ' ' ||
  354. *pos2 == '\t' ||
  355. *pos2 == '\r' ||
  356. *pos2 == '\n') {
  357. pos2++;
  358. }
  359. if (*pos2 == ';') {
  360. return NULL;
  361. }
  362. }
  363. }
  364. }
  365. }
  366. /* make best effort if BOM is missing */
  367. return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size));
  368. }
  369. return NULL;
  370. }
  371. static const zend_encoding* zend_multibyte_find_script_encoding(void)
  372. {
  373. const zend_encoding *script_encoding;
  374. if (CG(detect_unicode)) {
  375. /* check out bom(byte order mark) and see if containing wchars */
  376. script_encoding = zend_multibyte_detect_unicode();
  377. if (script_encoding != NULL) {
  378. /* bom or wchar detection is prior to 'script_encoding' option */
  379. return script_encoding;
  380. }
  381. }
  382. /* if no script_encoding specified, just leave alone */
  383. if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
  384. return NULL;
  385. }
  386. /* if multiple encodings specified, detect automagically */
  387. if (CG(script_encoding_list_size) > 1) {
  388. return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size));
  389. }
  390. return CG(script_encoding_list)[0];
  391. }
  392. ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding)
  393. {
  394. const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
  395. const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding();
  396. if (!script_encoding) {
  397. return FAILURE;
  398. }
  399. /* judge input/output filter */
  400. LANG_SCNG(script_encoding) = script_encoding;
  401. LANG_SCNG(input_filter) = NULL;
  402. LANG_SCNG(output_filter) = NULL;
  403. if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
  404. if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
  405. /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
  406. LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
  407. LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
  408. } else {
  409. LANG_SCNG(input_filter) = NULL;
  410. LANG_SCNG(output_filter) = NULL;
  411. }
  412. return SUCCESS;
  413. }
  414. if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
  415. LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
  416. LANG_SCNG(output_filter) = NULL;
  417. } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
  418. LANG_SCNG(input_filter) = NULL;
  419. LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
  420. } else {
  421. /* both script and internal encodings are incompatible w/ flex */
  422. LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
  423. LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
  424. }
  425. return 0;
  426. }
  427. ZEND_API int open_file_for_scanning(zend_file_handle *file_handle)
  428. {
  429. char *buf;
  430. size_t size, offset = 0;
  431. zend_string *compiled_filename;
  432. /* The shebang line was read, get the current position to obtain the buffer start */
  433. if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
  434. if ((offset = ftell(file_handle->handle.fp)) == (size_t)-1) {
  435. offset = 0;
  436. }
  437. }
  438. if (zend_stream_fixup(file_handle, &buf, &size) == FAILURE) {
  439. return FAILURE;
  440. }
  441. zend_llist_add_element(&CG(open_files), file_handle);
  442. if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
  443. zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
  444. size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
  445. fh->handle.stream.handle = (void*)(((char*)fh) + diff);
  446. file_handle->handle.stream.handle = fh->handle.stream.handle;
  447. }
  448. /* Reset the scanner for scanning the new file */
  449. SCNG(yy_in) = file_handle;
  450. SCNG(yy_start) = NULL;
  451. if (size != (size_t)-1) {
  452. if (CG(multibyte)) {
  453. SCNG(script_org) = (unsigned char*)buf;
  454. SCNG(script_org_size) = size;
  455. SCNG(script_filtered) = NULL;
  456. zend_multibyte_set_filter(NULL);
  457. if (SCNG(input_filter)) {
  458. if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
  459. zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
  460. "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
  461. }
  462. buf = (char*)SCNG(script_filtered);
  463. size = SCNG(script_filtered_size);
  464. }
  465. }
  466. SCNG(yy_start) = (unsigned char *)buf - offset;
  467. yy_scan_buffer(buf, (unsigned int)size);
  468. } else {
  469. zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
  470. }
  471. BEGIN(INITIAL);
  472. if (file_handle->opened_path) {
  473. compiled_filename = zend_string_copy(file_handle->opened_path);
  474. } else {
  475. compiled_filename = zend_string_init(file_handle->filename, strlen(file_handle->filename), 0);
  476. }
  477. zend_set_compiled_filename(compiled_filename);
  478. zend_string_release_ex(compiled_filename, 0);
  479. if (CG(start_lineno)) {
  480. CG(zend_lineno) = CG(start_lineno);
  481. CG(start_lineno) = 0;
  482. } else {
  483. CG(zend_lineno) = 1;
  484. }
  485. RESET_DOC_COMMENT();
  486. CG(increment_lineno) = 0;
  487. return SUCCESS;
  488. }
  489. END_EXTERN_C()
  490. static zend_op_array *zend_compile(int type)
  491. {
  492. zend_op_array *op_array = NULL;
  493. zend_bool original_in_compilation = CG(in_compilation);
  494. CG(in_compilation) = 1;
  495. CG(ast) = NULL;
  496. CG(ast_arena) = zend_arena_create(1024 * 32);
  497. if (!zendparse()) {
  498. int last_lineno = CG(zend_lineno);
  499. zend_file_context original_file_context;
  500. zend_oparray_context original_oparray_context;
  501. zend_op_array *original_active_op_array = CG(active_op_array);
  502. op_array = emalloc(sizeof(zend_op_array));
  503. init_op_array(op_array, type, INITIAL_OP_ARRAY_SIZE);
  504. CG(active_op_array) = op_array;
  505. if (zend_ast_process) {
  506. zend_ast_process(CG(ast));
  507. }
  508. zend_file_context_begin(&original_file_context);
  509. zend_oparray_context_begin(&original_oparray_context);
  510. zend_compile_top_stmt(CG(ast));
  511. CG(zend_lineno) = last_lineno;
  512. zend_emit_final_return(type == ZEND_USER_FUNCTION);
  513. op_array->line_start = 1;
  514. op_array->line_end = last_lineno;
  515. pass_two(op_array);
  516. zend_oparray_context_end(&original_oparray_context);
  517. zend_file_context_end(&original_file_context);
  518. CG(active_op_array) = original_active_op_array;
  519. }
  520. zend_ast_destroy(CG(ast));
  521. zend_arena_destroy(CG(ast_arena));
  522. CG(in_compilation) = original_in_compilation;
  523. return op_array;
  524. }
  525. ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type)
  526. {
  527. zend_lex_state original_lex_state;
  528. zend_op_array *op_array = NULL;
  529. zend_save_lexical_state(&original_lex_state);
  530. if (open_file_for_scanning(file_handle)==FAILURE) {
  531. if (type==ZEND_REQUIRE) {
  532. zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename);
  533. zend_bailout();
  534. } else {
  535. zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename);
  536. }
  537. } else {
  538. op_array = zend_compile(ZEND_USER_FUNCTION);
  539. }
  540. zend_restore_lexical_state(&original_lex_state);
  541. return op_array;
  542. }
  543. zend_op_array *compile_filename(int type, zval *filename)
  544. {
  545. zend_file_handle file_handle;
  546. zval tmp;
  547. zend_op_array *retval;
  548. zend_string *opened_path = NULL;
  549. if (Z_TYPE_P(filename) != IS_STRING) {
  550. ZVAL_STR(&tmp, zval_get_string(filename));
  551. filename = &tmp;
  552. }
  553. file_handle.filename = Z_STRVAL_P(filename);
  554. file_handle.free_filename = 0;
  555. file_handle.type = ZEND_HANDLE_FILENAME;
  556. file_handle.opened_path = NULL;
  557. file_handle.handle.fp = NULL;
  558. retval = zend_compile_file(&file_handle, type);
  559. if (retval && file_handle.handle.stream.handle) {
  560. if (!file_handle.opened_path) {
  561. file_handle.opened_path = opened_path = zend_string_copy(Z_STR_P(filename));
  562. }
  563. zend_hash_add_empty_element(&EG(included_files), file_handle.opened_path);
  564. if (opened_path) {
  565. zend_string_release_ex(opened_path, 0);
  566. }
  567. }
  568. zend_destroy_file_handle(&file_handle);
  569. if (UNEXPECTED(filename == &tmp)) {
  570. zval_ptr_dtor(&tmp);
  571. }
  572. return retval;
  573. }
  574. ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename)
  575. {
  576. char *buf;
  577. size_t size, old_len;
  578. zend_string *new_compiled_filename;
  579. /* enforce ZEND_MMAP_AHEAD trailing NULLs for flex... */
  580. old_len = Z_STRLEN_P(str);
  581. Z_STR_P(str) = zend_string_extend(Z_STR_P(str), old_len + ZEND_MMAP_AHEAD, 0);
  582. Z_TYPE_INFO_P(str) = IS_STRING_EX;
  583. memset(Z_STRVAL_P(str) + old_len, 0, ZEND_MMAP_AHEAD + 1);
  584. SCNG(yy_in) = NULL;
  585. SCNG(yy_start) = NULL;
  586. buf = Z_STRVAL_P(str);
  587. size = old_len;
  588. if (CG(multibyte)) {
  589. SCNG(script_org) = (unsigned char*)buf;
  590. SCNG(script_org_size) = size;
  591. SCNG(script_filtered) = NULL;
  592. zend_multibyte_set_filter(zend_multibyte_get_internal_encoding());
  593. if (SCNG(input_filter)) {
  594. if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
  595. zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
  596. "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
  597. }
  598. buf = (char*)SCNG(script_filtered);
  599. size = SCNG(script_filtered_size);
  600. }
  601. }
  602. yy_scan_buffer(buf, (unsigned int)size);
  603. new_compiled_filename = zend_string_init(filename, strlen(filename), 0);
  604. zend_set_compiled_filename(new_compiled_filename);
  605. zend_string_release_ex(new_compiled_filename, 0);
  606. CG(zend_lineno) = 1;
  607. CG(increment_lineno) = 0;
  608. RESET_DOC_COMMENT();
  609. return SUCCESS;
  610. }
  611. ZEND_API size_t zend_get_scanned_file_offset(void)
  612. {
  613. size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
  614. if (SCNG(input_filter)) {
  615. size_t original_offset = offset, length = 0;
  616. do {
  617. unsigned char *p = NULL;
  618. if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset)) {
  619. return (size_t)-1;
  620. }
  621. efree(p);
  622. if (length > original_offset) {
  623. offset--;
  624. } else if (length < original_offset) {
  625. offset++;
  626. }
  627. } while (original_offset != length);
  628. }
  629. return offset;
  630. }
  631. zend_op_array *compile_string(zval *source_string, char *filename)
  632. {
  633. zend_lex_state original_lex_state;
  634. zend_op_array *op_array = NULL;
  635. zval tmp;
  636. if (UNEXPECTED(Z_TYPE_P(source_string) != IS_STRING)) {
  637. ZVAL_STR(&tmp, zval_get_string_func(source_string));
  638. } else {
  639. ZVAL_COPY(&tmp, source_string);
  640. }
  641. if (Z_STRLEN(tmp)==0) {
  642. zval_ptr_dtor(&tmp);
  643. return NULL;
  644. }
  645. zend_save_lexical_state(&original_lex_state);
  646. if (zend_prepare_string_for_scanning(&tmp, filename) == SUCCESS) {
  647. BEGIN(ST_IN_SCRIPTING);
  648. op_array = zend_compile(ZEND_EVAL_CODE);
  649. }
  650. zend_restore_lexical_state(&original_lex_state);
  651. zval_ptr_dtor(&tmp);
  652. return op_array;
  653. }
  654. BEGIN_EXTERN_C()
  655. int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini)
  656. {
  657. zend_lex_state original_lex_state;
  658. zend_file_handle file_handle;
  659. file_handle.type = ZEND_HANDLE_FILENAME;
  660. file_handle.filename = filename;
  661. file_handle.free_filename = 0;
  662. file_handle.opened_path = NULL;
  663. zend_save_lexical_state(&original_lex_state);
  664. if (open_file_for_scanning(&file_handle)==FAILURE) {
  665. zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename);
  666. zend_restore_lexical_state(&original_lex_state);
  667. return FAILURE;
  668. }
  669. zend_highlight(syntax_highlighter_ini);
  670. if (SCNG(script_filtered)) {
  671. efree(SCNG(script_filtered));
  672. SCNG(script_filtered) = NULL;
  673. }
  674. zend_destroy_file_handle(&file_handle);
  675. zend_restore_lexical_state(&original_lex_state);
  676. return SUCCESS;
  677. }
  678. int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name)
  679. {
  680. zend_lex_state original_lex_state;
  681. zval tmp;
  682. if (UNEXPECTED(Z_TYPE_P(str) != IS_STRING)) {
  683. ZVAL_STR(&tmp, zval_get_string_func(str));
  684. str = &tmp;
  685. }
  686. zend_save_lexical_state(&original_lex_state);
  687. if (zend_prepare_string_for_scanning(str, str_name)==FAILURE) {
  688. zend_restore_lexical_state(&original_lex_state);
  689. if (UNEXPECTED(str == &tmp)) {
  690. zval_ptr_dtor(&tmp);
  691. }
  692. return FAILURE;
  693. }
  694. BEGIN(INITIAL);
  695. zend_highlight(syntax_highlighter_ini);
  696. if (SCNG(script_filtered)) {
  697. efree(SCNG(script_filtered));
  698. SCNG(script_filtered) = NULL;
  699. }
  700. zend_restore_lexical_state(&original_lex_state);
  701. if (UNEXPECTED(str == &tmp)) {
  702. zval_ptr_dtor(&tmp);
  703. }
  704. return SUCCESS;
  705. }
  706. ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding)
  707. {
  708. size_t length;
  709. unsigned char *new_yy_start;
  710. /* convert and set */
  711. if (!SCNG(input_filter)) {
  712. if (SCNG(script_filtered)) {
  713. efree(SCNG(script_filtered));
  714. SCNG(script_filtered) = NULL;
  715. }
  716. SCNG(script_filtered_size) = 0;
  717. length = SCNG(script_org_size);
  718. new_yy_start = SCNG(script_org);
  719. } else {
  720. if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size))) {
  721. zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
  722. "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
  723. }
  724. if (SCNG(script_filtered)) {
  725. efree(SCNG(script_filtered));
  726. }
  727. SCNG(script_filtered) = new_yy_start;
  728. SCNG(script_filtered_size) = length;
  729. }
  730. SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
  731. SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
  732. SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
  733. SCNG(yy_limit) = new_yy_start + length;
  734. SCNG(yy_start) = new_yy_start;
  735. }
  736. // TODO: avoid reallocation ???
  737. # define zend_copy_value(zendlval, yytext, yyleng) \
  738. if (SCNG(output_filter)) { \
  739. size_t sz = 0; \
  740. char *s = NULL; \
  741. SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng); \
  742. ZVAL_STRINGL(zendlval, s, sz); \
  743. efree(s); \
  744. } else if (yyleng == 1) { \
  745. ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*(yytext))); \
  746. } else { \
  747. ZVAL_STRINGL(zendlval, yytext, yyleng); \
  748. }
  749. static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type)
  750. {
  751. register char *s, *t;
  752. char *end;
  753. if (len <= 1) {
  754. if (len < 1) {
  755. ZVAL_EMPTY_STRING(zendlval);
  756. } else {
  757. zend_uchar c = (zend_uchar)*str;
  758. if (c == '\n' || c == '\r') {
  759. CG(zend_lineno)++;
  760. }
  761. ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR(c));
  762. }
  763. goto skip_escape_conversion;
  764. }
  765. ZVAL_STRINGL(zendlval, str, len);
  766. /* convert escape sequences */
  767. s = Z_STRVAL_P(zendlval);
  768. end = s+Z_STRLEN_P(zendlval);
  769. while (1) {
  770. if (UNEXPECTED(*s=='\\')) {
  771. break;
  772. }
  773. if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
  774. CG(zend_lineno)++;
  775. }
  776. s++;
  777. if (s == end) {
  778. goto skip_escape_conversion;
  779. }
  780. }
  781. t = s;
  782. while (s<end) {
  783. if (*s=='\\') {
  784. s++;
  785. if (s >= end) {
  786. *t++ = '\\';
  787. break;
  788. }
  789. switch(*s) {
  790. case 'n':
  791. *t++ = '\n';
  792. break;
  793. case 'r':
  794. *t++ = '\r';
  795. break;
  796. case 't':
  797. *t++ = '\t';
  798. break;
  799. case 'f':
  800. *t++ = '\f';
  801. break;
  802. case 'v':
  803. *t++ = '\v';
  804. break;
  805. case 'e':
  806. #ifdef ZEND_WIN32
  807. *t++ = VK_ESCAPE;
  808. #else
  809. *t++ = '\e';
  810. #endif
  811. break;
  812. case '"':
  813. case '`':
  814. if (*s != quote_type) {
  815. *t++ = '\\';
  816. *t++ = *s;
  817. break;
  818. }
  819. case '\\':
  820. case '$':
  821. *t++ = *s;
  822. break;
  823. case 'x':
  824. case 'X':
  825. if (ZEND_IS_HEX(*(s+1))) {
  826. char hex_buf[3] = { 0, 0, 0 };
  827. hex_buf[0] = *(++s);
  828. if (ZEND_IS_HEX(*(s+1))) {
  829. hex_buf[1] = *(++s);
  830. }
  831. *t++ = (char) ZEND_STRTOL(hex_buf, NULL, 16);
  832. } else {
  833. *t++ = '\\';
  834. *t++ = *s;
  835. }
  836. break;
  837. /* UTF-8 codepoint escape, format: /\\u\{\x+\}/ */
  838. case 'u':
  839. {
  840. /* cache where we started so we can parse after validating */
  841. char *start = s + 1;
  842. size_t len = 0;
  843. zend_bool valid = 1;
  844. unsigned long codepoint;
  845. if (*start != '{') {
  846. /* we silently let this pass to avoid breaking code
  847. * with JSON in string literals (e.g. "\"\u202e\""
  848. */
  849. *t++ = '\\';
  850. *t++ = 'u';
  851. break;
  852. } else {
  853. /* on the other hand, invalid \u{blah} errors */
  854. s++;
  855. len++;
  856. s++;
  857. while (*s != '}') {
  858. if (!ZEND_IS_HEX(*s)) {
  859. valid = 0;
  860. break;
  861. } else {
  862. len++;
  863. }
  864. s++;
  865. }
  866. if (*s == '}') {
  867. valid = 1;
  868. len++;
  869. }
  870. }
  871. /* \u{} is invalid */
  872. if (len <= 2) {
  873. valid = 0;
  874. }
  875. if (!valid) {
  876. zend_throw_exception(zend_ce_parse_error,
  877. "Invalid UTF-8 codepoint escape sequence", 0);
  878. zval_ptr_dtor(zendlval);
  879. ZVAL_UNDEF(zendlval);
  880. return FAILURE;
  881. }
  882. errno = 0;
  883. codepoint = strtoul(start + 1, NULL, 16);
  884. /* per RFC 3629, UTF-8 can only represent 21 bits */
  885. if (codepoint > 0x10FFFF || errno) {
  886. zend_throw_exception(zend_ce_parse_error,
  887. "Invalid UTF-8 codepoint escape sequence: Codepoint too large", 0);
  888. zval_ptr_dtor(zendlval);
  889. ZVAL_UNDEF(zendlval);
  890. return FAILURE;
  891. }
  892. /* based on https://en.wikipedia.org/wiki/UTF-8#Sample_code */
  893. if (codepoint < 0x80) {
  894. *t++ = codepoint;
  895. } else if (codepoint <= 0x7FF) {
  896. *t++ = (codepoint >> 6) + 0xC0;
  897. *t++ = (codepoint & 0x3F) + 0x80;
  898. } else if (codepoint <= 0xFFFF) {
  899. *t++ = (codepoint >> 12) + 0xE0;
  900. *t++ = ((codepoint >> 6) & 0x3F) + 0x80;
  901. *t++ = (codepoint & 0x3F) + 0x80;
  902. } else if (codepoint <= 0x10FFFF) {
  903. *t++ = (codepoint >> 18) + 0xF0;
  904. *t++ = ((codepoint >> 12) & 0x3F) + 0x80;
  905. *t++ = ((codepoint >> 6) & 0x3F) + 0x80;
  906. *t++ = (codepoint & 0x3F) + 0x80;
  907. }
  908. }
  909. break;
  910. default:
  911. /* check for an octal */
  912. if (ZEND_IS_OCT(*s)) {
  913. char octal_buf[4] = { 0, 0, 0, 0 };
  914. octal_buf[0] = *s;
  915. if (ZEND_IS_OCT(*(s+1))) {
  916. octal_buf[1] = *(++s);
  917. if (ZEND_IS_OCT(*(s+1))) {
  918. octal_buf[2] = *(++s);
  919. }
  920. }
  921. if (octal_buf[2] && (octal_buf[0] > '3') && !SCNG(heredoc_scan_ahead)) {
  922. /* 3 octit values must not overflow 0xFF (\377) */
  923. zend_error(E_COMPILE_WARNING, "Octal escape sequence overflow \\%s is greater than \\377", octal_buf);
  924. }
  925. *t++ = (char) ZEND_STRTOL(octal_buf, NULL, 8);
  926. } else {
  927. *t++ = '\\';
  928. *t++ = *s;
  929. }
  930. break;
  931. }
  932. } else {
  933. *t++ = *s;
  934. }
  935. if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
  936. CG(zend_lineno)++;
  937. }
  938. s++;
  939. }
  940. *t = 0;
  941. Z_STRLEN_P(zendlval) = t - Z_STRVAL_P(zendlval);
  942. skip_escape_conversion:
  943. if (SCNG(output_filter)) {
  944. size_t sz = 0;
  945. unsigned char *str;
  946. // TODO: avoid realocation ???
  947. s = Z_STRVAL_P(zendlval);
  948. SCNG(output_filter)(&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
  949. zval_ptr_dtor(zendlval);
  950. ZVAL_STRINGL(zendlval, (char *) str, sz);
  951. efree(str);
  952. }
  953. return SUCCESS;
  954. }
/* Distinct non-zero markers for the two whitespace kinds; presumably they
 * record which kind a heredoc/nowdoc closing-marker indentation uses --
 * confirm against the code that reads them. */
#define HEREDOC_USING_SPACES 1
#define HEREDOC_USING_TABS 2
  957. static const char *next_newline(const char *str, const char *end, size_t *newline_len) {
  958. for (; str < end; str++) {
  959. if (*str == '\r') {
  960. *newline_len = str + 1 < end && *(str + 1) == '\n' ? 2 : 1;
  961. return str;
  962. } else if (*str == '\n') {
  963. *newline_len = 1;
  964. return str;
  965. }
  966. }
  967. *newline_len = 0;
  968. return NULL;
  969. }
  970. static zend_bool strip_multiline_string_indentation(
  971. zval *zendlval, int indentation, zend_bool using_spaces,
  972. zend_bool newline_at_start, zend_bool newline_at_end)
  973. {
  974. const char *str = Z_STRVAL_P(zendlval), *end = str + Z_STRLEN_P(zendlval);
  975. char *copy = Z_STRVAL_P(zendlval);
  976. int newline_count = 0;
  977. size_t newline_len;
  978. const char *nl;
  979. if (!newline_at_start) {
  980. nl = next_newline(str, end, &newline_len);
  981. if (!nl) {
  982. return 1;
  983. }
  984. str = nl + newline_len;
  985. copy = (char *) nl + newline_len;
  986. newline_count++;
  987. } else {
  988. nl = str;
  989. }
  990. /* <= intentional */
  991. while (str <= end && nl) {
  992. size_t skip;
  993. nl = next_newline(str, end, &newline_len);
  994. if (!nl && newline_at_end) {
  995. nl = end;
  996. }
  997. /* Try to skip indentation */
  998. for (skip = 0; skip < indentation; skip++, str++) {
  999. if (str == nl) {
  1000. /* Don't require full indentation on whitespace-only lines */
  1001. break;
  1002. }
  1003. if (str == end || (*str != ' ' && *str != '\t')) {
  1004. CG(zend_lineno) += newline_count;
  1005. zend_throw_exception_ex(zend_ce_parse_error, 0,
  1006. "Invalid body indentation level (expecting an indentation level of at least %d)", indentation);
  1007. goto error;
  1008. }
  1009. if ((!using_spaces && *str == ' ') || (using_spaces && *str == '\t')) {
  1010. CG(zend_lineno) += newline_count;
  1011. zend_throw_exception(zend_ce_parse_error,
  1012. "Invalid indentation - tabs and spaces cannot be mixed", 0);
  1013. goto error;
  1014. }
  1015. }
  1016. if (str == end) {
  1017. break;
  1018. }
  1019. size_t len = nl ? (nl - str + newline_len) : (end - str);
  1020. memmove(copy, str, len);
  1021. str += len;
  1022. copy += len;
  1023. newline_count++;
  1024. }
  1025. *copy = '\0';
  1026. Z_STRLEN_P(zendlval) = copy - Z_STRVAL_P(zendlval);
  1027. return 1;
  1028. error:
  1029. zval_ptr_dtor_str(zendlval);
  1030. ZVAL_UNDEF(zendlval);
  1031. return 0;
  1032. }
  1033. static void copy_heredoc_label_stack(void *void_heredoc_label)
  1034. {
  1035. zend_heredoc_label *heredoc_label = void_heredoc_label;
  1036. zend_heredoc_label *new_heredoc_label = emalloc(sizeof(zend_heredoc_label));
  1037. *new_heredoc_label = *heredoc_label;
  1038. new_heredoc_label->label = estrndup(heredoc_label->label, heredoc_label->length);
  1039. zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) new_heredoc_label);
  1040. }
/*
 * Helper macros for the scanner rule actions in lex_scan(). They rely on
 * names that must be in scope where they are expanded: the locals `token`
 * and `offset`, the parameter `elem`, and the labels they jump to.
 */

/* True when `elem` is non-NULL; hinted as the expected case. */
#define PARSER_MODE() \
	EXPECTED(elem != NULL)

/* Record the token id and jump to the `emit_token` label. */
#define RETURN_TOKEN(_token) do { \
		token = _token; \
		goto emit_token; \
	} while (0)

/* Record the token id and jump to `emit_token_with_val`; the rule actions
 * using it fill zendlval before invoking it. */
#define RETURN_TOKEN_WITH_VAL(_token) do { \
		token = _token; \
		goto emit_token_with_val; \
	} while (0)

/* Record the token id and an offset, then jump to `emit_token_with_str`.
 * NOTE(review): the offset is presumably the number of leading bytes of the
 * token text to leave out of the string value -- confirm at the label. */
#define RETURN_TOKEN_WITH_STR(_token, _offset) do { \
		token = _token; \
		offset = _offset; \
		goto emit_token_with_str; \
	} while (0)

/* Record the token id and jump to the `skip_token` label. */
#define SKIP_TOKEN(_token) do { \
		token = _token; \
		goto skip_token; \
	} while (0)
  1060. int ZEND_FASTCALL lex_scan(zval *zendlval, zend_parser_stack_elem *elem)
  1061. {
  1062. int token;
  1063. int offset;
  1064. int start_line = CG(zend_lineno);
  1065. ZVAL_UNDEF(zendlval);
  1066. restart:
  1067. SCNG(yy_text) = YYCURSOR;
  1068. /*!re2c
  1069. re2c:yyfill:check = 0;
  1070. LNUM [0-9]+
  1071. DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
  1072. EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
  1073. HNUM "0x"[0-9a-fA-F]+
  1074. BNUM "0b"[01]+
  1075. LABEL [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
  1076. WHITESPACE [ \n\r\t]+
  1077. TABS_AND_SPACES [ \t]*
  1078. TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
  1079. ANY_CHAR [^]
  1080. NEWLINE ("\r"|"\n"|"\r\n")
  1081. /* compute yyleng before each rule */
  1082. <!*> := yyleng = YYCURSOR - SCNG(yy_text);
  1083. <ST_IN_SCRIPTING>"exit" {
  1084. RETURN_TOKEN(T_EXIT);
  1085. }
  1086. <ST_IN_SCRIPTING>"die" {
  1087. RETURN_TOKEN(T_EXIT);
  1088. }
  1089. <ST_IN_SCRIPTING>"function" {
  1090. RETURN_TOKEN(T_FUNCTION);
  1091. }
  1092. <ST_IN_SCRIPTING>"const" {
  1093. RETURN_TOKEN(T_CONST);
  1094. }
  1095. <ST_IN_SCRIPTING>"return" {
  1096. RETURN_TOKEN(T_RETURN);
  1097. }
  1098. <ST_IN_SCRIPTING>"yield"{WHITESPACE}"from"[^a-zA-Z0-9_\x80-\xff] {
  1099. yyless(yyleng - 1);
  1100. HANDLE_NEWLINES(yytext, yyleng);
  1101. RETURN_TOKEN(T_YIELD_FROM);
  1102. }
  1103. <ST_IN_SCRIPTING>"yield" {
  1104. RETURN_TOKEN(T_YIELD);
  1105. }
  1106. <ST_IN_SCRIPTING>"try" {
  1107. RETURN_TOKEN(T_TRY);
  1108. }
  1109. <ST_IN_SCRIPTING>"catch" {
  1110. RETURN_TOKEN(T_CATCH);
  1111. }
  1112. <ST_IN_SCRIPTING>"finally" {
  1113. RETURN_TOKEN(T_FINALLY);
  1114. }
  1115. <ST_IN_SCRIPTING>"throw" {
  1116. RETURN_TOKEN(T_THROW);
  1117. }
  1118. <ST_IN_SCRIPTING>"if" {
  1119. RETURN_TOKEN(T_IF);
  1120. }
  1121. <ST_IN_SCRIPTING>"elseif" {
  1122. RETURN_TOKEN(T_ELSEIF);
  1123. }
  1124. <ST_IN_SCRIPTING>"endif" {
  1125. RETURN_TOKEN(T_ENDIF);
  1126. }
  1127. <ST_IN_SCRIPTING>"else" {
  1128. RETURN_TOKEN(T_ELSE);
  1129. }
  1130. <ST_IN_SCRIPTING>"while" {
  1131. RETURN_TOKEN(T_WHILE);
  1132. }
  1133. <ST_IN_SCRIPTING>"endwhile" {
  1134. RETURN_TOKEN(T_ENDWHILE);
  1135. }
  1136. <ST_IN_SCRIPTING>"do" {
  1137. RETURN_TOKEN(T_DO);
  1138. }
  1139. <ST_IN_SCRIPTING>"for" {
  1140. RETURN_TOKEN(T_FOR);
  1141. }
  1142. <ST_IN_SCRIPTING>"endfor" {
  1143. RETURN_TOKEN(T_ENDFOR);
  1144. }
  1145. <ST_IN_SCRIPTING>"foreach" {
  1146. RETURN_TOKEN(T_FOREACH);
  1147. }
  1148. <ST_IN_SCRIPTING>"endforeach" {
  1149. RETURN_TOKEN(T_ENDFOREACH);
  1150. }
  1151. <ST_IN_SCRIPTING>"declare" {
  1152. RETURN_TOKEN(T_DECLARE);
  1153. }
  1154. <ST_IN_SCRIPTING>"enddeclare" {
  1155. RETURN_TOKEN(T_ENDDECLARE);
  1156. }
  1157. <ST_IN_SCRIPTING>"instanceof" {
  1158. RETURN_TOKEN(T_INSTANCEOF);
  1159. }
  1160. <ST_IN_SCRIPTING>"as" {
  1161. RETURN_TOKEN(T_AS);
  1162. }
  1163. <ST_IN_SCRIPTING>"switch" {
  1164. RETURN_TOKEN(T_SWITCH);
  1165. }
  1166. <ST_IN_SCRIPTING>"endswitch" {
  1167. RETURN_TOKEN(T_ENDSWITCH);
  1168. }
  1169. <ST_IN_SCRIPTING>"case" {
  1170. RETURN_TOKEN(T_CASE);
  1171. }
  1172. <ST_IN_SCRIPTING>"default" {
  1173. RETURN_TOKEN(T_DEFAULT);
  1174. }
  1175. <ST_IN_SCRIPTING>"break" {
  1176. RETURN_TOKEN(T_BREAK);
  1177. }
  1178. <ST_IN_SCRIPTING>"continue" {
  1179. RETURN_TOKEN(T_CONTINUE);
  1180. }
  1181. <ST_IN_SCRIPTING>"goto" {
  1182. RETURN_TOKEN(T_GOTO);
  1183. }
  1184. <ST_IN_SCRIPTING>"echo" {
  1185. RETURN_TOKEN(T_ECHO);
  1186. }
  1187. <ST_IN_SCRIPTING>"print" {
  1188. RETURN_TOKEN(T_PRINT);
  1189. }
  1190. <ST_IN_SCRIPTING>"class" {
  1191. RETURN_TOKEN(T_CLASS);
  1192. }
  1193. <ST_IN_SCRIPTING>"interface" {
  1194. RETURN_TOKEN(T_INTERFACE);
  1195. }
  1196. <ST_IN_SCRIPTING>"trait" {
  1197. RETURN_TOKEN(T_TRAIT);
  1198. }
  1199. <ST_IN_SCRIPTING>"extends" {
  1200. RETURN_TOKEN(T_EXTENDS);
  1201. }
  1202. <ST_IN_SCRIPTING>"implements" {
  1203. RETURN_TOKEN(T_IMPLEMENTS);
  1204. }
  1205. <ST_IN_SCRIPTING>"->" {
  1206. yy_push_state(ST_LOOKING_FOR_PROPERTY);
  1207. RETURN_TOKEN(T_OBJECT_OPERATOR);
  1208. }
  1209. <ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
  1210. goto return_whitespace;
  1211. }
  1212. <ST_LOOKING_FOR_PROPERTY>"->" {
  1213. RETURN_TOKEN(T_OBJECT_OPERATOR);
  1214. }
  1215. <ST_LOOKING_FOR_PROPERTY>{LABEL} {
  1216. yy_pop_state();
  1217. RETURN_TOKEN_WITH_STR(T_STRING, 0);
  1218. }
  1219. <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
  1220. yyless(0);
  1221. yy_pop_state();
  1222. goto restart;
  1223. }
  1224. <ST_IN_SCRIPTING>"::" {
  1225. RETURN_TOKEN(T_PAAMAYIM_NEKUDOTAYIM);
  1226. }
  1227. <ST_IN_SCRIPTING>"\\" {
  1228. RETURN_TOKEN(T_NS_SEPARATOR);
  1229. }
  1230. <ST_IN_SCRIPTING>"..." {
  1231. RETURN_TOKEN(T_ELLIPSIS);
  1232. }
  1233. <ST_IN_SCRIPTING>"??" {
  1234. RETURN_TOKEN(T_COALESCE);
  1235. }
  1236. <ST_IN_SCRIPTING>"new" {
  1237. RETURN_TOKEN(T_NEW);
  1238. }
  1239. <ST_IN_SCRIPTING>"clone" {
  1240. RETURN_TOKEN(T_CLONE);
  1241. }
  1242. <ST_IN_SCRIPTING>"var" {
  1243. RETURN_TOKEN(T_VAR);
  1244. }
  1245. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
  1246. RETURN_TOKEN(T_INT_CAST);
  1247. }
  1248. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
  1249. RETURN_TOKEN(T_DOUBLE_CAST);
  1250. }
  1251. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
  1252. RETURN_TOKEN(T_STRING_CAST);
  1253. }
  1254. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
  1255. RETURN_TOKEN(T_ARRAY_CAST);
  1256. }
  1257. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
  1258. RETURN_TOKEN(T_OBJECT_CAST);
  1259. }
  1260. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
  1261. RETURN_TOKEN(T_BOOL_CAST);
  1262. }
  1263. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
  1264. RETURN_TOKEN(T_UNSET_CAST);
  1265. }
  1266. <ST_IN_SCRIPTING>"eval" {
  1267. RETURN_TOKEN(T_EVAL);
  1268. }
  1269. <ST_IN_SCRIPTING>"include" {
  1270. RETURN_TOKEN(T_INCLUDE);
  1271. }
  1272. <ST_IN_SCRIPTING>"include_once" {
  1273. RETURN_TOKEN(T_INCLUDE_ONCE);
  1274. }
  1275. <ST_IN_SCRIPTING>"require" {
  1276. RETURN_TOKEN(T_REQUIRE);
  1277. }
  1278. <ST_IN_SCRIPTING>"require_once" {
  1279. RETURN_TOKEN(T_REQUIRE_ONCE);
  1280. }
  1281. <ST_IN_SCRIPTING>"namespace" {
  1282. RETURN_TOKEN(T_NAMESPACE);
  1283. }
  1284. <ST_IN_SCRIPTING>"use" {
  1285. RETURN_TOKEN(T_USE);
  1286. }
  1287. <ST_IN_SCRIPTING>"insteadof" {
  1288. RETURN_TOKEN(T_INSTEADOF);
  1289. }
  1290. <ST_IN_SCRIPTING>"global" {
  1291. RETURN_TOKEN(T_GLOBAL);
  1292. }
  1293. <ST_IN_SCRIPTING>"isset" {
  1294. RETURN_TOKEN(T_ISSET);
  1295. }
  1296. <ST_IN_SCRIPTING>"empty" {
  1297. RETURN_TOKEN(T_EMPTY);
  1298. }
  1299. <ST_IN_SCRIPTING>"__halt_compiler" {
  1300. RETURN_TOKEN(T_HALT_COMPILER);
  1301. }
  1302. <ST_IN_SCRIPTING>"static" {
  1303. RETURN_TOKEN(T_STATIC);
  1304. }
  1305. <ST_IN_SCRIPTING>"abstract" {
  1306. RETURN_TOKEN(T_ABSTRACT);
  1307. }
  1308. <ST_IN_SCRIPTING>"final" {
  1309. RETURN_TOKEN(T_FINAL);
  1310. }
  1311. <ST_IN_SCRIPTING>"private" {
  1312. RETURN_TOKEN(T_PRIVATE);
  1313. }
  1314. <ST_IN_SCRIPTING>"protected" {
  1315. RETURN_TOKEN(T_PROTECTED);
  1316. }
  1317. <ST_IN_SCRIPTING>"public" {
  1318. RETURN_TOKEN(T_PUBLIC);
  1319. }
  1320. <ST_IN_SCRIPTING>"unset" {
  1321. RETURN_TOKEN(T_UNSET);
  1322. }
  1323. <ST_IN_SCRIPTING>"=>" {
  1324. RETURN_TOKEN(T_DOUBLE_ARROW);
  1325. }
  1326. <ST_IN_SCRIPTING>"list" {
  1327. RETURN_TOKEN(T_LIST);
  1328. }
  1329. <ST_IN_SCRIPTING>"array" {
  1330. RETURN_TOKEN(T_ARRAY);
  1331. }
  1332. <ST_IN_SCRIPTING>"callable" {
  1333. RETURN_TOKEN(T_CALLABLE);
  1334. }
  1335. <ST_IN_SCRIPTING>"++" {
  1336. RETURN_TOKEN(T_INC);
  1337. }
  1338. <ST_IN_SCRIPTING>"--" {
  1339. RETURN_TOKEN(T_DEC);
  1340. }
  1341. <ST_IN_SCRIPTING>"===" {
  1342. RETURN_TOKEN(T_IS_IDENTICAL);
  1343. }
  1344. <ST_IN_SCRIPTING>"!==" {
  1345. RETURN_TOKEN(T_IS_NOT_IDENTICAL);
  1346. }
  1347. <ST_IN_SCRIPTING>"==" {
  1348. RETURN_TOKEN(T_IS_EQUAL);
  1349. }
  1350. <ST_IN_SCRIPTING>"!="|"<>" {
  1351. RETURN_TOKEN(T_IS_NOT_EQUAL);
  1352. }
  1353. <ST_IN_SCRIPTING>"<=>" {
  1354. RETURN_TOKEN(T_SPACESHIP);
  1355. }
  1356. <ST_IN_SCRIPTING>"<=" {
  1357. RETURN_TOKEN(T_IS_SMALLER_OR_EQUAL);
  1358. }
  1359. <ST_IN_SCRIPTING>">=" {
  1360. RETURN_TOKEN(T_IS_GREATER_OR_EQUAL);
  1361. }
  1362. <ST_IN_SCRIPTING>"+=" {
  1363. RETURN_TOKEN(T_PLUS_EQUAL);
  1364. }
  1365. <ST_IN_SCRIPTING>"-=" {
  1366. RETURN_TOKEN(T_MINUS_EQUAL);
  1367. }
  1368. <ST_IN_SCRIPTING>"*=" {
  1369. RETURN_TOKEN(T_MUL_EQUAL);
  1370. }
  1371. <ST_IN_SCRIPTING>"*\*" {
  1372. RETURN_TOKEN(T_POW);
  1373. }
  1374. <ST_IN_SCRIPTING>"*\*=" {
  1375. RETURN_TOKEN(T_POW_EQUAL);
  1376. }
  1377. <ST_IN_SCRIPTING>"/=" {
  1378. RETURN_TOKEN(T_DIV_EQUAL);
  1379. }
  1380. <ST_IN_SCRIPTING>".=" {
  1381. RETURN_TOKEN(T_CONCAT_EQUAL);
  1382. }
  1383. <ST_IN_SCRIPTING>"%=" {
  1384. RETURN_TOKEN(T_MOD_EQUAL);
  1385. }
  1386. <ST_IN_SCRIPTING>"<<=" {
  1387. RETURN_TOKEN(T_SL_EQUAL);
  1388. }
  1389. <ST_IN_SCRIPTING>">>=" {
  1390. RETURN_TOKEN(T_SR_EQUAL);
  1391. }
  1392. <ST_IN_SCRIPTING>"&=" {
  1393. RETURN_TOKEN(T_AND_EQUAL);
  1394. }
  1395. <ST_IN_SCRIPTING>"|=" {
  1396. RETURN_TOKEN(T_OR_EQUAL);
  1397. }
  1398. <ST_IN_SCRIPTING>"^=" {
  1399. RETURN_TOKEN(T_XOR_EQUAL);
  1400. }
  1401. <ST_IN_SCRIPTING>"||" {
  1402. RETURN_TOKEN(T_BOOLEAN_OR);
  1403. }
  1404. <ST_IN_SCRIPTING>"&&" {
  1405. RETURN_TOKEN(T_BOOLEAN_AND);
  1406. }
  1407. <ST_IN_SCRIPTING>"OR" {
  1408. RETURN_TOKEN(T_LOGICAL_OR);
  1409. }
  1410. <ST_IN_SCRIPTING>"AND" {
  1411. RETURN_TOKEN(T_LOGICAL_AND);
  1412. }
  1413. <ST_IN_SCRIPTING>"XOR" {
  1414. RETURN_TOKEN(T_LOGICAL_XOR);
  1415. }
  1416. <ST_IN_SCRIPTING>"<<" {
  1417. RETURN_TOKEN(T_SL);
  1418. }
  1419. <ST_IN_SCRIPTING>">>" {
  1420. RETURN_TOKEN(T_SR);
  1421. }
  1422. <ST_IN_SCRIPTING>{TOKENS} {
  1423. RETURN_TOKEN(yytext[0]);
  1424. }
  1425. <ST_IN_SCRIPTING>"{" {
  1426. yy_push_state(ST_IN_SCRIPTING);
  1427. RETURN_TOKEN('{');
  1428. }
  1429. <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
  1430. yy_push_state(ST_LOOKING_FOR_VARNAME);
  1431. RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
  1432. }
  1433. <ST_IN_SCRIPTING>"}" {
  1434. RESET_DOC_COMMENT();
  1435. if (!zend_stack_is_empty(&SCNG(state_stack))) {
  1436. yy_pop_state();
  1437. }
  1438. RETURN_TOKEN('}');
  1439. }
  1440. <ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
  1441. yyless(yyleng - 1);
  1442. yy_pop_state();
  1443. yy_push_state(ST_IN_SCRIPTING);
  1444. RETURN_TOKEN_WITH_STR(T_STRING_VARNAME, 0);
  1445. }
  1446. <ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
  1447. yyless(0);
  1448. yy_pop_state();
  1449. yy_push_state(ST_IN_SCRIPTING);
  1450. goto restart;
  1451. }
  1452. <ST_IN_SCRIPTING>{BNUM} {
  1453. char *bin = yytext + 2; /* Skip "0b" */
  1454. int len = yyleng - 2;
  1455. char *end;
  1456. /* Skip any leading 0s */
  1457. while (*bin == '0') {
  1458. ++bin;
  1459. --len;
  1460. }
  1461. if (len < SIZEOF_ZEND_LONG * 8) {
  1462. if (len == 0) {
  1463. ZVAL_LONG(zendlval, 0);
  1464. } else {
  1465. errno = 0;
  1466. ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
  1467. ZEND_ASSERT(!errno && end == yytext + yyleng);
  1468. }
  1469. RETURN_TOKEN_WITH_VAL(T_LNUMBER);
  1470. } else {
  1471. ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
  1472. /* errno isn't checked since we allow HUGE_VAL/INF overflow */
  1473. ZEND_ASSERT(end == yytext + yyleng);
  1474. RETURN_TOKEN_WITH_VAL(T_DNUMBER);
  1475. }
  1476. }
  1477. <ST_IN_SCRIPTING>{LNUM} {
  1478. char *end;
  1479. if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
  1480. errno = 0;
  1481. /* base must be passed explicitly for correct parse error on Windows */
  1482. ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, yytext[0] == '0' ? 8 : 10));
  1483. /* This isn't an assert, we need to ensure 019 isn't valid octal
  1484. * Because the lexing itself doesn't do that for us
  1485. */
  1486. if (end != yytext + yyleng) {
  1487. zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
  1488. ZVAL_UNDEF(zendlval);
  1489. if (PARSER_MODE()) {
  1490. RETURN_TOKEN(T_ERROR);
  1491. }
  1492. RETURN_TOKEN_WITH_VAL(T_LNUMBER);
  1493. }
  1494. } else {
  1495. errno = 0;
  1496. ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
  1497. if (errno == ERANGE) { /* Overflow */
  1498. errno = 0;
  1499. if (yytext[0] == '0') { /* octal overflow */
  1500. ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, (const char **)&end));
  1501. } else {
  1502. ZVAL_DOUBLE(zendlval, zend_strtod(yytext, (const char **)&end));
  1503. }
  1504. /* Also not an assert for the same reason */
  1505. if (end != yytext + yyleng) {
  1506. zend_throw_exception(zend_ce_parse_error,
  1507. "Invalid numeric literal", 0);
  1508. ZVAL_UNDEF(zendlval);
  1509. if (PARSER_MODE()) {
  1510. RETURN_TOKEN(T_ERROR);
  1511. }
  1512. }
  1513. RETURN_TOKEN_WITH_VAL(T_DNUMBER);
  1514. }
  1515. /* Also not an assert for the same reason */
  1516. if (end != yytext + yyleng) {
  1517. zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
  1518. ZVAL_UNDEF(zendlval);
  1519. if (PARSER_MODE()) {
  1520. RETURN_TOKEN(T_ERROR);
  1521. }
  1522. RETURN_TOKEN_WITH_VAL(T_DNUMBER);
  1523. }
  1524. }
  1525. ZEND_ASSERT(!errno);
  1526. RETURN_TOKEN_WITH_VAL(T_LNUMBER);
  1527. }
  /* Hexadecimal literal. Values whose significant digits fit a signed zend_long
   * become T_LNUMBER; anything wider is converted to a double (T_DNUMBER).
   */
  <ST_IN_SCRIPTING>{HNUM} {
      char *digits = yytext + 2; /* first character after the "0x" prefix */
      int ndigits = yyleng - 2;
      char *stop;
      int fits_in_long;

      /* Leading zeros carry no value; drop them so only significant digits are counted */
      for (; *digits == '0'; digits++) {
          ndigits--;
      }

      /* Two hex digits per byte. At full width the top digit must be <= 7,
       * i.e. the sign bit must stay clear. */
      fits_in_long = ndigits < SIZEOF_ZEND_LONG * 2
          || (ndigits == SIZEOF_ZEND_LONG * 2 && *digits <= '7');

      if (!fits_in_long) {
          ZVAL_DOUBLE(zendlval, zend_hex_strtod(digits, (const char **)&stop));
          /* errno is deliberately not checked: HUGE_VAL/INF overflow is allowed */
          ZEND_ASSERT(stop == digits + ndigits);
          RETURN_TOKEN_WITH_VAL(T_DNUMBER);
      }

      if (ndigits == 0) {
          /* The literal consisted of zeros only */
          ZVAL_LONG(zendlval, 0);
      } else {
          errno = 0;
          ZVAL_LONG(zendlval, ZEND_STRTOL(digits, &stop, 16));
          ZEND_ASSERT(!errno && stop == digits + ndigits);
      }
      RETURN_TOKEN_WITH_VAL(T_LNUMBER);
  }
  <ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
      char *stop;
      int may_fit_long;

      /* Try an integer only when the digit count is below MAX_LENGTH_OF_LONG - 1,
       * or equal to it with text that compares below long_min_digits. */
      may_fit_long = yyleng < MAX_LENGTH_OF_LONG - 1
          || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0);
      if (!may_fit_long) {
          goto string;
      }

      errno = 0;
      ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &stop, 10));
      if (errno == ERANGE) {
          /* The conversion overflowed after all: keep the digits as a string */
          goto string;
      }
      ZEND_ASSERT(stop == yytext + yyleng);
      RETURN_TOKEN_WITH_VAL(T_NUM_STRING);

  string:
      ZVAL_STRINGL(zendlval, yytext, yyleng);
      RETURN_TOKEN_WITH_VAL(T_NUM_STRING);
  }
  <ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
      if (yyleng != 1) {
          ZVAL_STRINGL(zendlval, yytext, yyleng);
      } else {
          /* A single character uses the interned one-character string */
          ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)yytext[0]));
      }
      RETURN_TOKEN_WITH_VAL(T_NUM_STRING);
  }
  /* Floating-point literal, with or without exponent: always a T_DNUMBER. */
  <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
      const char *stop;
      double parsed = zend_strtod(yytext, &stop);

      ZVAL_DOUBLE(zendlval, parsed);
      /* errno is not inspected since overflow to HUGE_VAL/INF is allowed */
      ZEND_ASSERT(stop == yytext + yyleng);
      RETURN_TOKEN_WITH_VAL(T_DNUMBER);
  }
  /* Magic constants: each keyword is returned as its own token, with no value attached. */
  <ST_IN_SCRIPTING>"__CLASS__"     { RETURN_TOKEN(T_CLASS_C); }
  <ST_IN_SCRIPTING>"__TRAIT__"     { RETURN_TOKEN(T_TRAIT_C); }
  <ST_IN_SCRIPTING>"__FUNCTION__"  { RETURN_TOKEN(T_FUNC_C); }
  <ST_IN_SCRIPTING>"__METHOD__"    { RETURN_TOKEN(T_METHOD_C); }
  <ST_IN_SCRIPTING>"__LINE__"      { RETURN_TOKEN(T_LINE); }
  <ST_IN_SCRIPTING>"__FILE__"      { RETURN_TOKEN(T_FILE); }
  <ST_IN_SCRIPTING>"__DIR__"       { RETURN_TOKEN(T_DIR); }
  <ST_IN_SCRIPTING>"__NAMESPACE__" { RETURN_TOKEN(T_NS_C); }
  /* "<?=" switches to scripting. In parser mode it is reported as T_ECHO,
   * otherwise as T_OPEN_TAG_WITH_ECHO.
   */
  <INITIAL>"<?=" {
      BEGIN(ST_IN_SCRIPTING);
      RETURN_TOKEN((PARSER_MODE() ? T_ECHO : T_OPEN_TAG_WITH_ECHO));
  }
  /* "<?php" followed by a blank or newline switches to scripting. */
  <INITIAL>"<?php"([ \t]|{NEWLINE}) {
      /* The trailing character may be a newline, which has to be counted */
      HANDLE_NEWLINE(yytext[yyleng-1]);
      BEGIN(ST_IN_SCRIPTING);
      if (PARSER_MODE()) {
          /* The parser never sees the open tag */
          SKIP_TOKEN(T_OPEN_TAG);
      } else {
          RETURN_TOKEN(T_OPEN_TAG);
      }
  }
  /* "<?" is an open tag only when short tags are enabled. */
  <INITIAL>"<?" {
      if (!CG(short_tags)) {
          /* Short tags are off: treat the text as ordinary inline HTML */
          goto inline_char_handler;
      }

      BEGIN(ST_IN_SCRIPTING);
      if (PARSER_MODE()) {
          SKIP_TOKEN(T_OPEN_TAG);
      }
      RETURN_TOKEN(T_OPEN_TAG);
  }
  /* Inline HTML: everything up to the next PHP open tag (or the end of input)
   * becomes a single T_INLINE_HTML token.
   */
  <INITIAL>{ANY_CHAR} {
      if (YYCURSOR > YYLIMIT) {
          RETURN_TOKEN(END);
      }

  inline_char_handler:

      for (;;) {
          YYCTYPE *lt = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);

          /* Continue just past the next '<', or stop at the end of the buffer */
          if (lt) {
              YYCURSOR = lt + 1;
          } else {
              YYCURSOR = YYLIMIT;
          }
          if (YYCURSOR >= YYLIMIT) {
              break;
          }

          /* "<?" ends the HTML run with short tags on, or when "php" or "=" follows */
          if (*YYCURSOR == '?'
                  && (CG(short_tags)
                      || !strncasecmp((char*)YYCURSOR + 1, "php", 3)
                      || (*(YYCURSOR + 1) == '='))) { /* Assume [ \t\n\r] follows "php" */
              YYCURSOR--; /* step back onto the '<' so the tag is not consumed */
              break;
          }
      }

      yyleng = YYCURSOR - SCNG(yy_text);

      if (SCNG(output_filter)) {
          char *filtered = NULL;
          size_t filtered_len = 0;
          size_t consumed;

          // TODO: avoid reallocation ???
          consumed = SCNG(output_filter)((unsigned char **)&filtered, &filtered_len, (unsigned char *)yytext, (size_t)yyleng);
          ZVAL_STRINGL(zendlval, filtered, filtered_len);
          efree(filtered);
          /* presumably the return value is the number of input bytes the filter
           * consumed; shorten the token when it is less than what was matched */
          if (consumed < yyleng) {
              yyless(consumed);
          }
      } else if (yyleng == 1) {
          ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*yytext));
      } else {
          ZVAL_STRINGL(zendlval, yytext, yyleng);
      }
      HANDLE_NEWLINES(yytext, yyleng);
      RETURN_TOKEN_WITH_VAL(T_INLINE_HTML);
  }
  /* "$name->" counts as a property access only when a label character follows
   * the arrow; without one there is no property and "->" is taken literally.
   */
  <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x80-\xff] {
      /* Give back "->" and the look-ahead character: three characters in total */
      yyless(yyleng - 3);
      yy_push_state(ST_LOOKING_FOR_PROPERTY);
      /* Offset 1 leaves the leading "$" out of the token value */
      RETURN_TOKEN_WITH_STR(T_VARIABLE, 1);
  }
  /* Whatever follows it, a "[" right after the variable always starts a
   * variable offset.
   */
  <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
      /* Leave the "[" in the input for the offset state */
      yyless(yyleng - 1);
      yy_push_state(ST_VAR_OFFSET);
      RETURN_TOKEN_WITH_STR(T_VARIABLE, 1);
  }
  /* Plain variable; the token value starts at offset 1, i.e. after the "$". */
  <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} { RETURN_TOKEN_WITH_STR(T_VARIABLE, 1); }
  /* "]" ends the variable offset: return to the state that was pushed before it. */
  <ST_VAR_OFFSET>"]" {
      yy_pop_state();
      RETURN_TOKEN(']');
  }
  /* Single-character tokens inside a variable offset are returned as
   * themselves. Only '[' or '-' can be valid here, but handing the others to
   * the parser allows a more explicit parse error.
   */
  <ST_VAR_OFFSET>{TOKENS}|[{}"`] { RETURN_TOKEN(yytext[0]); }
  /* Characters that cannot appear in a variable offset. This rule exists only
   * to return a more explicit parse error with the proper line number.
   */
  <ST_VAR_OFFSET>[ \n\r\t\\'#] {
      yyless(0); /* consume nothing */
      yy_pop_state();
      ZVAL_NULL(zendlval);
      RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
  }
  /* Bare identifier; the whole match (offset 0) is the token value. */
  <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} { RETURN_TOKEN_WITH_STR(T_STRING, 0); }
  /* One-line comment ("#" or "//"): runs through the end of the line (newline
   * included), or stops just before a closing "?>".
   */
  <ST_IN_SCRIPTING>"#"|"//" {
      while (YYCURSOR < YYLIMIT) {
          const unsigned char ch = *YYCURSOR++;

          if (ch == '\r' || ch == '\n') {
              /* "\r\n" is consumed as a single line break */
              if (ch == '\r' && *YYCURSOR == '\n') {
                  YYCURSOR++;
              }
              CG(zend_lineno)++;
              break;
          }
          if (ch == '?' && *YYCURSOR == '>') {
              /* Leave "?>" in the input: the close tag is not part of the comment */
              YYCURSOR--;
              break;
          }
      }

      yyleng = YYCURSOR - SCNG(yy_text);

      if (PARSER_MODE()) {
          SKIP_TOKEN(T_COMMENT);
      }
      RETURN_TOKEN(T_COMMENT);
  }
  /* Block comment. A match longer than two characters ("/**" plus whitespace)
   * is a doc comment, whose text is stored in CG(doc_comment).
   */
  <ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
      const int is_doc_comment = yyleng > 2;

      if (is_doc_comment) {
          RESET_DOC_COMMENT();
      }

      /* Look for the closing star-slash pair; on success the cursor rests on the slash */
      while (YYCURSOR < YYLIMIT) {
          const unsigned char ch = *YYCURSOR++;

          if (ch == '*' && *YYCURSOR == '/') {
              break;
          }
      }

      if (YYCURSOR < YYLIMIT) {
          YYCURSOR++; /* consume the closing slash */
      } else if (!SCNG(heredoc_scan_ahead)) {
          zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
      }

      yyleng = YYCURSOR - SCNG(yy_text);
      HANDLE_NEWLINES(yytext, yyleng);

      if (!is_doc_comment) {
          if (PARSER_MODE()) {
              SKIP_TOKEN(T_COMMENT);
          }
          RETURN_TOKEN(T_COMMENT);
      }

      CG(doc_comment) = zend_string_init(yytext, yyleng, 0);
      if (PARSER_MODE()) {
          SKIP_TOKEN(T_DOC_COMMENT);
      }
      RETURN_TOKEN(T_DOC_COMMENT);
  }
  /* Closing tag, optionally followed by one newline that belongs to the tag. */
  <ST_IN_SCRIPTING>"?>"{NEWLINE}? {
      BEGIN(INITIAL);
      /* If the match does not end in '>', a newline was swallowed: count it later */
      if (yytext[yyleng-1] != '>') {
          CG(increment_lineno) = 1;
      }
      if (!PARSER_MODE()) {
          RETURN_TOKEN(T_CLOSE_TAG);
      }
      RETURN_TOKEN(';');  /* implicit ';' at php-end tag */
  }
  /* Single-quoted string literal, optionally prefixed with "b".
   *
   * The rule matches only the opening quote and scans to the closing quote by
   * hand. Only two escapes are recognised: a backslash followed by a backslash
   * or by a single quote collapses to that character; any other backslash is
   * kept literally. Line breaks inside the literal are counted
   * ("\r\n" counts once).
   *
   * Returns T_CONSTANT_ENCAPSED_STRING with the unescaped text, or
   * T_ENCAPSED_AND_WHITESPACE with a null value when the quote is never closed.
   *
   * With an output filter installed, the value is replaced by the filter's
   * output. The previous value and the filter's buffer are released there;
   * the original code leaked both.
   */
  <ST_IN_SCRIPTING>b?['] {
      char *s, *t;
      char *end;
      int bprefix = (yytext[0] != '\'') ? 1 : 0;

      /* Find the closing quote; a backslash always protects the byte after it */
      while (1) {
          if (YYCURSOR < YYLIMIT) {
              if (*YYCURSOR == '\'') {
                  YYCURSOR++;
                  yyleng = YYCURSOR - SCNG(yy_text);

                  break;
              } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
                  YYCURSOR++;
              }
          } else {
              yyleng = YYLIMIT - SCNG(yy_text);

              /* Unclosed single quotes; treat similar to double quotes, but without a separate token
               * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
               * rule, which continued in ST_IN_SCRIPTING state after the quote */
              ZVAL_NULL(zendlval);
              RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
          }
      }

      /* Zero or one character between the quotes: no escape can occur, use interned strings */
      if (yyleng-bprefix-2 <= 1) {
          if (yyleng-bprefix-2 < 1) {
              ZVAL_EMPTY_STRING(zendlval);
          } else {
              zend_uchar c = (zend_uchar)*(yytext+bprefix+1);
              if (c == '\n' || c == '\r') {
                  CG(zend_lineno)++;
              }
              ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR(c));
          }
          goto skip_escape_conversion;
      }
      ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2);

      /* convert escape sequences */
      s = Z_STRVAL_P(zendlval);
      end = s+Z_STRLEN_P(zendlval);

      /* First pass: count lines up to the first backslash; without one the copy is already final */
      while (1) {
          if (UNEXPECTED(*s=='\\')) {
              break;
          }
          if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
              CG(zend_lineno)++;
          }
          s++;
          if (s == end) {
              goto skip_escape_conversion;
          }
      }

      /* Second pass: compact in place, s reads and t writes (t never overtakes s) */
      t = s;

      while (s<end) {
          if (*s=='\\') {
              s++;
              if (*s == '\\' || *s == '\'') {
                  *t++ = *s;
              } else {
                  /* Not a recognised escape: keep the backslash */
                  *t++ = '\\';
                  *t++ = *s;
              }
          } else {
              *t++ = *s;
          }
          if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
              CG(zend_lineno)++;
          }
          s++;
      }
      *t = 0;
      Z_STRLEN_P(zendlval) = t - Z_STRVAL_P(zendlval);

  skip_escape_conversion:
      if (SCNG(output_filter)) {
          size_t sz = 0;
          char *str = NULL;
          zend_string *filtered;

          s = Z_STRVAL_P(zendlval);
          // TODO: avoid reallocation ???
          SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));

          /* Copy the filter output first, then drop both the filter buffer and
           * the unfiltered value so neither is leaked. */
          filtered = zend_string_init(str, sz, 0);
          if (str != s) {
              /* NOTE(review): guards against a filter that hands back its input
               * pointer — confirm against the filter implementations */
              efree(str);
          }
          /* Safe for the interned values set on the short-string path above */
          zend_string_release(Z_STR_P(zendlval));
          ZVAL_STR(zendlval, filtered);
      }
      RETURN_TOKEN_WITH_VAL(T_CONSTANT_ENCAPSED_STRING);
  }
  /* Opening double quote, optionally prefixed with "b".
   *
   * Scans ahead: if the closing quote comes before any interpolation, the whole
   * literal is returned as one T_CONSTANT_ENCAPSED_STRING. Otherwise only the
   * opening quote is returned, the state becomes ST_DOUBLE_QUOTES, and the
   * length already scanned is recorded.
   */
  <ST_IN_SCRIPTING>b?["] {
      int bprefix = yytext[0] != '"';

      while (YYCURSOR < YYLIMIT) {
          const unsigned char ch = *YYCURSOR++;
          int starts_interpolation;

          if (ch == '"') {
              yyleng = YYCURSOR - SCNG(yy_text);
              if (EXPECTED(zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"') == SUCCESS)
               || !PARSER_MODE()) {
                  RETURN_TOKEN_WITH_VAL(T_CONSTANT_ENCAPSED_STRING);
              }
              RETURN_TOKEN(T_ERROR);
          }

          if (ch == '\\') {
              /* Skip the escaped character, if there is one */
              if (YYCURSOR < YYLIMIT) {
                  YYCURSOR++;
              }
              continue;
          }

          if (ch == '$') {
              starts_interpolation = IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{';
          } else if (ch == '{') {
              starts_interpolation = *YYCURSOR == '$';
          } else {
              starts_interpolation = 0;
          }

          if (starts_interpolation) {
              YYCURSOR--; /* the interpolation itself is not part of the literal text */
              break;
          }
      }

      /* Remember how much was scanned to save rescanning */
      SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);

      /* Rewind to just after the opening quote */
      YYCURSOR = SCNG(yy_text) + yyleng;

      BEGIN(ST_DOUBLE_QUOTES);
      RETURN_TOKEN('"');
  }
  /* Heredoc / nowdoc opener: optional "b" prefix, "<<<", optional tabs/spaces,
   * a label (bare, single-quoted or double-quoted) and a newline.
   *
   * Allocates a zend_heredoc_label, pushes it on SCNG(heredoc_label_stack) and
   * enters ST_NOWDOC (single-quoted label) or ST_HEREDOC (otherwise). If the
   * closing label is already on the next line, the state becomes
   * ST_END_HEREDOC instead. For heredocs, a look-ahead scan then determines the
   * closing label's indentation. Returns T_START_HEREDOC, or T_ERROR in parser
   * mode on an indentation error.
   */
  1897. <ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
  1898. char *s;
  1899. unsigned char *saved_cursor;
  1900. int bprefix = (yytext[0] != '<') ? 1 : 0, spacing = 0, indentation = 0;
  1901. zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
  1902. zend_bool is_heredoc = 1;
  /* The opener line ends with a newline */
  1903. CG(zend_lineno)++;
  /* Start from the match length minus the prefix, the "<<<" and the newline
   * (one more for "\r\n"); blanks and quotes are subtracted below */
  1904. heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
  1905. s = yytext+bprefix+3;
  /* Blanks between "<<<" and the label are not part of the label */
  1906. while ((*s == ' ') || (*s == '\t')) {
  1907. s++;
  1908. heredoc_label->length--;
  1909. }
  /* A single-quoted label selects nowdoc; both quote characters are dropped */
  1910. if (*s == '\'') {
  1911. s++;
  1912. heredoc_label->length -= 2;
  1913. is_heredoc = 0;
  1914. BEGIN(ST_NOWDOC);
  1915. } else {
  1916. if (*s == '"') {
  1917. s++;
  1918. heredoc_label->length -= 2;
  1919. }
  1920. BEGIN(ST_HEREDOC);
  1921. }
  1922. heredoc_label->label = estrndup(s, heredoc_label->length);
  1923. heredoc_label->indentation = 0;
  1924. saved_cursor = YYCURSOR;
  1925. zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
  /* Measure the leading whitespace of the first body line and record which
   * kinds (tabs, spaces) occur in it */
  1926. while (YYCURSOR < YYLIMIT && (*YYCURSOR == ' ' || *YYCURSOR == '\t')) {
  1927. if (*YYCURSOR == '\t') {
  1928. spacing |= HEREDOC_USING_TABS;
  1929. } else {
  1930. spacing |= HEREDOC_USING_SPACES;
  1931. }
  1932. ++YYCURSOR;
  1933. ++indentation;
  1934. }
  /* The input ends inside that whitespace: rewind and emit the opener */
  1935. if (YYCURSOR == YYLIMIT) {
  1936. YYCURSOR = saved_cursor;
  1937. RETURN_TOKEN(T_START_HEREDOC);
  1938. }
  1939. /* Check for ending label on the next line */
  1940. if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
  /* Only a closing label if the next character cannot continue an identifier */
  1941. if (!IS_LABEL_SUCCESSOR(YYCURSOR[heredoc_label->length])) {
  1942. if (spacing == (HEREDOC_USING_SPACES | HEREDOC_USING_TABS)) {
  1943. zend_throw_exception(zend_ce_parse_error, "Invalid indentation - tabs and spaces cannot be mixed", 0);
  1944. if (PARSER_MODE()) {
  1945. RETURN_TOKEN(T_ERROR);
  1946. }
  1947. }
  /* Empty body: rewind, remember the label's indentation and go straight to
   * the closing-label state */
  1948. YYCURSOR = saved_cursor;
  1949. heredoc_label->indentation = indentation;
  1950. BEGIN(ST_END_HEREDOC);
  1951. RETURN_TOKEN(T_START_HEREDOC);
  1952. }
  1953. }
  1954. YYCURSOR = saved_cursor;
  /* Look-ahead, heredocs only and never nested inside another look-ahead: lex
   * forward on a saved scanner state until the matching T_END_HEREDOC (or END,
   * or an exception), then restore the state. The indentation found for the
   * closing label is copied into heredoc_label. */
  1955. if (is_heredoc && !SCNG(heredoc_scan_ahead)) {
  1956. zend_lex_state current_state;
  1957. zend_string *saved_doc_comment = CG(doc_comment);
  1958. int heredoc_nesting_level = 1;
  1959. int first_token = 0;
  1960. int error = 0;
  1961. zend_save_lexical_state(&current_state);
  1962. SCNG(heredoc_scan_ahead) = 1;
  1963. SCNG(heredoc_indentation) = 0;
  1964. SCNG(heredoc_indentation_uses_spaces) = 0;
  /* No token callbacks while scanning ahead */
  1965. LANG_SCNG(on_event) = NULL;
  1966. CG(doc_comment) = NULL;
  /* NOTE(review): presumably copies the saved label stack so the look-ahead
   * can pop labels without touching the saved state — confirm in
   * copy_heredoc_label_stack */
  1967. zend_ptr_stack_reverse_apply(&current_state.heredoc_label_stack, copy_heredoc_label_stack);
  1968. zend_exception_save();
  1969. while (heredoc_nesting_level) {
  1970. zval zv;
  1971. int retval;
  1972. ZVAL_UNDEF(&zv);
  1973. retval = lex_scan(&zv, NULL);
  1974. zval_ptr_dtor_nogc(&zv);
  /* An exception raised during the look-ahead is discarded and ends it */
  1975. if (EG(exception)) {
  1976. zend_clear_exception();
  1977. break;
  1978. }
  /* Remember the first body token; it is checked against the indentation below */
  1979. if (!first_token) {
  1980. first_token = retval;
  1981. }
  /* Track nested heredocs so only the matching closing label ends the loop */
  1982. switch (retval) {
  1983. case T_START_HEREDOC:
  1984. ++heredoc_nesting_level;
  1985. break;
  1986. case T_END_HEREDOC:
  1987. --heredoc_nesting_level;
  1988. break;
  1989. case END:
  1990. heredoc_nesting_level = 0;
  1991. }
  1992. }
  1993. zend_exception_restore();
  /* The body starts directly with an interpolation while the closing label is
   * indented: the first line cannot carry that indentation */
  1994. if (
  1995. (first_token == T_VARIABLE
  1996. || first_token == T_DOLLAR_OPEN_CURLY_BRACES
  1997. || first_token == T_CURLY_OPEN
  1998. ) && SCNG(heredoc_indentation)) {
  1999. zend_throw_exception_ex(zend_ce_parse_error, 0, "Invalid body indentation level (expecting an indentation level of at least %d)", SCNG(heredoc_indentation));
  2000. error = 1;
  2001. }
  /* Read the look-ahead results before the scanner state is restored */
  2002. heredoc_label->indentation = SCNG(heredoc_indentation);
  2003. heredoc_label->indentation_uses_spaces = SCNG(heredoc_indentation_uses_spaces);
  2004. zend_restore_lexical_state(&current_state);
  2005. SCNG(heredoc_scan_ahead) = 0;
  2006. CG(increment_lineno) = 0;
  2007. CG(doc_comment) = saved_doc_comment;
  2008. if (PARSER_MODE() && error) {
  2009. RETURN_TOKEN(T_ERROR);
  2010. }
  2011. }
  2012. RETURN_TOKEN(T_START_HEREDOC);
  2013. }
  /* Opening backquote: switch to the backquote state and return the quote itself. */
  <ST_IN_SCRIPTING>[`] {
      BEGIN(ST_BACKQUOTE);
      RETURN_TOKEN('`');
  }
  /* Closing heredoc/nowdoc label. The token covers the label and its
   * indentation; the label record is popped from the stack and freed.
   */
  <ST_END_HEREDOC>{ANY_CHAR} {
      zend_heredoc_label *closing = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));

      yyleng = closing->indentation + closing->length;
      /* ANY_CHAR already consumed one character of the token */
      YYCURSOR += yyleng - 1;

      heredoc_label_dtor(closing);
      efree(closing);

      BEGIN(ST_IN_SCRIPTING);
      RETURN_TOKEN(T_END_HEREDOC);
  }
  /* "{$" starts a complex interpolation: scripting state is pushed and only the
   * "{" is consumed, leaving the "$" for the next token.
   */
  <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
      yy_push_state(ST_IN_SCRIPTING);
      yyless(1);
      RETURN_TOKEN(T_CURLY_OPEN);
  }
  /* Closing double quote: back to scripting. */
  <ST_DOUBLE_QUOTES>["] {
      BEGIN(ST_IN_SCRIPTING);
      RETURN_TOKEN('"');
  }
  /* Closing backquote: back to scripting. */
  <ST_BACKQUOTE>[`] {
      BEGIN(ST_IN_SCRIPTING);
      RETURN_TOKEN('`');
  }
  /* Literal text inside a double-quoted string: everything up to the closing
   * quote or the next interpolation, returned as T_ENCAPSED_AND_WHITESPACE
   * after escape processing.
   */
  <ST_DOUBLE_QUOTES>{ANY_CHAR} {
      if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
          /* A scanned length was recorded earlier: jump ahead instead of
           * rescanning (ANY_CHAR already consumed one character) */
          YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
          SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);

          goto double_quotes_scan_done;
      }

      if (YYCURSOR > YYLIMIT) {
          RETURN_TOKEN(END);
      }
      /* The matched character opens an escape: skip what it escapes */
      if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
          YYCURSOR++;
      }

      while (YYCURSOR < YYLIMIT) {
          const unsigned char ch = *YYCURSOR++;
          int stop_here;

          if (ch == '\\') {
              if (YYCURSOR < YYLIMIT) {
                  YYCURSOR++;
              }
              continue;
          }

          if (ch == '"') {
              stop_here = 1;
          } else if (ch == '$') {
              stop_here = IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{';
          } else if (ch == '{') {
              stop_here = *YYCURSOR == '$';
          } else {
              stop_here = 0;
          }

          if (stop_here) {
              YYCURSOR--; /* the terminator belongs to the next token */
              break;
          }
      }

  double_quotes_scan_done:
      yyleng = YYCURSOR - SCNG(yy_text);

      if (EXPECTED(zend_scan_escape_string(zendlval, yytext, yyleng, '"') == SUCCESS)
       || !PARSER_MODE()) {
          RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
      }
      RETURN_TOKEN(T_ERROR);
  }
  /* Literal text inside backquotes: everything up to the closing backquote or
   * the next interpolation, returned as T_ENCAPSED_AND_WHITESPACE after escape
   * processing.
   */
  <ST_BACKQUOTE>{ANY_CHAR} {
      if (YYCURSOR > YYLIMIT) {
          RETURN_TOKEN(END);
      }
      /* The matched character opens an escape: skip what it escapes */
      if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
          YYCURSOR++;
      }

      while (YYCURSOR < YYLIMIT) {
          const unsigned char ch = *YYCURSOR++;
          int stop_here;

          if (ch == '\\') {
              if (YYCURSOR < YYLIMIT) {
                  YYCURSOR++;
              }
              continue;
          }

          if (ch == '`') {
              stop_here = 1;
          } else if (ch == '$') {
              stop_here = IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{';
          } else if (ch == '{') {
              stop_here = *YYCURSOR == '$';
          } else {
              stop_here = 0;
          }

          if (stop_here) {
              YYCURSOR--; /* the terminator belongs to the next token */
              break;
          }
      }

      yyleng = YYCURSOR - SCNG(yy_text);

      if (EXPECTED(zend_scan_escape_string(zendlval, yytext, yyleng, '`') == SUCCESS)
       || !PARSER_MODE()) {
          RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
      }
      RETURN_TOKEN(T_ERROR);
  }
  /* Heredoc body text. Scans up to the next interpolation start ("$" followed
   * by a label start or "{", or "{" followed by "$"), or up to the line that
   * holds the closing label, and returns the text as T_ENCAPSED_AND_WHITESPACE.
   * When the closing label is found the state becomes ST_END_HEREDOC.
   */
  2126. <ST_HEREDOC>{ANY_CHAR} {
  2127. zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
  /* newline: length (0, 1 or 2) of the line break before the closing label,
   * which is excluded from the returned text */
  2128. int newline = 0, indentation = 0, spacing = 0;
  2129. if (YYCURSOR > YYLIMIT) {
  2130. RETURN_TOKEN(END);
  2131. }
  /* Step back so the loop below also examines the character ANY_CHAR matched */
  2132. YYCURSOR--;
  2133. while (YYCURSOR < YYLIMIT) {
  2134. switch (*YYCURSOR++) {
  2135. case '\r':
  2136. if (*YYCURSOR == '\n') {
  2137. YYCURSOR++;
  2138. }
  2139. /* fall through */
  2140. case '\n':
  /* Start of a new line: measure its leading whitespace and record which
   * kinds (tabs, spaces) occur in it */
  2141. indentation = spacing = 0;
  2142. while (YYCURSOR < YYLIMIT && (*YYCURSOR == ' ' || *YYCURSOR == '\t')) {
  2143. if (*YYCURSOR == '\t') {
  2144. spacing |= HEREDOC_USING_TABS;
  2145. } else {
  2146. spacing |= HEREDOC_USING_SPACES;
  2147. }
  2148. ++YYCURSOR;
  2149. ++indentation;
  2150. }
  /* The input ends inside that whitespace: return a null-valued token */
  2151. if (YYCURSOR == YYLIMIT) {
  2152. yyleng = YYCURSOR - SCNG(yy_text);
  2153. HANDLE_NEWLINES(yytext, yyleng);
  2154. ZVAL_NULL(zendlval);
  2155. RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
  2156. }
  2157. /* Check for ending label on the next line */
  2158. if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
  /* Followed by another identifier character: a longer word, not the label */
  2159. if (IS_LABEL_SUCCESSOR(YYCURSOR[heredoc_label->length])) {
  2160. continue;
  2161. }
  2162. if (spacing == (HEREDOC_USING_SPACES | HEREDOC_USING_TABS)) {
  2163. zend_throw_exception(zend_ce_parse_error, "Invalid indentation - tabs and spaces cannot be mixed", 0);
  2164. if (PARSER_MODE()) {
  2165. RETURN_TOKEN(T_ERROR);
  2166. }
  2167. }
  2168. /* newline before label will be subtracted from returned text, but
  2169. * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
  2170. if (YYCURSOR[-indentation - 2] == '\r' && YYCURSOR[-indentation - 1] == '\n') {
  2171. newline = 2; /* Windows newline */
  2172. } else {
  2173. newline = 1;
  2174. }
  2175. CG(increment_lineno) = 1; /* For newline before label */
  /* While scanning ahead, only record the closing label's indentation.
   * Otherwise step back over the indentation so that it stays in the input for
   * the ST_END_HEREDOC rule, whose token length is indentation + label length. */
  2176. if (SCNG(heredoc_scan_ahead)) {
  2177. SCNG(heredoc_indentation) = indentation;
  2178. SCNG(heredoc_indentation_uses_spaces) = (spacing == HEREDOC_USING_SPACES);
  2179. } else {
  2180. YYCURSOR -= indentation;
  2181. }
  2182. BEGIN(ST_END_HEREDOC);
  2183. goto heredoc_scan_done;
  2184. }
  2185. continue;
  2186. case '$':
  2187. if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
  2188. break;
  2189. }
  2190. continue;
  2191. case '{':
  2192. if (*YYCURSOR == '$') {
  2193. break;
  2194. }
  2195. continue;
  2196. case '\\':
  /* Skip the escaped character, except a line break: that must still go
   * through the newline cases above */
  2197. if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
  2198. YYCURSOR++;
  2199. }
  2200. /* fall through */
  2201. default:
  2202. continue;
  2203. }
  /* Reached only via `break` in the switch: an interpolation starts here, so
   * un-read its first character and stop */
  2204. YYCURSOR--;
  2205. break;
  2206. }
  2207. heredoc_scan_done:
  2208. yyleng = YYCURSOR - SCNG(yy_text);
  2209. ZVAL_STRINGL(zendlval, yytext, yyleng - newline);
  /* Parser mode only (no look-ahead, no pending exception): strip the
   * indentation, then process escape sequences */
  2210. if (!SCNG(heredoc_scan_ahead) && !EG(exception) && PARSER_MODE()) {
  2211. zend_bool newline_at_start = *(yytext - 1) == '\n' || *(yytext - 1) == '\r';
  /* NOTE(review): presumably strip_multiline_string_indentation edits this
   * string in place, so `copy` holds the stripped text when it is passed to
   * zend_scan_escape_string below — confirm */
  2212. zend_string *copy = Z_STR_P(zendlval);
  /* NOTE(review): this error return does not free the string held by zendlval
   * (the same one as `copy`) — confirm it is released elsewhere */
  2213. if (!strip_multiline_string_indentation(
  2214. zendlval, heredoc_label->indentation, heredoc_label->indentation_uses_spaces,
  2215. newline_at_start, newline != 0)) {
  2216. RETURN_TOKEN(T_ERROR);
  2217. }
  2218. if (UNEXPECTED(zend_scan_escape_string(zendlval, ZSTR_VAL(copy), ZSTR_LEN(copy), 0) != SUCCESS)) {
  2219. zend_string_efree(copy);
  2220. RETURN_TOKEN(T_ERROR);
  2221. }
  2222. zend_string_efree(copy);
  2223. } else {
  2224. HANDLE_NEWLINES(yytext, yyleng - newline);
  2225. }
  2226. RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
  2227. }
  /* Nowdoc body text. Scans to the line that holds the closing label (or to the
   * end of input) and returns the text as T_ENCAPSED_AND_WHITESPACE. There is
   * no interpolation or escape handling here. When the closing label is found
   * its indentation is stored in the label record and the state becomes
   * ST_END_HEREDOC.
   */
  2228. <ST_NOWDOC>{ANY_CHAR} {
  2229. zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
  /* spacing stays -1 until a line break has been seen in this token; it is
   * checked before the indentation is stripped below */
  2230. int newline = 0, indentation = 0, spacing = -1;
  2231. if (YYCURSOR > YYLIMIT) {
  2232. RETURN_TOKEN(END);
  2233. }
  /* Step back so the loop below also examines the character ANY_CHAR matched */
  2234. YYCURSOR--;
  2235. while (YYCURSOR < YYLIMIT) {
  2236. switch (*YYCURSOR++) {
  2237. case '\r':
  2238. if (*YYCURSOR == '\n') {
  2239. YYCURSOR++;
  2240. }
  2241. /* fall through */
  2242. case '\n':
  /* Start of a new line: measure its leading whitespace and record which
   * kinds (tabs, spaces) occur in it */
  2243. indentation = spacing = 0;
  2244. while (YYCURSOR < YYLIMIT && (*YYCURSOR == ' ' || *YYCURSOR == '\t')) {
  2245. if (*YYCURSOR == '\t') {
  2246. spacing |= HEREDOC_USING_TABS;
  2247. } else {
  2248. spacing |= HEREDOC_USING_SPACES;
  2249. }
  2250. ++YYCURSOR;
  2251. ++indentation;
  2252. }
  /* The input ends inside that whitespace: return a null-valued token */
  2253. if (YYCURSOR == YYLIMIT) {
  2254. yyleng = YYCURSOR - SCNG(yy_text);
  2255. HANDLE_NEWLINES(yytext, yyleng);
  2256. ZVAL_NULL(zendlval);
  2257. RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
  2258. }
  2259. /* Check for ending label on the next line */
  2260. if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
  /* Followed by another identifier character: a longer word, not the label */
  2261. if (IS_LABEL_SUCCESSOR(YYCURSOR[heredoc_label->length])) {
  2262. continue;
  2263. }
  2264. if (spacing == (HEREDOC_USING_SPACES | HEREDOC_USING_TABS)) {
  2265. zend_throw_exception(zend_ce_parse_error, "Invalid indentation - tabs and spaces cannot be mixed", 0);
  2266. if (PARSER_MODE()) {
  2267. RETURN_TOKEN(T_ERROR);
  2268. }
  2269. }
  2270. /* newline before label will be subtracted from returned text, but
  2271. * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
  2272. if (YYCURSOR[-indentation - 2] == '\r' && YYCURSOR[-indentation - 1] == '\n') {
  2273. newline = 2; /* Windows newline */
  2274. } else {
  2275. newline = 1;
  2276. }
  2277. CG(increment_lineno) = 1; /* For newline before label */
  /* Step back over the indentation so that it stays in the input for the
   * ST_END_HEREDOC rule, and record it in the label for that rule */
  2278. YYCURSOR -= indentation;
  2279. heredoc_label->indentation = indentation;
  2280. BEGIN(ST_END_HEREDOC);
  2281. goto nowdoc_scan_done;
  2282. }
  2283. /* fall through */
  2284. default:
  2285. continue;
  2286. }
  2287. }
  2288. nowdoc_scan_done:
  2289. yyleng = YYCURSOR - SCNG(yy_text);
  2290. ZVAL_STRINGL(zendlval, yytext, yyleng - newline);
  /* Strip the indentation only in parser mode, with no pending exception, and
   * only when a line break was seen in this token (spacing != -1) */
  2291. if (!EG(exception) && spacing != -1 && PARSER_MODE()) {
  2292. zend_bool newline_at_start = *(yytext - 1) == '\n' || *(yytext - 1) == '\r';
  2293. if (!strip_multiline_string_indentation(
  2294. zendlval, indentation, spacing == HEREDOC_USING_SPACES,
  2295. newline_at_start, newline != 0)) {
  2296. RETURN_TOKEN(T_ERROR);
  2297. }
  2298. }
  2299. HANDLE_NEWLINES(yytext, yyleng - newline);
  2300. RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
  2301. }
  /* Fallback for any character no other rule accepts: signal END past the
   * buffer limit, otherwise warn (except while scanning ahead) and restart
   * the scan.
   */
  <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
      if (YYCURSOR > YYLIMIT) {
          RETURN_TOKEN(END);
      }

      /* No warning during the heredoc look-ahead */
      if (SCNG(heredoc_scan_ahead)) {
          goto restart;
      }

      zend_error(E_COMPILE_WARNING, "Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
      goto restart;
  }
  2311. */
  2312. emit_token_with_str:
  2313. zend_copy_value(zendlval, (yytext + offset), (yyleng - offset));
  2314. emit_token_with_val:
  2315. if (PARSER_MODE()) {
  2316. ZEND_ASSERT(Z_TYPE_P(zendlval) != IS_UNDEF);
  2317. elem->ast = zend_ast_create_zval_with_lineno(zendlval, start_line);
  2318. }
  2319. emit_token:
  2320. if (SCNG(on_event)) {
  2321. SCNG(on_event)(ON_TOKEN, token, start_line, SCNG(on_event_context));
  2322. }
  2323. return token;
  2324. return_whitespace:
  2325. HANDLE_NEWLINES(yytext, yyleng);
  2326. if (SCNG(on_event)) {
  2327. SCNG(on_event)(ON_TOKEN, T_WHITESPACE, start_line, SCNG(on_event_context));
  2328. }
  2329. if (PARSER_MODE()) {
  2330. start_line = CG(zend_lineno);
  2331. goto restart;
  2332. } else {
  2333. return T_WHITESPACE;
  2334. }
  2335. skip_token:
  2336. if (SCNG(on_event)) {
  2337. SCNG(on_event)(ON_TOKEN, token, start_line, SCNG(on_event_context));
  2338. }
  2339. start_line = CG(zend_lineno);
  2340. goto restart;
  2341. }