cmListFileLexer.in.l 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568
  1. %{
  2. /* Distributed under the OSI-approved BSD 3-Clause License. See accompanying
  3. file Copyright.txt or https://cmake.org/licensing for details. */
  4. /*
  5. This file must be translated to C and modified to build everywhere.
  6. Run flex >= 2.6 like this:
  7. flex --nounistd -DFLEXINT_H --noline -ocmListFileLexer.c cmListFileLexer.in.l
  8. Modify cmListFileLexer.c:
  9. - remove trailing whitespace: sed -i 's/\s*$//' cmListFileLexer.c
  10. - remove blank lines at end of file: sed -i '${/^$/d;}' cmListFileLexer.c
  11. - #include "cmStandardLexer.h" at the top: sed -i '1i#include "cmStandardLexer.h"' cmListFileLexer.c
  12. */
  13. /* IWYU pragma: no_forward_declare yyguts_t */
  /* cmsysEncoding_DupToWide is used by cmListFileLexer_SetFileName inside an
     `#ifdef _WIN32` section.  Guard the include with the same compiler-defined
     macro: `WIN32` is only defined by some toolchains/build flags, so testing
     it could leave the function undeclared on a Windows build. */
  #ifdef _WIN32
  #include "cmsys/Encoding.h"
  #endif
  17. /* Setup the proper cmListFileLexer_yylex declaration. */
  /* YY_EXTRA_TYPE makes the scanner's "extra" slot carry the cmListFileLexer
     pointer (stored by cmListFileLexerInit, fetched again in YY_INPUT). */
  18. #define YY_EXTRA_TYPE cmListFileLexer*
  /* YY_DECL gives the generated scan routine an additional `lexer` parameter,
     which is the `lexer` variable the rule actions below operate on. */
  19. #define YY_DECL int cmListFileLexer_yylex (yyscan_t yyscanner, cmListFileLexer* lexer)
  20. #include "cmListFileLexer.h"
  21. /*--------------------------------------------------------------------------*/
  /* Private state of one lexer instance.  Allocated zero-filled by
     cmListFileLexer_New, which then sets line and column to 1. */
  22. struct cmListFileLexer_s
  23. {
  24. cmListFileLexer_Token token; /* token being built; returned by cmListFileLexer_Scan */
  25. int bracket; /* offset of the second '[' from the first in the bracket opener, i.e. the length a closing "]=*" must have */
  26. int comment; /* nonzero when the current bracket opener started with '#' */
  27. int line; /* current input line; incremented by the newline rules */
  28. int column; /* current input column; reset to 1 after each newline */
  29. int size; /* allocated size in bytes of token.text (0 when there is no buffer) */
  30. FILE* file; /* input file opened by cmListFileLexer_SetFileName, or 0 */
  31. size_t cr; /* 1 when a trailing '\r' was held back by the previous file read, else 0 */
  32. char* string_buffer; /* heap copy of the text given to cmListFileLexer_SetString, or 0 */
  33. char* string_position; /* next unread byte inside string_buffer */
  34. int string_left; /* number of bytes of string_buffer not yet handed to the scanner */
  35. yyscan_t scanner; /* flex scanner handle; valid only while file or string_buffer is set */
  36. };
  /* Helpers defined after the rules section; declared here because the rule
     actions and YY_INPUT use them. */
  /* Replace the token text and stamp it with the current line/column. */
  37. static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
  38. int length);
  /* Append text to the token being built (bracket and quoted arguments). */
  39. static void cmListFileLexerAppend(cmListFileLexer* lexer, const char* text,
  40. int length);
  /* Fill the scanner buffer from the file or the in-memory string. */
  41. static int cmListFileLexerInput(cmListFileLexer* lexer, char* buffer,
  42. size_t bufferSize);
  /* Create / tear down the flex scanner and the attached input. */
  43. static void cmListFileLexerInit(cmListFileLexer* lexer);
  44. static void cmListFileLexerDestroy(cmListFileLexer* lexer);
  45. /* Replace the lexer input function. */
  /* The lexer object is recovered from the scanner's "extra" slot, so all
     input goes through cmListFileLexerInput instead of a direct stream read. */
  46. #undef YY_INPUT
  47. #define YY_INPUT(buf, result, max_size) \
  48. { result = cmListFileLexerInput(cmListFileLexer_yyget_extra(yyscanner), buf, max_size); }
  49. /*--------------------------------------------------------------------------*/
  50. %}
  51. %option prefix="cmListFileLexer_yy"
  52. %option reentrant
  53. %option yylineno
  54. %option noyywrap
  55. %pointer
  /* Exclusive start conditions:
       STRING      inside a double-quoted argument
       BRACKET     inside a bracket argument or bracket comment
       BRACKETEND  just after a "]=*" whose length matches the opener
       COMMENT     inside a '#' line comment */
  56. %x STRING
  57. %x BRACKET
  58. %x BRACKETEND
  59. %x COMMENT
  /* MAKEVAR:  a make-style reference "$(" name ")" where name is made of
               letters, digits and underscores (possibly empty).
     UNQUOTED: one character that is not space, NUL, tab, CR, LF, a paren,
               '#', backslash, double quote, '[' or '=', or else a backslash
               followed by one character.
     LEGACY:   a MAKEVAR, an UNQUOTED, or a double-quoted run of MAKEVAR,
               UNQUOTED, space, tab, '[' and '='. */
  60. MAKEVAR \$\([A-Za-z0-9_]*\)
  61. UNQUOTED ([^ \0\t\r\n\(\)#\\\"[=]|\\.)
  62. LEGACY {MAKEVAR}|{UNQUOTED}|\"({MAKEVAR}|{UNQUOTED}|[ \t[=])*\"
  63. %%
  64. <INITIAL,COMMENT>\n {
  /* End of line in normal context or at the end of a '#' line comment.
     The Newline token is stamped with the position of the newline itself
     (SetToken runs before the line counter is advanced); afterwards the
     position moves to column 1 of the next line and COMMENT state ends. */
  65. lexer->token.type = cmListFileLexer_Token_Newline;
  66. cmListFileLexerSetToken(lexer, yytext, yyleng);
  67. ++lexer->line;
  68. lexer->column = 1;
  69. BEGIN(INITIAL);
  70. return 1;
  71. }
  72. #?\[=*\[\n? {
  /* Opener of a bracket argument "[=*[" or, with a leading '#', of a bracket
     comment.  A newline directly after the opener is consumed here and is
     not added to the token text.  No token is returned yet: the content is
     collected in BRACKET state and returned when the closer is found. */
  73. const char* bracket = yytext;
  74. lexer->comment = yytext[0] == '#';
  75. if (lexer->comment) {
  76. lexer->token.type = cmListFileLexer_Token_CommentBracket;
  /* Skip the '#' so that `bracket` points at the first '['. */
  77. bracket += 1;
  78. } else {
  79. lexer->token.type = cmListFileLexer_Token_ArgumentBracket;
  80. }
  /* Start with empty text; this also records the token's line and column. */
  81. cmListFileLexerSetToken(lexer, "", 0);
  /* Distance from the first '[' to the second one (1 + number of '=').
     A closing "]=*" must have exactly this length. */
  82. lexer->bracket = strchr(bracket+1, '[') - bracket;
  83. if (yytext[yyleng-1] == '\n') {
  84. ++lexer->line;
  85. lexer->column = 1;
  86. } else {
  87. lexer->column += yyleng;
  88. }
  89. BEGIN(BRACKET);
  90. }
  91. # {
  /* Start of a line comment (a '#' that did not begin a bracket comment).
     No token is produced; only the column is advanced. */
  92. lexer->column += yyleng;
  93. BEGIN(COMMENT);
  94. }
  95. <COMMENT>[^\0\n]* {
  /* Comment text up to, but not including, the newline; discarded.  The
     newline itself is handled by the <INITIAL,COMMENT>\n rule. */
  96. lexer->column += yyleng;
  97. }
  98. \( {
  /* Single '(' token. */
  99. lexer->token.type = cmListFileLexer_Token_ParenLeft;
  100. cmListFileLexerSetToken(lexer, yytext, yyleng);
  101. lexer->column += yyleng;
  102. return 1;
  103. }
  104. \) {
  /* Single ')' token. */
  105. lexer->token.type = cmListFileLexer_Token_ParenRight;
  106. cmListFileLexerSetToken(lexer, yytext, yyleng);
  107. lexer->column += yyleng;
  108. return 1;
  109. }
  110. [A-Za-z_][A-Za-z0-9_]* {
  /* Identifier: a letter or underscore followed by letters, digits or
     underscores.  This rule precedes the unquoted-argument rules, so it
     wins when both match the same length of input. */
  111. lexer->token.type = cmListFileLexer_Token_Identifier;
  112. cmListFileLexerSetToken(lexer, yytext, yyleng);
  113. lexer->column += yyleng;
  114. return 1;
  115. }
  116. <BRACKET>\]=* {
  117. /* Handle ]]====]=======]*/
  /* Candidate closer: ']' followed by any number of '='.  It is appended
     tentatively; only when its length equals the opener length do we enter
     BRACKETEND to check whether the final ']' follows. */
  118. cmListFileLexerAppend(lexer, yytext, yyleng);
  119. lexer->column += yyleng;
  120. if (yyleng == lexer->bracket) {
  121. BEGIN(BRACKETEND);
  122. }
  123. }
  124. <BRACKETEND>\] {
  /* The closer is complete.  The token type was already set by the opener
     rule, so only the tentatively appended "]=*" has to be removed. */
  125. lexer->column += yyleng;
  126. /* Erase the partial bracket from the token. */
  127. lexer->token.length -= lexer->bracket;
  128. lexer->token.text[lexer->token.length] = 0;
  129. BEGIN(INITIAL);
  130. return 1;
  131. }
  132. <BRACKET>([^]\n])+ {
  /* Run of ordinary content (anything except ']' and newline). */
  133. cmListFileLexerAppend(lexer, yytext, yyleng);
  134. lexer->column += yyleng;
  135. }
  136. <BRACKET,BRACKETEND>\n {
  /* Newlines are part of the content.  Coming from BRACKETEND this also
     means the candidate closer was just content. */
  137. cmListFileLexerAppend(lexer, yytext, yyleng);
  138. ++lexer->line;
  139. lexer->column = 1;
  140. BEGIN(BRACKET);
  141. }
  142. <BRACKET,BRACKETEND>[^\0\n] {
  /* Any other single character.  In BRACKETEND this is the character after
     a candidate closer that was not followed by ']', so scanning of the
     content resumes in BRACKET. */
  143. cmListFileLexerAppend(lexer, yytext, yyleng);
  144. lexer->column += yyleng;
  145. BEGIN(BRACKET);
  146. }
  147. <BRACKET,BRACKETEND><<EOF>> {
  /* Input ended before the closer: report an unterminated bracket.  The
     token keeps the text and position collected so far. */
  148. lexer->token.type = cmListFileLexer_Token_BadBracket;
  149. BEGIN(INITIAL);
  150. return 1;
  151. }
  152. ({UNQUOTED}|=|\[=*{UNQUOTED})({UNQUOTED}|[[=])* {
  /* Unquoted argument.  It may start with an UNQUOTED element, a '=', or
     "[=*" followed by an UNQUOTED element (i.e. a '[' that is not a bracket
     opener); after that '[' and '=' are allowed freely. */
  153. lexer->token.type = cmListFileLexer_Token_ArgumentUnquoted;
  154. cmListFileLexerSetToken(lexer, yytext, yyleng);
  155. lexer->column += yyleng;
  156. return 1;
  157. }
  158. ({MAKEVAR}|{UNQUOTED}|=|\[=*{LEGACY})({LEGACY}|[[=])* {
  /* Same token type, built from LEGACY elements: additionally accepts
     make-style $(NAME) references and embedded double-quoted segments.
     Being listed second, it is chosen only when it matches more input than
     the rule above. */
  159. lexer->token.type = cmListFileLexer_Token_ArgumentUnquoted;
  160. cmListFileLexerSetToken(lexer, yytext, yyleng);
  161. lexer->column += yyleng;
  162. return 1;
  163. }
  164. \[ {
  /* A lone '[' that matched neither a bracket opener nor a longer unquoted
     argument is returned as a one-character unquoted argument. */
  165. lexer->token.type = cmListFileLexer_Token_ArgumentUnquoted;
  166. cmListFileLexerSetToken(lexer, yytext, yyleng);
  167. lexer->column += yyleng;
  168. return 1;
  169. }
  170. \" {
  /* Opening quote: start an empty quoted-argument token (this records the
     token position) and collect its content in STRING state.  Nothing is
     returned until the closing quote or end of input. */
  171. lexer->token.type = cmListFileLexer_Token_ArgumentQuoted;
  172. cmListFileLexerSetToken(lexer, "", 0);
  173. lexer->column += yyleng;
  174. BEGIN(STRING);
  175. }
  176. <STRING>([^\\\n\"]|\\.)+ {
  /* Run of content: characters other than backslash, newline and quote, or
     a backslash together with the character after it.  Escape sequences are
     appended unchanged. */
  177. cmListFileLexerAppend(lexer, yytext, yyleng);
  178. lexer->column += yyleng;
  179. }
  180. <STRING>\\\n {
  181. /* Continuation: text is not part of string */
  182. ++lexer->line;
  183. lexer->column = 1;
  184. }
  185. <STRING>\n {
  /* A plain newline is kept as part of the string. */
  186. cmListFileLexerAppend(lexer, yytext, yyleng);
  187. ++lexer->line;
  188. lexer->column = 1;
  189. }
  190. <STRING>\" {
  /* Closing quote: the token is complete; the quotes are not in its text. */
  191. lexer->column += yyleng;
  192. BEGIN(INITIAL);
  193. return 1;
  194. }
  195. <STRING>[^\0\n] {
  /* Fallback for a single character not taken by the rules above (for
     example a backslash that is the last character of the input). */
  196. cmListFileLexerAppend(lexer, yytext, yyleng);
  197. lexer->column += yyleng;
  198. }
  199. <STRING><<EOF>> {
  /* Input ended inside the quotes: report an unterminated string. */
  200. lexer->token.type = cmListFileLexer_Token_BadString;
  201. BEGIN(INITIAL);
  202. return 1;
  203. }
  204. [ \t\r]+ {
  /* Horizontal whitespace (space, tab, carriage return) as one token. */
  205. lexer->token.type = cmListFileLexer_Token_Space;
  206. cmListFileLexerSetToken(lexer, yytext, yyleng);
  207. lexer->column += yyleng;
  208. return 1;
  209. }
  210. . {
  /* Catch-all: any character no earlier rule accepted is reported as a
     one-character BadCharacter token. */
  211. lexer->token.type = cmListFileLexer_Token_BadCharacter;
  212. cmListFileLexerSetToken(lexer, yytext, yyleng);
  213. lexer->column += yyleng;
  214. return 1;
  215. }
  216. <<EOF>> {
  /* Normal end of input: clear the token (SetToken with a null text frees
     the text buffer) and return 0, which cmListFileLexer_Scan treats as
     "no more tokens". */
  217. lexer->token.type = cmListFileLexer_Token_None;
  218. cmListFileLexerSetToken(lexer, 0, 0);
  219. return 0;
  220. }
  221. %%
  222. /*--------------------------------------------------------------------------*/
  223. static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
  224. int length)
  225. {
  226. /* Set the token line and column number. */
  227. lexer->token.line = lexer->line;
  228. lexer->token.column = lexer->column;
  229. /* Use the same buffer if possible. */
  230. if (lexer->token.text) {
  231. if (text && length < lexer->size) {
  232. strcpy(lexer->token.text, text);
  233. lexer->token.length = length;
  234. return;
  235. }
  236. free(lexer->token.text);
  237. lexer->token.text = 0;
  238. lexer->size = 0;
  239. }
  240. /* Need to extend the buffer. */
  241. if (text) {
  242. lexer->token.text = strdup(text);
  243. lexer->token.length = length;
  244. lexer->size = length + 1;
  245. } else {
  246. lexer->token.length = 0;
  247. }
  248. }
  249. /*--------------------------------------------------------------------------*/
  250. static void cmListFileLexerAppend(cmListFileLexer* lexer, const char* text,
  251. int length)
  252. {
  253. char* temp;
  254. int newSize;
  255. /* If the appended text will fit in the buffer, do not reallocate. */
  256. newSize = lexer->token.length + length + 1;
  257. if (lexer->token.text && newSize <= lexer->size) {
  258. strcpy(lexer->token.text + lexer->token.length, text);
  259. lexer->token.length += length;
  260. return;
  261. }
  262. /* We need to extend the buffer. */
  263. temp = malloc(newSize);
  264. if (lexer->token.text) {
  265. memcpy(temp, lexer->token.text, lexer->token.length);
  266. free(lexer->token.text);
  267. }
  268. memcpy(temp + lexer->token.length, text, length);
  269. temp[lexer->token.length + length] = 0;
  270. lexer->token.text = temp;
  271. lexer->token.length += length;
  272. lexer->size = newSize;
  273. }
  274. /*--------------------------------------------------------------------------*/
  /* Input callback used by YY_INPUT: store up to bufferSize bytes of input in
     `buffer` and return how many were stored; 0 means no more input.
     Reads from lexer->file when set (converting CRLF to LF), otherwise from
     the remaining part of the in-memory string.  Returns 0 when `lexer` is
     null or has no input. */
  275. static int cmListFileLexerInput(cmListFileLexer* lexer, char* buffer,
  276. size_t bufferSize)
  277. {
  278. if (lexer) {
  279. if (lexer->file) {
  280. /* Convert CRLF -> LF explicitly. The C FILE "t"ext mode
  281. does not convert newlines on all platforms. Move any
  282. trailing CR to the start of the buffer for the next read. */
  283. size_t cr = lexer->cr;
  284. size_t n;
  /* Pre-place the CR held back by the previous call.  When cr is 0 the
     fread below starts at buffer[0] and overwrites it. */
  285. buffer[0] = '\r';
  286. n = fread(buffer + cr, 1, bufferSize - cr, lexer->file);
  287. if (n) {
  /* o = write cursor, i = read cursor, e = end of the bytes to emit now. */
  288. char* o = buffer;
  289. const char* i = buffer;
  290. const char* e;
  /* Count the held-back CR as part of this chunk. */
  291. n += cr;
  /* If the chunk ends in CR we cannot yet tell whether an LF follows, so
     hold it back for the next call. */
  292. cr = (buffer[n - 1] == '\r') ? 1 : 0;
  293. e = buffer + n - cr;
  /* Compact in place, dropping each CR that is directly followed by LF.
     i[1] stays inside the chunk: the last emitted byte is never a CR when
     cr is 0, and when cr is 1 the byte after it is the held-back CR. */
  294. while (i != e) {
  295. if (i[0] == '\r' && i[1] == '\n') {
  296. ++i;
  297. }
  298. *o++ = *i++;
  299. }
  /* NOTE(review): if the chunk consisted only of a single CR this yields 0,
     which the scanner presumably treats as end of input - confirm. */
  300. n = o - buffer;
  301. } else {
  /* Nothing more to read: emit the held-back CR, if any, as the last byte. */
  302. n = cr;
  303. cr = 0;
  304. }
  305. lexer->cr = cr;
  306. return n;
  307. } else if (lexer->string_left) {
  /* String input: hand out the next piece, at most bufferSize bytes, and
     advance the read position.  No newline conversion is done here. */
  308. int length = lexer->string_left;
  309. if ((int)bufferSize < length) {
  310. length = (int)bufferSize;
  311. }
  312. memcpy(buffer, lexer->string_position, length);
  313. lexer->string_position += length;
  314. lexer->string_left -= length;
  315. return length;
  316. }
  317. }
  318. return 0;
  319. }
  320. /*--------------------------------------------------------------------------*/
  321. static void cmListFileLexerInit(cmListFileLexer* lexer)
  322. {
  323. if (lexer->file || lexer->string_buffer) {
  324. cmListFileLexer_yylex_init(&lexer->scanner);
  325. cmListFileLexer_yyset_extra(lexer, lexer->scanner);
  326. }
  327. }
  328. /*--------------------------------------------------------------------------*/
  329. static void cmListFileLexerDestroy(cmListFileLexer* lexer)
  330. {
  331. cmListFileLexerSetToken(lexer, 0, 0);
  332. if (lexer->file || lexer->string_buffer) {
  333. cmListFileLexer_yylex_destroy(lexer->scanner);
  334. if (lexer->file) {
  335. fclose(lexer->file);
  336. lexer->file = 0;
  337. }
  338. if (lexer->string_buffer) {
  339. free(lexer->string_buffer);
  340. lexer->string_buffer = 0;
  341. lexer->string_left = 0;
  342. lexer->string_position = 0;
  343. }
  344. }
  345. }
  346. /*--------------------------------------------------------------------------*/
  347. cmListFileLexer* cmListFileLexer_New(void)
  348. {
  349. cmListFileLexer* lexer = (cmListFileLexer*)malloc(sizeof(cmListFileLexer));
  350. if (!lexer) {
  351. return 0;
  352. }
  353. memset(lexer, 0, sizeof(*lexer));
  354. lexer->line = 1;
  355. lexer->column = 1;
  356. return lexer;
  357. }
  358. /*--------------------------------------------------------------------------*/
  359. void cmListFileLexer_Delete(cmListFileLexer* lexer)
  360. {
  361. cmListFileLexer_SetFileName(lexer, 0, 0);
  362. free(lexer);
  363. }
  364. /*--------------------------------------------------------------------------*/
  365. static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f)
  366. {
  367. unsigned char b[2];
  368. if (fread(b, 1, 2, f) == 2) {
  369. if (b[0] == 0xEF && b[1] == 0xBB) {
  370. if (fread(b, 1, 1, f) == 1 && b[0] == 0xBF) {
  371. return cmListFileLexer_BOM_UTF8;
  372. }
  373. } else if (b[0] == 0xFE && b[1] == 0xFF) {
  374. /* UTF-16 BE */
  375. return cmListFileLexer_BOM_UTF16BE;
  376. } else if (b[0] == 0 && b[1] == 0) {
  377. if (fread(b, 1, 2, f) == 2 && b[0] == 0xFE && b[1] == 0xFF) {
  378. return cmListFileLexer_BOM_UTF32BE;
  379. }
  380. } else if (b[0] == 0xFF && b[1] == 0xFE) {
  381. fpos_t p;
  382. fgetpos(f, &p);
  383. if (fread(b, 1, 2, f) == 2 && b[0] == 0 && b[1] == 0) {
  384. return cmListFileLexer_BOM_UTF32LE;
  385. }
  386. if (fsetpos(f, &p) != 0) {
  387. return cmListFileLexer_BOM_Broken;
  388. }
  389. return cmListFileLexer_BOM_UTF16LE;
  390. }
  391. }
  392. if (fseek(f, 0, SEEK_SET) != 0) {
  393. return cmListFileLexer_BOM_Broken;
  394. }
  395. return cmListFileLexer_BOM_None;
  396. }
  397. /*--------------------------------------------------------------------------*/
  398. int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name,
  399. cmListFileLexer_BOM* bom)
  400. {
  401. int result = 1;
  402. cmListFileLexerDestroy(lexer);
  403. if (name) {
  404. #ifdef _WIN32
  405. wchar_t* wname = cmsysEncoding_DupToWide(name);
  406. lexer->file = _wfopen(wname, L"rb");
  407. free(wname);
  408. #else
  409. lexer->file = fopen(name, "rb");
  410. #endif
  411. if (lexer->file) {
  412. if (bom) {
  413. *bom = cmListFileLexer_ReadBOM(lexer->file);
  414. }
  415. } else {
  416. result = 0;
  417. }
  418. }
  419. cmListFileLexerInit(lexer);
  420. return result;
  421. }
  422. /*--------------------------------------------------------------------------*/
  423. int cmListFileLexer_SetString(cmListFileLexer* lexer, const char* text)
  424. {
  425. int result = 1;
  426. cmListFileLexerDestroy(lexer);
  427. if (text) {
  428. int length = (int)strlen(text);
  429. lexer->string_buffer = (char*)malloc(length + 1);
  430. if (lexer->string_buffer) {
  431. strcpy(lexer->string_buffer, text);
  432. lexer->string_position = lexer->string_buffer;
  433. lexer->string_left = length;
  434. } else {
  435. result = 0;
  436. }
  437. }
  438. cmListFileLexerInit(lexer);
  439. return result;
  440. }
  441. /*--------------------------------------------------------------------------*/
  442. cmListFileLexer_Token* cmListFileLexer_Scan(cmListFileLexer* lexer)
  443. {
  444. if (!lexer->file) {
  445. return 0;
  446. }
  447. if (cmListFileLexer_yylex(lexer->scanner, lexer)) {
  448. return &lexer->token;
  449. } else {
  450. cmListFileLexer_SetFileName(lexer, 0, 0);
  451. return 0;
  452. }
  453. }
  454. /*--------------------------------------------------------------------------*/
  455. long cmListFileLexer_GetCurrentLine(cmListFileLexer* lexer)
  456. {
  457. if (lexer->file) {
  458. return lexer->line;
  459. } else {
  460. return 0;
  461. }
  462. }
  463. /*--------------------------------------------------------------------------*/
  464. long cmListFileLexer_GetCurrentColumn(cmListFileLexer* lexer)
  465. {
  466. if (lexer->file) {
  467. return lexer->column;
  468. } else {
  469. return 0;
  470. }
  471. }
  472. /*--------------------------------------------------------------------------*/
  473. const char* cmListFileLexer_GetTypeAsString(cmListFileLexer* lexer,
  474. cmListFileLexer_Type type)
  475. {
  476. (void)lexer;
  477. switch (type) {
  478. case cmListFileLexer_Token_None:
  479. return "nothing";
  480. case cmListFileLexer_Token_Space:
  481. return "space";
  482. case cmListFileLexer_Token_Newline:
  483. return "newline";
  484. case cmListFileLexer_Token_Identifier:
  485. return "identifier";
  486. case cmListFileLexer_Token_ParenLeft:
  487. return "left paren";
  488. case cmListFileLexer_Token_ParenRight:
  489. return "right paren";
  490. case cmListFileLexer_Token_ArgumentUnquoted:
  491. return "unquoted argument";
  492. case cmListFileLexer_Token_ArgumentQuoted:
  493. return "quoted argument";
  494. case cmListFileLexer_Token_ArgumentBracket:
  495. return "bracket argument";
  496. case cmListFileLexer_Token_CommentBracket:
  497. return "bracket comment";
  498. case cmListFileLexer_Token_BadCharacter:
  499. return "bad character";
  500. case cmListFileLexer_Token_BadBracket:
  501. return "unterminated bracket";
  502. case cmListFileLexer_Token_BadString:
  503. return "unterminated string";
  504. }
  505. return "unknown token";
  506. }