json_scanner.re 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 7 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2018 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: Jakub Zelenka <bukka@php.net> |
  16. +----------------------------------------------------------------------+
  17. */
  18. #include "php.h"
  19. #include "php_json_scanner.h"
  20. #include "php_json_scanner_defs.h"
  21. #include "php_json_parser.h"
  22. #include "json_parser.tab.h"
  23. #define YYCTYPE php_json_ctype
  24. #define YYCURSOR s->cursor
  25. #define YYLIMIT s->limit
  26. #define YYMARKER s->marker
  27. #define YYCTXMARKER s->ctxmarker
  28. #define YYGETCONDITION() s->state
  29. #define YYSETCONDITION(yystate) s->state = yystate
  30. #define YYFILL(n)
  31. #define PHP_JSON_CONDITION_SET(condition) YYSETCONDITION(yyc##condition)
  32. #define PHP_JSON_CONDITION_GOTO(condition) goto yyc_##condition
  33. #define PHP_JSON_CONDITION_SET_AND_GOTO(condition) \
  34. PHP_JSON_CONDITION_SET(condition); \
  35. PHP_JSON_CONDITION_GOTO(condition)
  36. #define PHP_JSON_CONDITION_GOTO_STR_P2() \
  37. do { \
  38. if (s->utf8_invalid) { \
  39. PHP_JSON_CONDITION_GOTO(STR_P2_BIN); \
  40. } else { \
  41. PHP_JSON_CONDITION_GOTO(STR_P2_UTF); \
  42. } \
  43. } while(0)
  44. #define PHP_JSON_SCANNER_COPY_ESC() php_json_scanner_copy_string(s, 0)
  45. #define PHP_JSON_SCANNER_COPY_UTF() php_json_scanner_copy_string(s, 5)
  46. #define PHP_JSON_SCANNER_COPY_UTF_SP() php_json_scanner_copy_string(s, 11)
  47. #define PHP_JSON_INT_MAX_LENGTH (MAX_LENGTH_OF_LONG - 1)
  48. static void php_json_scanner_copy_string(php_json_scanner *s, int esc_size)
  49. {
  50. size_t len = s->cursor - s->str_start - esc_size - 1;
  51. if (len) {
  52. memcpy(s->pstr, s->str_start, len);
  53. s->pstr += len;
  54. }
  55. }
  56. static int php_json_hex_to_int(char code)
  57. {
  58. if (code >= '0' && code <= '9') {
  59. return code - '0';
  60. } else if (code >= 'A' && code <= 'F') {
  61. return code - ('A' - 10);
  62. } else if (code >= 'a' && code <= 'f') {
  63. return code - ('a' - 10);
  64. } else {
  65. /* this should never happened (just to suppress compiler warning) */
  66. return -1;
  67. }
  68. }
  69. static int php_json_ucs2_to_int_ex(php_json_scanner *s, int size, int start)
  70. {
  71. int i, code = 0;
  72. php_json_ctype *pc = s->cursor - start;
  73. for (i = 0; i < size; i++) {
  74. code |= php_json_hex_to_int(*(pc--)) << (i * 4);
  75. }
  76. return code;
  77. }
  78. static int php_json_ucs2_to_int(php_json_scanner *s, int size)
  79. {
  80. return php_json_ucs2_to_int_ex(s, size, 1);
  81. }
  82. void php_json_scanner_init(php_json_scanner *s, char *str, size_t str_len, int options)
  83. {
  84. s->cursor = (php_json_ctype *) str;
  85. s->limit = (php_json_ctype *) str + str_len;
  86. s->options = options;
  87. PHP_JSON_CONDITION_SET(JS);
  88. }
  89. int php_json_scan(php_json_scanner *s)
  90. {
  91. ZVAL_NULL(&s->value);
  92. std:
  93. s->token = s->cursor;
  94. /*!re2c
  95. re2c:indent:top = 1;
  96. re2c:yyfill:enable = 0;
  97. DIGIT = [0-9] ;
  98. DIGITNZ = [1-9] ;
  99. UINT = "0" | ( DIGITNZ DIGIT* ) ;
  100. INT = "-"? UINT ;
  101. HEX = DIGIT | [a-fA-F] ;
  102. HEXNZ = DIGITNZ | [a-fA-F] ;
  103. HEX7 = [0-7] ;
  104. HEXC = DIGIT | [a-cA-C] ;
  105. FLOAT = INT "." DIGIT+ ;
  106. EXP = ( INT | FLOAT ) [eE] [+-]? DIGIT+ ;
  107. NL = "\r"? "\n" ;
  108. WS = [ \t\r]+ ;
  109. EOI = "\000";
  110. CTRL = [\x00-\x1F] ;
  111. UTF8T = [\x80-\xBF] ;
  112. UTF8_1 = [\x00-\x7F] ;
  113. UTF8_2 = [\xC2-\xDF] UTF8T ;
  114. UTF8_3A = "\xE0" [\xA0-\xBF] UTF8T ;
  115. UTF8_3B = [\xE1-\xEC] UTF8T{2} ;
  116. UTF8_3C = "\xED" [\x80-\x9F] UTF8T ;
  117. UTF8_3D = [\xEE-\xEF] UTF8T{2} ;
  118. UTF8_3 = UTF8_3A | UTF8_3B | UTF8_3C | UTF8_3D ;
  119. UTF8_4A = "\xF0"[\x90-\xBF] UTF8T{2} ;
  120. UTF8_4B = [\xF1-\xF3] UTF8T{3} ;
  121. UTF8_4C = "\xF4" [\x80-\x8F] UTF8T{2} ;
  122. UTF8_4 = UTF8_4A | UTF8_4B | UTF8_4C ;
  123. UTF8 = UTF8_1 | UTF8_2 | UTF8_3 | UTF8_4 ;
  124. ANY = [^] ;
  125. ESCPREF = "\\" ;
  126. ESCSYM = ( "\"" | "\\" | "/" | [bfnrt] ) ;
  127. ESC = ESCPREF ESCSYM ;
  128. UTFSYM = "u" ;
  129. UTFPREF = ESCPREF UTFSYM ;
  130. UCS2 = UTFPREF HEX{4} ;
  131. UTF16_1 = UTFPREF "00" HEX7 HEX ;
  132. UTF16_2 = UTFPREF "0" HEX7 HEX{2} ;
  133. UTF16_3 = UTFPREF ( ( ( HEXC | [efEF] ) HEX ) | ( [dD] HEX7 ) ) HEX{2} ;
  134. UTF16_4 = UTFPREF [dD] [89abAB] HEX{2} UTFPREF [dD] [c-fC-F] HEX{2} ;
  135. <JS>"{" { return '{'; }
  136. <JS>"}" { return '}'; }
  137. <JS>"[" { return '['; }
  138. <JS>"]" { return ']'; }
  139. <JS>":" { return ':'; }
  140. <JS>"," { return ','; }
  141. <JS>"null" {
  142. ZVAL_NULL(&s->value);
  143. return PHP_JSON_T_NUL;
  144. }
  145. <JS>"true" {
  146. ZVAL_TRUE(&s->value);
  147. return PHP_JSON_T_TRUE;
  148. }
  149. <JS>"false" {
  150. ZVAL_FALSE(&s->value);
  151. return PHP_JSON_T_FALSE;
  152. }
  153. <JS>INT {
  154. zend_bool bigint = 0, negative = s->token[0] == '-';
  155. size_t digits = (size_t) (s->cursor - s->token - negative);
  156. if (digits >= PHP_JSON_INT_MAX_LENGTH) {
  157. if (digits == PHP_JSON_INT_MAX_LENGTH) {
  158. int cmp = strncmp((char *) (s->token + negative), LONG_MIN_DIGITS, PHP_JSON_INT_MAX_LENGTH);
  159. if (!(cmp < 0 || (cmp == 0 && negative))) {
  160. bigint = 1;
  161. }
  162. } else {
  163. bigint = 1;
  164. }
  165. }
  166. if (!bigint) {
  167. ZVAL_LONG(&s->value, ZEND_STRTOL((char *) s->token, NULL, 10));
  168. return PHP_JSON_T_INT;
  169. } else if (s->options & PHP_JSON_BIGINT_AS_STRING) {
  170. ZVAL_STRINGL(&s->value, (char *) s->token, s->cursor - s->token);
  171. return PHP_JSON_T_STRING;
  172. } else {
  173. ZVAL_DOUBLE(&s->value, zend_strtod((char *) s->token, NULL));
  174. return PHP_JSON_T_DOUBLE;
  175. }
  176. }
  177. <JS>FLOAT|EXP {
  178. ZVAL_DOUBLE(&s->value, zend_strtod((char *) s->token, NULL));
  179. return PHP_JSON_T_DOUBLE;
  180. }
  181. <JS>NL|WS { goto std; }
  182. <JS>EOI {
  183. if (s->limit < s->cursor) {
  184. return PHP_JSON_T_EOI;
  185. } else {
  186. s->errcode = PHP_JSON_ERROR_CTRL_CHAR;
  187. return PHP_JSON_T_ERROR;
  188. }
  189. }
  190. <JS>["] {
  191. s->str_start = s->cursor;
  192. s->str_esc = 0;
  193. s->utf8_invalid = 0;
  194. s->utf8_invalid_count = 0;
  195. PHP_JSON_CONDITION_SET_AND_GOTO(STR_P1);
  196. }
  197. <JS>CTRL {
  198. s->errcode = PHP_JSON_ERROR_CTRL_CHAR;
  199. return PHP_JSON_T_ERROR;
  200. }
  201. <JS>UTF8 {
  202. s->errcode = PHP_JSON_ERROR_SYNTAX;
  203. return PHP_JSON_T_ERROR;
  204. }
  205. <JS>ANY {
  206. s->errcode = PHP_JSON_ERROR_UTF8;
  207. return PHP_JSON_T_ERROR;
  208. }
  209. <STR_P1>CTRL {
  210. s->errcode = PHP_JSON_ERROR_CTRL_CHAR;
  211. return PHP_JSON_T_ERROR;
  212. }
  213. <STR_P1>UTF16_1 {
  214. s->str_esc += 5;
  215. PHP_JSON_CONDITION_GOTO(STR_P1);
  216. }
  217. <STR_P1>UTF16_2 {
  218. s->str_esc += 4;
  219. PHP_JSON_CONDITION_GOTO(STR_P1);
  220. }
  221. <STR_P1>UTF16_3 {
  222. s->str_esc += 3;
  223. PHP_JSON_CONDITION_GOTO(STR_P1);
  224. }
  225. <STR_P1>UTF16_4 {
  226. s->str_esc += 8;
  227. PHP_JSON_CONDITION_GOTO(STR_P1);
  228. }
  229. <STR_P1>UCS2 {
  230. s->errcode = PHP_JSON_ERROR_UTF16;
  231. return PHP_JSON_T_ERROR;
  232. }
  233. <STR_P1>ESC {
  234. s->str_esc++;
  235. PHP_JSON_CONDITION_GOTO(STR_P1);
  236. }
  237. <STR_P1>ESCPREF {
  238. s->errcode = PHP_JSON_ERROR_SYNTAX;
  239. return PHP_JSON_T_ERROR;
  240. }
  241. <STR_P1>["] {
  242. zend_string *str;
  243. size_t len = s->cursor - s->str_start - s->str_esc - 1 + s->utf8_invalid_count;
  244. if (len == 0) {
  245. PHP_JSON_CONDITION_SET(JS);
  246. ZVAL_EMPTY_STRING(&s->value);
  247. return PHP_JSON_T_ESTRING;
  248. }
  249. str = zend_string_alloc(len, 0);
  250. ZSTR_VAL(str)[len] = '\0';
  251. ZVAL_STR(&s->value, str);
  252. if (s->str_esc || s->utf8_invalid) {
  253. s->pstr = (php_json_ctype *) Z_STRVAL(s->value);
  254. s->cursor = s->str_start;
  255. PHP_JSON_CONDITION_GOTO_STR_P2();
  256. } else {
  257. memcpy(Z_STRVAL(s->value), s->str_start, len);
  258. PHP_JSON_CONDITION_SET(JS);
  259. return PHP_JSON_T_STRING;
  260. }
  261. }
  262. <STR_P1>UTF8 { PHP_JSON_CONDITION_GOTO(STR_P1); }
  263. <STR_P1>ANY {
  264. if (s->options & (PHP_JSON_INVALID_UTF8_IGNORE | PHP_JSON_INVALID_UTF8_SUBSTITUTE)) {
  265. if (s->options & PHP_JSON_INVALID_UTF8_SUBSTITUTE) {
  266. if (s->utf8_invalid_count > INT_MAX - 2) {
  267. s->errcode = PHP_JSON_ERROR_UTF8;
  268. return PHP_JSON_T_ERROR;
  269. }
  270. s->utf8_invalid_count += 2;
  271. } else {
  272. s->utf8_invalid_count--;
  273. }
  274. s->utf8_invalid = 1;
  275. PHP_JSON_CONDITION_GOTO(STR_P1);
  276. }
  277. s->errcode = PHP_JSON_ERROR_UTF8;
  278. return PHP_JSON_T_ERROR;
  279. }
  280. <STR_P2_UTF,STR_P2_BIN>UTF16_1 {
  281. int utf16 = php_json_ucs2_to_int(s, 2);
  282. PHP_JSON_SCANNER_COPY_UTF();
  283. *(s->pstr++) = (char) utf16;
  284. s->str_start = s->cursor;
  285. PHP_JSON_CONDITION_GOTO_STR_P2();
  286. }
  287. <STR_P2_UTF,STR_P2_BIN>UTF16_2 {
  288. int utf16 = php_json_ucs2_to_int(s, 3);
  289. PHP_JSON_SCANNER_COPY_UTF();
  290. *(s->pstr++) = (char) (0xc0 | (utf16 >> 6));
  291. *(s->pstr++) = (char) (0x80 | (utf16 & 0x3f));
  292. s->str_start = s->cursor;
  293. PHP_JSON_CONDITION_GOTO_STR_P2();
  294. }
  295. <STR_P2_UTF,STR_P2_BIN>UTF16_3 {
  296. int utf16 = php_json_ucs2_to_int(s, 4);
  297. PHP_JSON_SCANNER_COPY_UTF();
  298. *(s->pstr++) = (char) (0xe0 | (utf16 >> 12));
  299. *(s->pstr++) = (char) (0x80 | ((utf16 >> 6) & 0x3f));
  300. *(s->pstr++) = (char) (0x80 | (utf16 & 0x3f));
  301. s->str_start = s->cursor;
  302. PHP_JSON_CONDITION_GOTO_STR_P2();
  303. }
  304. <STR_P2_UTF,STR_P2_BIN>UTF16_4 {
  305. int utf32, utf16_hi, utf16_lo;
  306. utf16_hi = php_json_ucs2_to_int(s, 4);
  307. utf16_lo = php_json_ucs2_to_int_ex(s, 4, 7);
  308. utf32 = ((utf16_lo & 0x3FF) << 10) + (utf16_hi & 0x3FF) + 0x10000;
  309. PHP_JSON_SCANNER_COPY_UTF_SP();
  310. *(s->pstr++) = (char) (0xf0 | (utf32 >> 18));
  311. *(s->pstr++) = (char) (0x80 | ((utf32 >> 12) & 0x3f));
  312. *(s->pstr++) = (char) (0x80 | ((utf32 >> 6) & 0x3f));
  313. *(s->pstr++) = (char) (0x80 | (utf32 & 0x3f));
  314. s->str_start = s->cursor;
  315. PHP_JSON_CONDITION_GOTO_STR_P2();
  316. }
  317. <STR_P2_UTF,STR_P2_BIN>ESCPREF {
  318. char esc;
  319. PHP_JSON_SCANNER_COPY_ESC();
  320. switch (*s->cursor) {
  321. case 'b':
  322. esc = '\b';
  323. break;
  324. case 'f':
  325. esc = '\f'; break;
  326. case 'n':
  327. esc = '\n';
  328. break;
  329. case 'r':
  330. esc = '\r';
  331. break;
  332. case 't':
  333. esc = '\t';
  334. break;
  335. case '\\':
  336. case '/':
  337. case '"':
  338. esc = *s->cursor;
  339. break;
  340. default:
  341. s->errcode = PHP_JSON_ERROR_SYNTAX;
  342. return PHP_JSON_T_ERROR;
  343. }
  344. *(s->pstr++) = esc;
  345. ++YYCURSOR;
  346. s->str_start = s->cursor;
  347. PHP_JSON_CONDITION_GOTO_STR_P2();
  348. }
  349. <STR_P2_UTF,STR_P2_BIN>["] => JS {
  350. PHP_JSON_SCANNER_COPY_ESC();
  351. return PHP_JSON_T_STRING;
  352. }
  353. <STR_P2_BIN>UTF8 { PHP_JSON_CONDITION_GOTO(STR_P2_BIN); }
  354. <STR_P2_BIN>ANY {
  355. if (s->utf8_invalid) {
  356. PHP_JSON_SCANNER_COPY_ESC();
  357. if (s->options & PHP_JSON_INVALID_UTF8_SUBSTITUTE) {
  358. *(s->pstr++) = (char) (0xe0 | (0xfffd >> 12));
  359. *(s->pstr++) = (char) (0x80 | ((0xfffd >> 6) & 0x3f));
  360. *(s->pstr++) = (char) (0x80 | (0xfffd & 0x3f));
  361. }
  362. s->str_start = s->cursor;
  363. }
  364. PHP_JSON_CONDITION_GOTO(STR_P2_BIN);
  365. }
  366. <STR_P2_UTF>ANY { PHP_JSON_CONDITION_GOTO(STR_P2_UTF); }
  367. <*>ANY {
  368. s->errcode = PHP_JSON_ERROR_SYNTAX;
  369. return PHP_JSON_T_ERROR;
  370. }
  371. */
  372. }