url_scanner_ex.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066
  1. /* Generated by re2c 0.14.3 */
  2. /*
  3. +----------------------------------------------------------------------+
  4. | PHP Version 5 |
  5. +----------------------------------------------------------------------+
  6. | Copyright (c) 1997-2016 The PHP Group |
  7. +----------------------------------------------------------------------+
  8. | This source file is subject to version 3.01 of the PHP license, |
  9. | that is bundled with this package in the file LICENSE, and is |
  10. | available through the world-wide-web at the following url: |
  11. | http://www.php.net/license/3_01.txt |
  12. | If you did not receive a copy of the PHP license and are unable to |
  13. | obtain it through the world-wide-web, please send a note to |
  14. | license@php.net so we can mail you a copy immediately. |
  15. +----------------------------------------------------------------------+
  16. | Author: Sascha Schumann <sascha@schumann.cx> |
  17. +----------------------------------------------------------------------+
  18. */
  19. /* $Id$ */
  20. #include "php.h"
  21. #ifdef HAVE_UNISTD_H
  22. #include <unistd.h>
  23. #endif
  24. #ifdef HAVE_LIMITS_H
  25. #include <limits.h>
  26. #endif
  27. #include <stdio.h>
  28. #include <stdlib.h>
  29. #include <string.h>
  30. #include "php_ini.h"
  31. #include "php_globals.h"
  32. #define STATE_TAG SOME_OTHER_STATE_TAG
  33. #include "basic_functions.h"
  34. #include "url.h"
  35. #undef STATE_TAG
  36. #define url_scanner url_scanner_ex
  37. #include "php_smart_str.h"
  38. static PHP_INI_MH(OnUpdateTags)
  39. {
  40. url_adapt_state_ex_t *ctx;
  41. char *key;
  42. char *lasts;
  43. char *tmp;
  44. ctx = &BG(url_adapt_state_ex);
  45. tmp = estrndup(new_value, new_value_length);
  46. if (ctx->tags)
  47. zend_hash_destroy(ctx->tags);
  48. else {
  49. ctx->tags = malloc(sizeof(HashTable));
  50. if (!ctx->tags) {
  51. return FAILURE;
  52. }
  53. }
  54. zend_hash_init(ctx->tags, 0, NULL, NULL, 1);
  55. for (key = php_strtok_r(tmp, ",", &lasts);
  56. key;
  57. key = php_strtok_r(NULL, ",", &lasts)) {
  58. char *val;
  59. val = strchr(key, '=');
  60. if (val) {
  61. char *q;
  62. int keylen;
  63. *val++ = '\0';
  64. for (q = key; *q; q++)
  65. *q = tolower(*q);
  66. keylen = q - key;
  67. /* key is stored withOUT NUL
  68. val is stored WITH NUL */
  69. zend_hash_add(ctx->tags, key, keylen, val, strlen(val)+1, NULL);
  70. }
  71. }
  72. efree(tmp);
  73. return SUCCESS;
  74. }
/*
 * INI entries of this module.
 *
 * "url_rewriter.tags" is a comma-separated list of tag=attribute pairs; it is
 * parsed by OnUpdateTags() into the table that handle_tag() consults. The
 * entry is registered with PHP_INI_ALL.
 */
PHP_INI_BEGIN()
STD_PHP_INI_ENTRY("url_rewriter.tags", "a=href,area=href,frame=src,form=,fieldset=", PHP_INI_ALL, OnUpdateTags, url_adapt_state_ex, php_basic_globals, basic_globals)
PHP_INI_END()
  78. #define YYFILL(n) goto done
  79. #define YYCTYPE unsigned char
  80. #define YYCURSOR p
  81. #define YYLIMIT q
  82. #define YYMARKER r
  83. static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *url_app, const char *separator TSRMLS_DC)
  84. {
  85. register const char *p, *q;
  86. const char *bash = NULL;
  87. const char *sep = "?";
  88. /*
  89. * Don't modify "//example.com" full path, unless
  90. * HTTP_HOST matches.
  91. */
  92. if (url->c[0] == '/' && url->c[1] == '/') {
  93. zval **tmp, **http_host;
  94. size_t target_len, host_len;
  95. if (zend_hash_find(&EG(symbol_table), "_SERVER", sizeof("_SERVER"), (void **)&tmp) == FAILURE
  96. || Z_TYPE_PP(tmp) != IS_ARRAY
  97. || zend_hash_find(Z_ARRVAL_PP(tmp), "HTTP_HOST", sizeof("HTTP_HOST"), (void **)&http_host) == FAILURE
  98. || Z_TYPE_PP(http_host) != IS_STRING) {
  99. smart_str_append(dest, url);
  100. return;
  101. }
  102. /* HTTP_HOST could be "example.com:8888", etc. */
  103. /* Need to find end of URL in buffer */
  104. host_len = strcspn(Z_STRVAL_PP(http_host), ":");
  105. target_len = strcspn(url->c+2, "/\"'?>\r\n");
  106. if (host_len
  107. && host_len == target_len
  108. && strncasecmp(Z_STRVAL_PP(http_host), url->c+2, host_len)) {
  109. smart_str_append(dest, url);
  110. return;
  111. }
  112. }
  113. q = (p = url->c) + url->len;
  114. scan:
  115. {
  116. YYCTYPE yych;
  117. static const unsigned char yybm[] = {
  118. 128, 128, 128, 128, 128, 128, 128, 128,
  119. 128, 128, 128, 128, 128, 128, 128, 128,
  120. 128, 128, 128, 128, 128, 128, 128, 128,
  121. 128, 128, 128, 128, 128, 128, 128, 128,
  122. 128, 128, 128, 0, 128, 128, 128, 128,
  123. 128, 128, 128, 128, 128, 128, 128, 128,
  124. 128, 128, 128, 128, 128, 128, 128, 128,
  125. 128, 128, 0, 128, 128, 128, 128, 0,
  126. 128, 128, 128, 128, 128, 128, 128, 128,
  127. 128, 128, 128, 128, 128, 128, 128, 128,
  128. 128, 128, 128, 128, 128, 128, 128, 128,
  129. 128, 128, 128, 128, 128, 128, 128, 128,
  130. 128, 128, 128, 128, 128, 128, 128, 128,
  131. 128, 128, 128, 128, 128, 128, 128, 128,
  132. 128, 128, 128, 128, 128, 128, 128, 128,
  133. 128, 128, 128, 128, 128, 128, 128, 128,
  134. 128, 128, 128, 128, 128, 128, 128, 128,
  135. 128, 128, 128, 128, 128, 128, 128, 128,
  136. 128, 128, 128, 128, 128, 128, 128, 128,
  137. 128, 128, 128, 128, 128, 128, 128, 128,
  138. 128, 128, 128, 128, 128, 128, 128, 128,
  139. 128, 128, 128, 128, 128, 128, 128, 128,
  140. 128, 128, 128, 128, 128, 128, 128, 128,
  141. 128, 128, 128, 128, 128, 128, 128, 128,
  142. 128, 128, 128, 128, 128, 128, 128, 128,
  143. 128, 128, 128, 128, 128, 128, 128, 128,
  144. 128, 128, 128, 128, 128, 128, 128, 128,
  145. 128, 128, 128, 128, 128, 128, 128, 128,
  146. 128, 128, 128, 128, 128, 128, 128, 128,
  147. 128, 128, 128, 128, 128, 128, 128, 128,
  148. 128, 128, 128, 128, 128, 128, 128, 128,
  149. 128, 128, 128, 128, 128, 128, 128, 128,
  150. };
  151. if (YYLIMIT <= YYCURSOR) YYFILL(1);
  152. yych = *YYCURSOR;
  153. if (yybm[0+yych] & 128) {
  154. goto yy8;
  155. }
  156. if (yych <= '#') goto yy6;
  157. if (yych >= ';') goto yy4;
  158. ++YYCURSOR;
  159. { smart_str_append(dest, url); return; }
  160. yy4:
  161. ++YYCURSOR;
  162. { sep = separator; goto scan; }
  163. yy6:
  164. ++YYCURSOR;
  165. { bash = p - 1; goto done; }
  166. yy8:
  167. ++YYCURSOR;
  168. if (YYLIMIT <= YYCURSOR) YYFILL(1);
  169. yych = *YYCURSOR;
  170. if (yybm[0+yych] & 128) {
  171. goto yy8;
  172. }
  173. { goto scan; }
  174. }
  175. done:
  176. /* Don't modify URLs of the format "#mark" */
  177. if (bash && bash - url->c == 0) {
  178. smart_str_append(dest, url);
  179. return;
  180. }
  181. if (bash)
  182. smart_str_appendl(dest, url->c, bash - url->c);
  183. else
  184. smart_str_append(dest, url);
  185. smart_str_appends(dest, sep);
  186. smart_str_append(dest, url_app);
  187. if (bash)
  188. smart_str_appendl(dest, bash, q - bash);
  189. }
  190. #undef YYFILL
  191. #undef YYCTYPE
  192. #undef YYCURSOR
  193. #undef YYLIMIT
  194. #undef YYMARKER
  195. static inline void tag_arg(url_adapt_state_ex_t *ctx, char quotes, char type TSRMLS_DC)
  196. {
  197. char f = 0;
  198. if (strncasecmp(ctx->arg.c, ctx->lookup_data, ctx->arg.len) == 0)
  199. f = 1;
  200. if (quotes)
  201. smart_str_appendc(&ctx->result, type);
  202. if (f) {
  203. append_modified_url(&ctx->val, &ctx->result, &ctx->url_app, PG(arg_separator).output TSRMLS_CC);
  204. } else {
  205. smart_str_append(&ctx->result, &ctx->val);
  206. }
  207. if (quotes)
  208. smart_str_appendc(&ctx->result, type);
  209. }
  210. enum {
  211. STATE_PLAIN = 0,
  212. STATE_TAG,
  213. STATE_NEXT_ARG,
  214. STATE_ARG,
  215. STATE_BEFORE_VAL,
  216. STATE_VAL
  217. };
/*
 * re2c glue for the HTML scanner in xx_mainloop(). When the scanner needs
 * more input than is available, YYFILL jumps to the "stop" label of that
 * function.
 */
#define YYFILL(n) goto stop
#define YYCTYPE unsigned char
#define YYCURSOR xp
#define YYLIMIT end
#define YYMARKER q
/* Current scanner state, stored in the context. */
#define STATE ctx->state
/* Parameter list / argument list shared by the scanner action helpers. */
#define STD_PARA url_adapt_state_ex_t *ctx, char *start, char *YYCURSOR TSRMLS_DC
#define STD_ARGS ctx, start, xp TSRMLS_CC
/* scdebug((fmt, ...)) prints via printf when SCANNER_DEBUG is set, else expands to nothing. */
#if SCANNER_DEBUG
#define scdebug(x) printf x
#else
#define scdebug(x)
#endif
  231. static inline void passthru(STD_PARA)
  232. {
  233. scdebug(("appending %d chars, starting with %c\n", YYCURSOR-start, *start));
  234. smart_str_appendl(&ctx->result, start, YYCURSOR - start);
  235. }
  236. /*
  237. * This function appends a hidden input field after a <form> or
  238. * <fieldset>. The latter is important for XHTML.
  239. */
  240. static void handle_form(STD_PARA)
  241. {
  242. int doit = 0;
  243. if (ctx->form_app.len > 0) {
  244. switch (ctx->tag.len) {
  245. case sizeof("form") - 1:
  246. if (!strncasecmp(ctx->tag.c, "form", sizeof("form") - 1)) {
  247. doit = 1;
  248. }
  249. if (doit && ctx->val.c && ctx->lookup_data && *ctx->lookup_data) {
  250. char *e, *p = zend_memnstr(ctx->val.c, "://", sizeof("://") - 1, ctx->val.c + ctx->val.len);
  251. if (p) {
  252. e = memchr(p, '/', (ctx->val.c + ctx->val.len) - p);
  253. if (!e) {
  254. e = ctx->val.c + ctx->val.len;
  255. }
  256. if ((e - p) && strncasecmp(p, ctx->lookup_data, (e - p))) {
  257. doit = 0;
  258. }
  259. }
  260. }
  261. break;
  262. case sizeof("fieldset") - 1:
  263. if (!strncasecmp(ctx->tag.c, "fieldset", sizeof("fieldset") - 1)) {
  264. doit = 1;
  265. }
  266. break;
  267. }
  268. if (doit)
  269. smart_str_append(&ctx->result, &ctx->form_app);
  270. }
  271. }
  272. /*
  273. * HANDLE_TAG copies the HTML Tag and checks whether we
  274. * have that tag in our table. If we might modify it,
  275. * we continue to scan the tag, otherwise we simply copy the complete
  276. * HTML stuff to the result buffer.
  277. */
  278. static inline void handle_tag(STD_PARA)
  279. {
  280. int ok = 0;
  281. unsigned int i;
  282. ctx->tag.len = 0;
  283. smart_str_appendl(&ctx->tag, start, YYCURSOR - start);
  284. for (i = 0; i < ctx->tag.len; i++)
  285. ctx->tag.c[i] = tolower((int)(unsigned char)ctx->tag.c[i]);
  286. if (zend_hash_find(ctx->tags, ctx->tag.c, ctx->tag.len, (void **) &ctx->lookup_data) == SUCCESS)
  287. ok = 1;
  288. STATE = ok ? STATE_NEXT_ARG : STATE_PLAIN;
  289. }
  290. static inline void handle_arg(STD_PARA)
  291. {
  292. ctx->arg.len = 0;
  293. smart_str_appendl(&ctx->arg, start, YYCURSOR - start);
  294. }
  295. static inline void handle_val(STD_PARA, char quotes, char type)
  296. {
  297. smart_str_setl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2);
  298. tag_arg(ctx, quotes, type TSRMLS_CC);
  299. }
  300. static inline void xx_mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen TSRMLS_DC)
  301. {
  302. char *end, *q;
  303. char *xp;
  304. char *start;
  305. int rest;
  306. smart_str_appendl(&ctx->buf, newdata, newlen);
  307. YYCURSOR = ctx->buf.c;
  308. YYLIMIT = ctx->buf.c + ctx->buf.len;
  309. switch (STATE) {
  310. case STATE_PLAIN: goto state_plain;
  311. case STATE_TAG: goto state_tag;
  312. case STATE_NEXT_ARG: goto state_next_arg;
  313. case STATE_ARG: goto state_arg;
  314. case STATE_BEFORE_VAL: goto state_before_val;
  315. case STATE_VAL: goto state_val;
  316. }
  317. state_plain_begin:
  318. STATE = STATE_PLAIN;
  319. state_plain:
  320. start = YYCURSOR;
  321. {
  322. YYCTYPE yych;
  323. static const unsigned char yybm[] = {
  324. 128, 128, 128, 128, 128, 128, 128, 128,
  325. 128, 128, 128, 128, 128, 128, 128, 128,
  326. 128, 128, 128, 128, 128, 128, 128, 128,
  327. 128, 128, 128, 128, 128, 128, 128, 128,
  328. 128, 128, 128, 128, 128, 128, 128, 128,
  329. 128, 128, 128, 128, 128, 128, 128, 128,
  330. 128, 128, 128, 128, 128, 128, 128, 128,
  331. 128, 128, 128, 128, 0, 128, 128, 128,
  332. 128, 128, 128, 128, 128, 128, 128, 128,
  333. 128, 128, 128, 128, 128, 128, 128, 128,
  334. 128, 128, 128, 128, 128, 128, 128, 128,
  335. 128, 128, 128, 128, 128, 128, 128, 128,
  336. 128, 128, 128, 128, 128, 128, 128, 128,
  337. 128, 128, 128, 128, 128, 128, 128, 128,
  338. 128, 128, 128, 128, 128, 128, 128, 128,
  339. 128, 128, 128, 128, 128, 128, 128, 128,
  340. 128, 128, 128, 128, 128, 128, 128, 128,
  341. 128, 128, 128, 128, 128, 128, 128, 128,
  342. 128, 128, 128, 128, 128, 128, 128, 128,
  343. 128, 128, 128, 128, 128, 128, 128, 128,
  344. 128, 128, 128, 128, 128, 128, 128, 128,
  345. 128, 128, 128, 128, 128, 128, 128, 128,
  346. 128, 128, 128, 128, 128, 128, 128, 128,
  347. 128, 128, 128, 128, 128, 128, 128, 128,
  348. 128, 128, 128, 128, 128, 128, 128, 128,
  349. 128, 128, 128, 128, 128, 128, 128, 128,
  350. 128, 128, 128, 128, 128, 128, 128, 128,
  351. 128, 128, 128, 128, 128, 128, 128, 128,
  352. 128, 128, 128, 128, 128, 128, 128, 128,
  353. 128, 128, 128, 128, 128, 128, 128, 128,
  354. 128, 128, 128, 128, 128, 128, 128, 128,
  355. 128, 128, 128, 128, 128, 128, 128, 128,
  356. };
  357. if (YYLIMIT <= YYCURSOR) YYFILL(1);
  358. yych = *YYCURSOR;
  359. if (yybm[0+yych] & 128) {
  360. goto yy15;
  361. }
  362. ++YYCURSOR;
  363. { passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; }
  364. yy15:
  365. ++YYCURSOR;
  366. if (YYLIMIT <= YYCURSOR) YYFILL(1);
  367. yych = *YYCURSOR;
  368. if (yybm[0+yych] & 128) {
  369. goto yy15;
  370. }
  371. { passthru(STD_ARGS); goto state_plain; }
  372. }
  373. state_tag:
  374. start = YYCURSOR;
  375. {
  376. YYCTYPE yych;
  377. static const unsigned char yybm[] = {
  378. 0, 0, 0, 0, 0, 0, 0, 0,
  379. 0, 0, 0, 0, 0, 0, 0, 0,
  380. 0, 0, 0, 0, 0, 0, 0, 0,
  381. 0, 0, 0, 0, 0, 0, 0, 0,
  382. 0, 0, 0, 0, 0, 0, 0, 0,
  383. 0, 0, 0, 0, 0, 0, 0, 0,
  384. 0, 0, 0, 0, 0, 0, 0, 0,
  385. 0, 0, 128, 0, 0, 0, 0, 0,
  386. 0, 128, 128, 128, 128, 128, 128, 128,
  387. 128, 128, 128, 128, 128, 128, 128, 128,
  388. 128, 128, 128, 128, 128, 128, 128, 128,
  389. 128, 128, 128, 0, 0, 0, 0, 0,
  390. 0, 128, 128, 128, 128, 128, 128, 128,
  391. 128, 128, 128, 128, 128, 128, 128, 128,
  392. 128, 128, 128, 128, 128, 128, 128, 128,
  393. 128, 128, 128, 0, 0, 0, 0, 0,
  394. 0, 0, 0, 0, 0, 0, 0, 0,
  395. 0, 0, 0, 0, 0, 0, 0, 0,
  396. 0, 0, 0, 0, 0, 0, 0, 0,
  397. 0, 0, 0, 0, 0, 0, 0, 0,
  398. 0, 0, 0, 0, 0, 0, 0, 0,
  399. 0, 0, 0, 0, 0, 0, 0, 0,
  400. 0, 0, 0, 0, 0, 0, 0, 0,
  401. 0, 0, 0, 0, 0, 0, 0, 0,
  402. 0, 0, 0, 0, 0, 0, 0, 0,
  403. 0, 0, 0, 0, 0, 0, 0, 0,
  404. 0, 0, 0, 0, 0, 0, 0, 0,
  405. 0, 0, 0, 0, 0, 0, 0, 0,
  406. 0, 0, 0, 0, 0, 0, 0, 0,
  407. 0, 0, 0, 0, 0, 0, 0, 0,
  408. 0, 0, 0, 0, 0, 0, 0, 0,
  409. 0, 0, 0, 0, 0, 0, 0, 0,
  410. };
  411. if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
  412. yych = *YYCURSOR;
  413. if (yych <= '@') {
  414. if (yych != ':') goto yy22;
  415. } else {
  416. if (yych <= 'Z') goto yy20;
  417. if (yych <= '`') goto yy22;
  418. if (yych >= '{') goto yy22;
  419. }
  420. yy20:
  421. ++YYCURSOR;
  422. yych = *YYCURSOR;
  423. goto yy25;
  424. yy21:
  425. { handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; }
  426. yy22:
  427. ++YYCURSOR;
  428. { passthru(STD_ARGS); goto state_plain_begin; }
  429. yy24:
  430. ++YYCURSOR;
  431. if (YYLIMIT <= YYCURSOR) YYFILL(1);
  432. yych = *YYCURSOR;
  433. yy25:
  434. if (yybm[0+yych] & 128) {
  435. goto yy24;
  436. }
  437. goto yy21;
  438. }
  439. state_next_arg_begin:
  440. STATE = STATE_NEXT_ARG;
  441. state_next_arg:
  442. start = YYCURSOR;
  443. {
  444. YYCTYPE yych;
  445. static const unsigned char yybm[] = {
  446. 0, 0, 0, 0, 0, 0, 0, 0,
  447. 0, 128, 128, 128, 0, 128, 0, 0,
  448. 0, 0, 0, 0, 0, 0, 0, 0,
  449. 0, 0, 0, 0, 0, 0, 0, 0,
  450. 128, 0, 0, 0, 0, 0, 0, 0,
  451. 0, 0, 0, 0, 0, 0, 0, 0,
  452. 0, 0, 0, 0, 0, 0, 0, 0,
  453. 0, 0, 0, 0, 0, 0, 0, 0,
  454. 0, 0, 0, 0, 0, 0, 0, 0,
  455. 0, 0, 0, 0, 0, 0, 0, 0,
  456. 0, 0, 0, 0, 0, 0, 0, 0,
  457. 0, 0, 0, 0, 0, 0, 0, 0,
  458. 0, 0, 0, 0, 0, 0, 0, 0,
  459. 0, 0, 0, 0, 0, 0, 0, 0,
  460. 0, 0, 0, 0, 0, 0, 0, 0,
  461. 0, 0, 0, 0, 0, 0, 0, 0,
  462. 0, 0, 0, 0, 0, 0, 0, 0,
  463. 0, 0, 0, 0, 0, 0, 0, 0,
  464. 0, 0, 0, 0, 0, 0, 0, 0,
  465. 0, 0, 0, 0, 0, 0, 0, 0,
  466. 0, 0, 0, 0, 0, 0, 0, 0,
  467. 0, 0, 0, 0, 0, 0, 0, 0,
  468. 0, 0, 0, 0, 0, 0, 0, 0,
  469. 0, 0, 0, 0, 0, 0, 0, 0,
  470. 0, 0, 0, 0, 0, 0, 0, 0,
  471. 0, 0, 0, 0, 0, 0, 0, 0,
  472. 0, 0, 0, 0, 0, 0, 0, 0,
  473. 0, 0, 0, 0, 0, 0, 0, 0,
  474. 0, 0, 0, 0, 0, 0, 0, 0,
  475. 0, 0, 0, 0, 0, 0, 0, 0,
  476. 0, 0, 0, 0, 0, 0, 0, 0,
  477. 0, 0, 0, 0, 0, 0, 0, 0,
  478. };
  479. if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
  480. yych = *YYCURSOR;
  481. if (yych <= '.') {
  482. if (yych <= '\f') {
  483. if (yych <= 0x08) goto yy36;
  484. if (yych <= '\v') goto yy32;
  485. goto yy36;
  486. } else {
  487. if (yych <= '\r') goto yy32;
  488. if (yych == ' ') goto yy32;
  489. goto yy36;
  490. }
  491. } else {
  492. if (yych <= '@') {
  493. if (yych <= '/') goto yy28;
  494. if (yych == '>') goto yy30;
  495. goto yy36;
  496. } else {
  497. if (yych <= 'Z') goto yy34;
  498. if (yych <= '`') goto yy36;
  499. if (yych <= 'z') goto yy34;
  500. goto yy36;
  501. }
  502. }
  503. yy28:
  504. ++YYCURSOR;
  505. if ((yych = *YYCURSOR) == '>') goto yy39;
  506. yy29:
  507. { passthru(STD_ARGS); goto state_plain_begin; }
  508. yy30:
  509. ++YYCURSOR;
  510. yy31:
  511. { passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; }
  512. yy32:
  513. ++YYCURSOR;
  514. yych = *YYCURSOR;
  515. goto yy38;
  516. yy33:
  517. { passthru(STD_ARGS); goto state_next_arg; }
  518. yy34:
  519. ++YYCURSOR;
  520. { --YYCURSOR; STATE = STATE_ARG; goto state_arg; }
  521. yy36:
  522. yych = *++YYCURSOR;
  523. goto yy29;
  524. yy37:
  525. ++YYCURSOR;
  526. if (YYLIMIT <= YYCURSOR) YYFILL(1);
  527. yych = *YYCURSOR;
  528. yy38:
  529. if (yybm[0+yych] & 128) {
  530. goto yy37;
  531. }
  532. goto yy33;
  533. yy39:
  534. ++YYCURSOR;
  535. yych = *YYCURSOR;
  536. goto yy31;
  537. }
  538. state_arg:
  539. start = YYCURSOR;
  540. {
  541. YYCTYPE yych;
  542. static const unsigned char yybm[] = {
  543. 0, 0, 0, 0, 0, 0, 0, 0,
  544. 0, 0, 0, 0, 0, 0, 0, 0,
  545. 0, 0, 0, 0, 0, 0, 0, 0,
  546. 0, 0, 0, 0, 0, 0, 0, 0,
  547. 0, 0, 0, 0, 0, 0, 0, 0,
  548. 0, 0, 0, 0, 0, 128, 0, 0,
  549. 0, 0, 0, 0, 0, 0, 0, 0,
  550. 0, 0, 0, 0, 0, 0, 0, 0,
  551. 0, 128, 128, 128, 128, 128, 128, 128,
  552. 128, 128, 128, 128, 128, 128, 128, 128,
  553. 128, 128, 128, 128, 128, 128, 128, 128,
  554. 128, 128, 128, 0, 0, 0, 0, 0,
  555. 0, 128, 128, 128, 128, 128, 128, 128,
  556. 128, 128, 128, 128, 128, 128, 128, 128,
  557. 128, 128, 128, 128, 128, 128, 128, 128,
  558. 128, 128, 128, 0, 0, 0, 0, 0,
  559. 0, 0, 0, 0, 0, 0, 0, 0,
  560. 0, 0, 0, 0, 0, 0, 0, 0,
  561. 0, 0, 0, 0, 0, 0, 0, 0,
  562. 0, 0, 0, 0, 0, 0, 0, 0,
  563. 0, 0, 0, 0, 0, 0, 0, 0,
  564. 0, 0, 0, 0, 0, 0, 0, 0,
  565. 0, 0, 0, 0, 0, 0, 0, 0,
  566. 0, 0, 0, 0, 0, 0, 0, 0,
  567. 0, 0, 0, 0, 0, 0, 0, 0,
  568. 0, 0, 0, 0, 0, 0, 0, 0,
  569. 0, 0, 0, 0, 0, 0, 0, 0,
  570. 0, 0, 0, 0, 0, 0, 0, 0,
  571. 0, 0, 0, 0, 0, 0, 0, 0,
  572. 0, 0, 0, 0, 0, 0, 0, 0,
  573. 0, 0, 0, 0, 0, 0, 0, 0,
  574. 0, 0, 0, 0, 0, 0, 0, 0,
  575. };
  576. if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
  577. yych = *YYCURSOR;
  578. if (yych <= '@') goto yy44;
  579. if (yych <= 'Z') goto yy42;
  580. if (yych <= '`') goto yy44;
  581. if (yych >= '{') goto yy44;
  582. yy42:
  583. ++YYCURSOR;
  584. yych = *YYCURSOR;
  585. goto yy47;
  586. yy43:
  587. { passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; }
  588. yy44:
  589. ++YYCURSOR;
  590. { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; }
  591. yy46:
  592. ++YYCURSOR;
  593. if (YYLIMIT <= YYCURSOR) YYFILL(1);
  594. yych = *YYCURSOR;
  595. yy47:
  596. if (yybm[0+yych] & 128) {
  597. goto yy46;
  598. }
  599. goto yy43;
  600. }
  601. state_before_val:
  602. start = YYCURSOR;
  603. {
  604. YYCTYPE yych;
  605. static const unsigned char yybm[] = {
  606. 0, 0, 0, 0, 0, 0, 0, 0,
  607. 0, 0, 0, 0, 0, 0, 0, 0,
  608. 0, 0, 0, 0, 0, 0, 0, 0,
  609. 0, 0, 0, 0, 0, 0, 0, 0,
  610. 128, 0, 0, 0, 0, 0, 0, 0,
  611. 0, 0, 0, 0, 0, 0, 0, 0,
  612. 0, 0, 0, 0, 0, 0, 0, 0,
  613. 0, 0, 0, 0, 0, 0, 0, 0,
  614. 0, 0, 0, 0, 0, 0, 0, 0,
  615. 0, 0, 0, 0, 0, 0, 0, 0,
  616. 0, 0, 0, 0, 0, 0, 0, 0,
  617. 0, 0, 0, 0, 0, 0, 0, 0,
  618. 0, 0, 0, 0, 0, 0, 0, 0,
  619. 0, 0, 0, 0, 0, 0, 0, 0,
  620. 0, 0, 0, 0, 0, 0, 0, 0,
  621. 0, 0, 0, 0, 0, 0, 0, 0,
  622. 0, 0, 0, 0, 0, 0, 0, 0,
  623. 0, 0, 0, 0, 0, 0, 0, 0,
  624. 0, 0, 0, 0, 0, 0, 0, 0,
  625. 0, 0, 0, 0, 0, 0, 0, 0,
  626. 0, 0, 0, 0, 0, 0, 0, 0,
  627. 0, 0, 0, 0, 0, 0, 0, 0,
  628. 0, 0, 0, 0, 0, 0, 0, 0,
  629. 0, 0, 0, 0, 0, 0, 0, 0,
  630. 0, 0, 0, 0, 0, 0, 0, 0,
  631. 0, 0, 0, 0, 0, 0, 0, 0,
  632. 0, 0, 0, 0, 0, 0, 0, 0,
  633. 0, 0, 0, 0, 0, 0, 0, 0,
  634. 0, 0, 0, 0, 0, 0, 0, 0,
  635. 0, 0, 0, 0, 0, 0, 0, 0,
  636. 0, 0, 0, 0, 0, 0, 0, 0,
  637. 0, 0, 0, 0, 0, 0, 0, 0,
  638. };
  639. if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
  640. yych = *YYCURSOR;
  641. if (yych == ' ') goto yy50;
  642. if (yych == '=') goto yy52;
  643. goto yy54;
  644. yy50:
  645. yych = *(YYMARKER = ++YYCURSOR);
  646. if (yych == ' ') goto yy57;
  647. if (yych == '=') goto yy55;
  648. yy51:
  649. { --YYCURSOR; goto state_next_arg_begin; }
  650. yy52:
  651. ++YYCURSOR;
  652. yych = *YYCURSOR;
  653. goto yy56;
  654. yy53:
  655. { passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; }
  656. yy54:
  657. yych = *++YYCURSOR;
  658. goto yy51;
  659. yy55:
  660. ++YYCURSOR;
  661. if (YYLIMIT <= YYCURSOR) YYFILL(1);
  662. yych = *YYCURSOR;
  663. yy56:
  664. if (yybm[0+yych] & 128) {
  665. goto yy55;
  666. }
  667. goto yy53;
  668. yy57:
  669. ++YYCURSOR;
  670. if (YYLIMIT <= YYCURSOR) YYFILL(1);
  671. yych = *YYCURSOR;
  672. if (yych == ' ') goto yy57;
  673. if (yych == '=') goto yy55;
  674. YYCURSOR = YYMARKER;
  675. goto yy51;
  676. }
  677. state_val:
  678. start = YYCURSOR;
  679. {
  680. YYCTYPE yych;
  681. static const unsigned char yybm[] = {
  682. 224, 224, 224, 224, 224, 224, 224, 224,
  683. 224, 192, 192, 224, 224, 192, 224, 224,
  684. 224, 224, 224, 224, 224, 224, 224, 224,
  685. 224, 224, 224, 224, 224, 224, 224, 224,
  686. 192, 224, 64, 224, 224, 224, 224, 128,
  687. 224, 224, 224, 224, 224, 224, 224, 224,
  688. 224, 224, 224, 224, 224, 224, 224, 224,
  689. 224, 224, 224, 224, 224, 224, 0, 224,
  690. 224, 224, 224, 224, 224, 224, 224, 224,
  691. 224, 224, 224, 224, 224, 224, 224, 224,
  692. 224, 224, 224, 224, 224, 224, 224, 224,
  693. 224, 224, 224, 224, 224, 224, 224, 224,
  694. 224, 224, 224, 224, 224, 224, 224, 224,
  695. 224, 224, 224, 224, 224, 224, 224, 224,
  696. 224, 224, 224, 224, 224, 224, 224, 224,
  697. 224, 224, 224, 224, 224, 224, 224, 224,
  698. 224, 224, 224, 224, 224, 224, 224, 224,
  699. 224, 224, 224, 224, 224, 224, 224, 224,
  700. 224, 224, 224, 224, 224, 224, 224, 224,
  701. 224, 224, 224, 224, 224, 224, 224, 224,
  702. 224, 224, 224, 224, 224, 224, 224, 224,
  703. 224, 224, 224, 224, 224, 224, 224, 224,
  704. 224, 224, 224, 224, 224, 224, 224, 224,
  705. 224, 224, 224, 224, 224, 224, 224, 224,
  706. 224, 224, 224, 224, 224, 224, 224, 224,
  707. 224, 224, 224, 224, 224, 224, 224, 224,
  708. 224, 224, 224, 224, 224, 224, 224, 224,
  709. 224, 224, 224, 224, 224, 224, 224, 224,
  710. 224, 224, 224, 224, 224, 224, 224, 224,
  711. 224, 224, 224, 224, 224, 224, 224, 224,
  712. 224, 224, 224, 224, 224, 224, 224, 224,
  713. 224, 224, 224, 224, 224, 224, 224, 224,
  714. };
  715. if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
  716. yych = *YYCURSOR;
  717. if (yych <= ' ') {
  718. if (yych <= '\f') {
  719. if (yych <= 0x08) goto yy65;
  720. if (yych <= '\n') goto yy67;
  721. goto yy65;
  722. } else {
  723. if (yych <= '\r') goto yy67;
  724. if (yych <= 0x1F) goto yy65;
  725. goto yy67;
  726. }
  727. } else {
  728. if (yych <= '&') {
  729. if (yych != '"') goto yy65;
  730. } else {
  731. if (yych <= '\'') goto yy64;
  732. if (yych == '>') goto yy67;
  733. goto yy65;
  734. }
  735. }
  736. yych = *(YYMARKER = ++YYCURSOR);
  737. if (yych != '>') goto yy76;
  738. yy63:
  739. { passthru(STD_ARGS); goto state_next_arg_begin; }
  740. yy64:
  741. yych = *(YYMARKER = ++YYCURSOR);
  742. if (yych == '>') goto yy63;
  743. goto yy71;
  744. yy65:
  745. ++YYCURSOR;
  746. yych = *YYCURSOR;
  747. goto yy69;
  748. yy66:
  749. { handle_val(STD_ARGS, 0, ' '); goto state_next_arg_begin; }
  750. yy67:
  751. yych = *++YYCURSOR;
  752. goto yy63;
  753. yy68:
  754. ++YYCURSOR;
  755. if (YYLIMIT <= YYCURSOR) YYFILL(1);
  756. yych = *YYCURSOR;
  757. yy69:
  758. if (yybm[0+yych] & 32) {
  759. goto yy68;
  760. }
  761. goto yy66;
  762. yy70:
  763. ++YYCURSOR;
  764. if (YYLIMIT <= YYCURSOR) YYFILL(1);
  765. yych = *YYCURSOR;
  766. yy71:
  767. if (yybm[0+yych] & 64) {
  768. goto yy70;
  769. }
  770. if (yych <= '\'') goto yy73;
  771. yy72:
  772. YYCURSOR = YYMARKER;
  773. goto yy63;
  774. yy73:
  775. ++YYCURSOR;
  776. { handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; }
  777. yy75:
  778. ++YYCURSOR;
  779. if (YYLIMIT <= YYCURSOR) YYFILL(1);
  780. yych = *YYCURSOR;
  781. yy76:
  782. if (yybm[0+yych] & 128) {
  783. goto yy75;
  784. }
  785. if (yych >= '#') goto yy72;
  786. ++YYCURSOR;
  787. { handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; }
  788. }
  789. stop:
  790. rest = YYLIMIT - start;
  791. scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest));
  792. /* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */
  793. if (rest < 0) rest = 0;
  794. if (rest) memmove(ctx->buf.c, start, rest);
  795. ctx->buf.len = rest;
  796. }
  797. char *php_url_scanner_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen TSRMLS_DC)
  798. {
  799. smart_str surl = {0};
  800. smart_str buf = {0};
  801. smart_str url_app = {0};
  802. smart_str_setl(&surl, url, urllen);
  803. smart_str_appends(&url_app, name);
  804. smart_str_appendc(&url_app, '=');
  805. smart_str_appends(&url_app, value);
  806. append_modified_url(&surl, &buf, &url_app, PG(arg_separator).output TSRMLS_CC);
  807. smart_str_0(&buf);
  808. if (newlen) *newlen = buf.len;
  809. smart_str_free(&url_app);
  810. return buf.c;
  811. }
  812. static char *url_adapt_ext(const char *src, size_t srclen, size_t *newlen, zend_bool do_flush TSRMLS_DC)
  813. {
  814. url_adapt_state_ex_t *ctx;
  815. char *retval;
  816. ctx = &BG(url_adapt_state_ex);
  817. xx_mainloop(ctx, src, srclen TSRMLS_CC);
  818. *newlen = ctx->result.len;
  819. if (!ctx->result.c) {
  820. smart_str_appendl(&ctx->result, "", 0);
  821. }
  822. smart_str_0(&ctx->result);
  823. if (do_flush) {
  824. smart_str_appendl(&ctx->result, ctx->buf.c, ctx->buf.len);
  825. *newlen += ctx->buf.len;
  826. smart_str_free(&ctx->buf);
  827. }
  828. retval = ctx->result.c;
  829. ctx->result.c = NULL;
  830. ctx->result.len = 0;
  831. return retval;
  832. }
  833. static int php_url_scanner_ex_activate(TSRMLS_D)
  834. {
  835. url_adapt_state_ex_t *ctx;
  836. ctx = &BG(url_adapt_state_ex);
  837. memset(ctx, 0, ((size_t) &((url_adapt_state_ex_t *)0)->tags));
  838. return SUCCESS;
  839. }
  840. static int php_url_scanner_ex_deactivate(TSRMLS_D)
  841. {
  842. url_adapt_state_ex_t *ctx;
  843. ctx = &BG(url_adapt_state_ex);
  844. smart_str_free(&ctx->result);
  845. smart_str_free(&ctx->buf);
  846. smart_str_free(&ctx->tag);
  847. smart_str_free(&ctx->arg);
  848. return SUCCESS;
  849. }
  850. static void php_url_scanner_output_handler(char *output, uint output_len, char **handled_output, uint *handled_output_len, int mode TSRMLS_DC)
  851. {
  852. size_t len;
  853. if (BG(url_adapt_state_ex).url_app.len != 0) {
  854. *handled_output = url_adapt_ext(output, output_len, &len, (zend_bool) (mode & (PHP_OUTPUT_HANDLER_END | PHP_OUTPUT_HANDLER_CONT | PHP_OUTPUT_HANDLER_FLUSH | PHP_OUTPUT_HANDLER_FINAL) ? 1 : 0) TSRMLS_CC);
  855. if (sizeof(uint) < sizeof(size_t)) {
  856. if (len > UINT_MAX)
  857. len = UINT_MAX;
  858. }
  859. *handled_output_len = len;
  860. } else if (BG(url_adapt_state_ex).url_app.len == 0) {
  861. url_adapt_state_ex_t *ctx = &BG(url_adapt_state_ex);
  862. if (ctx->buf.len) {
  863. smart_str_appendl(&ctx->result, ctx->buf.c, ctx->buf.len);
  864. smart_str_appendl(&ctx->result, output, output_len);
  865. *handled_output = ctx->result.c;
  866. *handled_output_len = ctx->buf.len + output_len;
  867. ctx->result.c = NULL;
  868. ctx->result.len = 0;
  869. smart_str_free(&ctx->buf);
  870. } else {
  871. *handled_output = estrndup(output, *handled_output_len = output_len);
  872. }
  873. } else {
  874. *handled_output = NULL;
  875. }
  876. }
  877. PHPAPI int php_url_scanner_add_var(char *name, int name_len, char *value, int value_len, int urlencode TSRMLS_DC)
  878. {
  879. char *encoded = NULL;
  880. int encoded_len;
  881. smart_str val;
  882. if (! BG(url_adapt_state_ex).active) {
  883. php_url_scanner_ex_activate(TSRMLS_C);
  884. php_output_start_internal(ZEND_STRL("URL-Rewriter"), php_url_scanner_output_handler, 0, PHP_OUTPUT_HANDLER_STDFLAGS TSRMLS_CC);
  885. BG(url_adapt_state_ex).active = 1;
  886. }
  887. if (BG(url_adapt_state_ex).url_app.len != 0) {
  888. smart_str_appends(&BG(url_adapt_state_ex).url_app, PG(arg_separator).output);
  889. }
  890. if (urlencode) {
  891. encoded = php_url_encode(value, value_len, &encoded_len);
  892. smart_str_setl(&val, encoded, encoded_len);
  893. } else {
  894. smart_str_setl(&val, value, value_len);
  895. }
  896. smart_str_appendl(&BG(url_adapt_state_ex).url_app, name, name_len);
  897. smart_str_appendc(&BG(url_adapt_state_ex).url_app, '=');
  898. smart_str_append(&BG(url_adapt_state_ex).url_app, &val);
  899. smart_str_appends(&BG(url_adapt_state_ex).form_app, "<input type=\"hidden\" name=\"");
  900. smart_str_appendl(&BG(url_adapt_state_ex).form_app, name, name_len);
  901. smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" value=\"");
  902. smart_str_append(&BG(url_adapt_state_ex).form_app, &val);
  903. smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" />");
  904. if (urlencode)
  905. efree(encoded);
  906. return SUCCESS;
  907. }
  908. PHPAPI int php_url_scanner_reset_vars(TSRMLS_D)
  909. {
  910. BG(url_adapt_state_ex).form_app.len = 0;
  911. BG(url_adapt_state_ex).url_app.len = 0;
  912. return SUCCESS;
  913. }
  914. PHP_MINIT_FUNCTION(url_scanner)
  915. {
  916. BG(url_adapt_state_ex).tags = NULL;
  917. BG(url_adapt_state_ex).form_app.c = BG(url_adapt_state_ex).url_app.c = 0;
  918. BG(url_adapt_state_ex).form_app.len = BG(url_adapt_state_ex).url_app.len = 0;
  919. REGISTER_INI_ENTRIES();
  920. return SUCCESS;
  921. }
/* Module shutdown: unregisters the INI entries. Always returns SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(url_scanner)
{
UNREGISTER_INI_ENTRIES();
return SUCCESS;
}
  927. PHP_RINIT_FUNCTION(url_scanner)
  928. {
  929. BG(url_adapt_state_ex).active = 0;
  930. return SUCCESS;
  931. }
  932. PHP_RSHUTDOWN_FUNCTION(url_scanner)
  933. {
  934. if (BG(url_adapt_state_ex).active) {
  935. php_url_scanner_ex_deactivate(TSRMLS_C);
  936. BG(url_adapt_state_ex).active = 0;
  937. }
  938. smart_str_free(&BG(url_adapt_state_ex).form_app);
  939. smart_str_free(&BG(url_adapt_state_ex).url_app);
  940. return SUCCESS;
  941. }