url_scanner_ex.re 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012
  1. /*
  2. +----------------------------------------------------------------------+
  3. | Copyright (c) The PHP Group |
  4. +----------------------------------------------------------------------+
  5. | This source file is subject to version 3.01 of the PHP license, |
  6. | that is bundled with this package in the file LICENSE, and is |
  7. | available through the world-wide-web at the following url: |
  8. | https://www.php.net/license/3_01.txt |
  9. | If you did not receive a copy of the PHP license and are unable to |
  10. | obtain it through the world-wide-web, please send a note to |
  11. | license@php.net so we can mail you a copy immediately. |
  12. +----------------------------------------------------------------------+
  13. | Author: Sascha Schumann <sascha@schumann.cx> |
  14. | Yasuo Ohgaki <yohgaki@ohgaki.net> |
  15. +----------------------------------------------------------------------+
  16. */
  17. #include "php.h"
  18. #ifdef HAVE_UNISTD_H
  19. #include <unistd.h>
  20. #endif
  21. #include <limits.h>
  22. #include <stdio.h>
  23. #include <stdlib.h>
  24. #include <string.h>
  25. #include "SAPI.h"
  26. #include "php_ini.h"
  27. #include "php_globals.h"
  28. #include "php_string.h"
  29. #define STATE_TAG SOME_OTHER_STATE_TAG
  30. #include "basic_functions.h"
  31. #include "url.h"
  32. #include "html.h"
  33. #undef STATE_TAG
  34. #define url_scanner url_scanner_ex
  35. #include "zend_smart_str.h"
  36. static void tag_dtor(zval *zv)
  37. {
  38. free(Z_PTR_P(zv));
  39. }
  40. static int php_ini_on_update_tags(zend_ini_entry *entry, zend_string *new_value, void *mh_arg1, void *mh_arg2, void *mh_arg3, int stage, int type)
  41. {
  42. url_adapt_state_ex_t *ctx;
  43. char *key;
  44. char *tmp;
  45. char *lasts = NULL;
  46. if (type) {
  47. ctx = &BG(url_adapt_session_ex);
  48. } else {
  49. ctx = &BG(url_adapt_output_ex);
  50. }
  51. tmp = estrndup(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
  52. if (ctx->tags)
  53. zend_hash_destroy(ctx->tags);
  54. else {
  55. ctx->tags = malloc(sizeof(HashTable));
  56. if (!ctx->tags) {
  57. efree(tmp);
  58. return FAILURE;
  59. }
  60. }
  61. zend_hash_init(ctx->tags, 0, NULL, tag_dtor, 1);
  62. for (key = php_strtok_r(tmp, ",", &lasts);
  63. key;
  64. key = php_strtok_r(NULL, ",", &lasts)) {
  65. char *val;
  66. val = strchr(key, '=');
  67. if (val) {
  68. char *q;
  69. size_t keylen;
  70. zend_string *str;
  71. *val++ = '\0';
  72. for (q = key; *q; q++) {
  73. *q = tolower(*q);
  74. }
  75. keylen = q - key;
  76. str = zend_string_init(key, keylen, 1);
  77. GC_MAKE_PERSISTENT_LOCAL(str);
  78. zend_hash_add_mem(ctx->tags, str, val, strlen(val)+1);
  79. zend_string_release_ex(str, 1);
  80. }
  81. }
  82. efree(tmp);
  83. return SUCCESS;
  84. }
  85. static PHP_INI_MH(OnUpdateSessionTags)
  86. {
  87. return php_ini_on_update_tags(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage, 1);
  88. }
  89. static PHP_INI_MH(OnUpdateOutputTags)
  90. {
  91. return php_ini_on_update_tags(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage, 0);
  92. }
  93. static int php_ini_on_update_hosts(zend_ini_entry *entry, zend_string *new_value, void *mh_arg1, void *mh_arg2, void *mh_arg3, int stage, int type)
  94. {
  95. HashTable *hosts;
  96. char *key;
  97. char *tmp;
  98. char *lasts = NULL;
  99. if (type) {
  100. hosts = &BG(url_adapt_session_hosts_ht);
  101. } else {
  102. hosts = &BG(url_adapt_output_hosts_ht);
  103. }
  104. zend_hash_clean(hosts);
  105. /* Use user supplied host whitelist */
  106. tmp = estrndup(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
  107. for (key = php_strtok_r(tmp, ",", &lasts);
  108. key;
  109. key = php_strtok_r(NULL, ",", &lasts)) {
  110. size_t keylen;
  111. zend_string *tmp_key;
  112. char *q;
  113. for (q = key; *q; q++) {
  114. *q = tolower(*q);
  115. }
  116. keylen = q - key;
  117. if (keylen > 0) {
  118. tmp_key = zend_string_init(key, keylen, 0);
  119. zend_hash_add_empty_element(hosts, tmp_key);
  120. zend_string_release_ex(tmp_key, 0);
  121. }
  122. }
  123. efree(tmp);
  124. return SUCCESS;
  125. }
  126. static PHP_INI_MH(OnUpdateSessionHosts)
  127. {
  128. return php_ini_on_update_hosts(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage, 1);
  129. }
  130. static PHP_INI_MH(OnUpdateOutputHosts)
  131. {
  132. return php_ini_on_update_hosts(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage, 0);
  133. }
  134. /* FIXME: OnUpdate*Hosts cannot set default to $_SERVER['HTTP_HOST'] at startup */
  /*
   * INI directives of the URL rewriter: a tag list and a host whitelist for
   * the session rewriter (session.*) and for the output rewriter
   * (url_rewriter.*).
   *
   * The update handlers named here pick their target from a hard-coded `type`
   * argument and never read mh_arg1..3, so the struct member given in each
   * row does not select what gets updated.
   * NOTE(review): the url_rewriter.* rows name the session members; this looks
   * harmless for the reason above - confirm before relying on it.
   */
  135. PHP_INI_BEGIN()
  136. STD_PHP_INI_ENTRY("session.trans_sid_tags", "a=href,area=href,frame=src,form=", PHP_INI_ALL, OnUpdateSessionTags, url_adapt_session_ex, php_basic_globals, basic_globals)
  137. STD_PHP_INI_ENTRY("session.trans_sid_hosts", "", PHP_INI_ALL, OnUpdateSessionHosts, url_adapt_session_hosts_ht, php_basic_globals, basic_globals)
  138. STD_PHP_INI_ENTRY("url_rewriter.tags", "form=", PHP_INI_ALL, OnUpdateOutputTags, url_adapt_session_ex, php_basic_globals, basic_globals)
  139. STD_PHP_INI_ENTRY("url_rewriter.hosts", "", PHP_INI_ALL, OnUpdateOutputHosts, url_adapt_session_hosts_ht, php_basic_globals, basic_globals)
  140. PHP_INI_END()
  141. /*!re2c
  142. any = [\000-\377];
  143. N = (any\[<]);
  144. alpha = [a-zA-Z];
  145. alphanamespace = [a-zA-Z:];
  146. alphadash = ([a-zA-Z] | "-");
  147. */
  /*
   * re2c glue macros (fill action, character type, cursor/limit/marker names).
   * Nothing between here and the matching #undef group further down refers to
   * them; the scanner proper defines its own set afterwards.
   */
  148. #define YYFILL(n) goto done
  149. #define YYCTYPE unsigned char
  150. #define YYCURSOR p
  151. #define YYLIMIT q
  152. #define YYMARKER r
  153. static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *url_app, const char *separator)
  154. {
  155. php_url *url_parts;
  156. smart_str_0(url); /* FIXME: Bug #70480 php_url_parse_ex() crashes by processing chars exceed len */
  157. url_parts = php_url_parse_ex(ZSTR_VAL(url->s), ZSTR_LEN(url->s));
  158. /* Ignore malformed URLs */
  159. if (!url_parts) {
  160. smart_str_append_smart_str(dest, url);
  161. return;
  162. }
  163. /* Don't modify URLs of the format "#mark" */
  164. if (url_parts->fragment && '#' == ZSTR_VAL(url->s)[0]) {
  165. smart_str_append_smart_str(dest, url);
  166. php_url_free(url_parts);
  167. return;
  168. }
  169. /* Check protocol. Only http/https is allowed. */
  170. if (url_parts->scheme
  171. && !zend_string_equals_literal_ci(url_parts->scheme, "http")
  172. && !zend_string_equals_literal_ci(url_parts->scheme, "https")) {
  173. smart_str_append_smart_str(dest, url);
  174. php_url_free(url_parts);
  175. return;
  176. }
  177. /* Check host whitelist. If it's not listed, do nothing. */
  178. if (url_parts->host) {
  179. zend_string *tmp = zend_string_tolower(url_parts->host);
  180. if (!zend_hash_exists(&BG(url_adapt_session_hosts_ht), tmp)) {
  181. zend_string_release_ex(tmp, 0);
  182. smart_str_append_smart_str(dest, url);
  183. php_url_free(url_parts);
  184. return;
  185. }
  186. zend_string_release_ex(tmp, 0);
  187. }
  188. /*
  189. * When URL does not have path and query string add "/?".
  190. * i.e. If URL is only "?foo=bar", should not add "/?".
  191. */
  192. if (!url_parts->path && !url_parts->query && !url_parts->fragment) {
  193. /* URL is http://php.net or like */
  194. smart_str_append_smart_str(dest, url);
  195. smart_str_appendc(dest, '/');
  196. smart_str_appendc(dest, '?');
  197. smart_str_append_smart_str(dest, url_app);
  198. php_url_free(url_parts);
  199. return;
  200. }
  201. if (url_parts->scheme) {
  202. smart_str_appends(dest, ZSTR_VAL(url_parts->scheme));
  203. smart_str_appends(dest, "://");
  204. } else if (*(ZSTR_VAL(url->s)) == '/' && *(ZSTR_VAL(url->s)+1) == '/') {
  205. smart_str_appends(dest, "//");
  206. }
  207. if (url_parts->user) {
  208. smart_str_appends(dest, ZSTR_VAL(url_parts->user));
  209. if (url_parts->pass) {
  210. smart_str_appends(dest, ZSTR_VAL(url_parts->pass));
  211. smart_str_appendc(dest, ':');
  212. }
  213. smart_str_appendc(dest, '@');
  214. }
  215. if (url_parts->host) {
  216. smart_str_appends(dest, ZSTR_VAL(url_parts->host));
  217. }
  218. if (url_parts->port) {
  219. smart_str_appendc(dest, ':');
  220. smart_str_append_unsigned(dest, (long)url_parts->port);
  221. }
  222. if (url_parts->path) {
  223. smart_str_appends(dest, ZSTR_VAL(url_parts->path));
  224. }
  225. smart_str_appendc(dest, '?');
  226. if (url_parts->query) {
  227. smart_str_appends(dest, ZSTR_VAL(url_parts->query));
  228. smart_str_appends(dest, separator);
  229. smart_str_append_smart_str(dest, url_app);
  230. } else {
  231. smart_str_append_smart_str(dest, url_app);
  232. }
  233. if (url_parts->fragment) {
  234. smart_str_appendc(dest, '#');
  235. smart_str_appends(dest, ZSTR_VAL(url_parts->fragment));
  236. }
  237. php_url_free(url_parts);
  238. }
  /* Values stored in ctx->tag_type by handle_tag(). */
  enum {
      TAG_NORMAL = 0, /* any tag other than "form" */
      TAG_FORM   = 1  /* a "form" tag */
  };
  /* Values stored in ctx->attr_type by handle_arg(). */
  enum {
      ATTR_NORMAL = 0, /* any other attribute */
      ATTR_ACTION = 1  /* the "action" attribute of a form tag */
  };
  /* Drop the earlier re2c glue macros; the scanner redefines all five with its own names below. */
  247. #undef YYFILL
  248. #undef YYCTYPE
  249. #undef YYCURSOR
  250. #undef YYLIMIT
  251. #undef YYMARKER
  252. static inline void tag_arg(url_adapt_state_ex_t *ctx, char quotes, char type)
  253. {
  254. char f = 0;
  255. /* arg.s is string WITHOUT NUL.
  256. To avoid partial match, NUL is added here */
  257. ZSTR_VAL(ctx->arg.s)[ZSTR_LEN(ctx->arg.s)] = '\0';
  258. if (!strcasecmp(ZSTR_VAL(ctx->arg.s), ctx->lookup_data)) {
  259. f = 1;
  260. }
  261. if (quotes) {
  262. smart_str_appendc(&ctx->result, type);
  263. }
  264. if (f) {
  265. append_modified_url(&ctx->val, &ctx->result, &ctx->url_app, PG(arg_separator).output);
  266. } else {
  267. smart_str_append_smart_str(&ctx->result, &ctx->val);
  268. }
  269. if (quotes) {
  270. smart_str_appendc(&ctx->result, type);
  271. }
  272. }
  /* Scanner states kept in ctx->state between calls of xx_mainloop(). */
  enum {
      STATE_PLAIN      = 0,
      STATE_TAG        = 1,
      STATE_NEXT_ARG   = 2,
      STATE_ARG        = 3,
      STATE_BEFORE_VAL = 4,
      STATE_VAL        = 5
  };
  /*
   * re2c glue for xx_mainloop() and its helpers.
   * YYFILL jumps to the `stop` label, the cursor is the variable `xp`, the
   * limit is `end` and the backtrack marker is `q`.
   * STATE aliases ctx->state. STD_PARA / STD_ARGS are the common parameter
   * list and the matching argument list of the handle_*() helpers.
   * scdebug() expands to printf only when SCANNER_DEBUG is non-zero.
   */
  281. #define YYFILL(n) goto stop
  282. #define YYCTYPE unsigned char
  283. #define YYCURSOR xp
  284. #define YYLIMIT end
  285. #define YYMARKER q
  286. #define STATE ctx->state
  287. #define STD_PARA url_adapt_state_ex_t *ctx, char *start, char *YYCURSOR
  288. #define STD_ARGS ctx, start, xp
  289. #if SCANNER_DEBUG
  290. #define scdebug(x) printf x
  291. #else
  292. #define scdebug(x)
  293. #endif
  294. static inline void passthru(STD_PARA)
  295. {
  296. scdebug(("appending %d chars, starting with %c\n", YYCURSOR-start, *start));
  297. smart_str_appendl(&ctx->result, start, YYCURSOR - start);
  298. }
  299. static int check_http_host(char *target)
  300. {
  301. zval *host, *tmp;
  302. zend_string *host_tmp;
  303. char *colon;
  304. if ((tmp = zend_hash_find(&EG(symbol_table), ZSTR_KNOWN(ZEND_STR_AUTOGLOBAL_SERVER))) &&
  305. Z_TYPE_P(tmp) == IS_ARRAY &&
  306. (host = zend_hash_str_find(Z_ARRVAL_P(tmp), ZEND_STRL("HTTP_HOST"))) &&
  307. Z_TYPE_P(host) == IS_STRING) {
  308. host_tmp = zend_string_init(Z_STRVAL_P(host), Z_STRLEN_P(host), 0);
  309. /* HTTP_HOST could be 'localhost:8888' etc. */
  310. colon = strchr(ZSTR_VAL(host_tmp), ':');
  311. if (colon) {
  312. ZSTR_LEN(host_tmp) = colon - ZSTR_VAL(host_tmp);
  313. ZSTR_VAL(host_tmp)[ZSTR_LEN(host_tmp)] = '\0';
  314. }
  315. if (!strcasecmp(ZSTR_VAL(host_tmp), target)) {
  316. zend_string_release_ex(host_tmp, 0);
  317. return SUCCESS;
  318. }
  319. zend_string_release_ex(host_tmp, 0);
  320. }
  321. return FAILURE;
  322. }
  323. static int check_host_whitelist(url_adapt_state_ex_t *ctx)
  324. {
  325. php_url *url_parts = NULL;
  326. HashTable *allowed_hosts = ctx->type ? &BG(url_adapt_session_hosts_ht) : &BG(url_adapt_output_hosts_ht);
  327. ZEND_ASSERT(ctx->tag_type == TAG_FORM);
  328. if (ctx->attr_val.s && ZSTR_LEN(ctx->attr_val.s)) {
  329. url_parts = php_url_parse_ex(ZSTR_VAL(ctx->attr_val.s), ZSTR_LEN(ctx->attr_val.s));
  330. } else {
  331. return SUCCESS; /* empty URL is valid */
  332. }
  333. if (!url_parts) {
  334. return FAILURE;
  335. }
  336. if (url_parts->scheme) {
  337. /* Only http/https should be handled.
  338. A bit hacky check this here, but saves a URL parse. */
  339. if (!zend_string_equals_literal_ci(url_parts->scheme, "http") &&
  340. !zend_string_equals_literal_ci(url_parts->scheme, "https")) {
  341. php_url_free(url_parts);
  342. return FAILURE;
  343. }
  344. }
  345. if (!url_parts->host) {
  346. php_url_free(url_parts);
  347. return SUCCESS;
  348. }
  349. if (!zend_hash_num_elements(allowed_hosts) &&
  350. check_http_host(ZSTR_VAL(url_parts->host)) == SUCCESS) {
  351. php_url_free(url_parts);
  352. return SUCCESS;
  353. }
  354. if (!zend_hash_find(allowed_hosts, url_parts->host)) {
  355. php_url_free(url_parts);
  356. return FAILURE;
  357. }
  358. php_url_free(url_parts);
  359. return SUCCESS;
  360. }
  361. /*
  362. * This function appends a hidden input field after a <form>.
  363. */
  364. static void handle_form(STD_PARA)
  365. {
  366. int doit = 0;
  367. if (ZSTR_LEN(ctx->form_app.s) > 0) {
  368. switch (ZSTR_LEN(ctx->tag.s)) {
  369. case sizeof("form") - 1:
  370. if (!strncasecmp(ZSTR_VAL(ctx->tag.s), "form", ZSTR_LEN(ctx->tag.s))
  371. && check_host_whitelist(ctx) == SUCCESS) {
  372. doit = 1;
  373. }
  374. break;
  375. }
  376. }
  377. if (doit) {
  378. smart_str_append_smart_str(&ctx->result, &ctx->form_app);
  379. }
  380. }
  381. /*
  382. * HANDLE_TAG copies the HTML Tag and checks whether we
  383. * have that tag in our table. If we might modify it,
  384. * we continue to scan the tag, otherwise we simply copy the complete
  385. * HTML stuff to the result buffer.
  386. */
  387. static inline void handle_tag(STD_PARA)
  388. {
  389. int ok = 0;
  390. unsigned int i;
  391. if (ctx->tag.s) {
  392. ZSTR_LEN(ctx->tag.s) = 0;
  393. }
  394. smart_str_appendl(&ctx->tag, start, YYCURSOR - start);
  395. for (i = 0; i < ZSTR_LEN(ctx->tag.s); i++)
  396. ZSTR_VAL(ctx->tag.s)[i] = tolower((int)(unsigned char)ZSTR_VAL(ctx->tag.s)[i]);
  397. /* intentionally using str_find here, in case the hash value is set, but the string val is changed later */
  398. if ((ctx->lookup_data = zend_hash_str_find_ptr(ctx->tags, ZSTR_VAL(ctx->tag.s), ZSTR_LEN(ctx->tag.s))) != NULL) {
  399. ok = 1;
  400. if (ZSTR_LEN(ctx->tag.s) == sizeof("form")-1
  401. && !strncasecmp(ZSTR_VAL(ctx->tag.s), "form", ZSTR_LEN(ctx->tag.s))) {
  402. ctx->tag_type = TAG_FORM;
  403. } else {
  404. ctx->tag_type = TAG_NORMAL;
  405. }
  406. }
  407. STATE = ok ? STATE_NEXT_ARG : STATE_PLAIN;
  408. }
  409. static inline void handle_arg(STD_PARA)
  410. {
  411. if (ctx->arg.s) {
  412. ZSTR_LEN(ctx->arg.s) = 0;
  413. }
  414. smart_str_appendl(&ctx->arg, start, YYCURSOR - start);
  415. if (ctx->tag_type == TAG_FORM &&
  416. strncasecmp(ZSTR_VAL(ctx->arg.s), "action", ZSTR_LEN(ctx->arg.s)) == 0) {
  417. ctx->attr_type = ATTR_ACTION;
  418. } else {
  419. ctx->attr_type = ATTR_NORMAL;
  420. }
  421. }
  422. static inline void handle_val(STD_PARA, char quotes, char type)
  423. {
  424. smart_str_setl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2);
  425. if (ctx->tag_type == TAG_FORM && ctx->attr_type == ATTR_ACTION) {
  426. smart_str_setl(&ctx->attr_val, start + quotes, YYCURSOR - start - quotes * 2);
  427. }
  428. tag_arg(ctx, quotes, type);
  429. }
  /*
   * Incremental scanner driver.
   *
   * Appends `newdata` to ctx->buf, then scans the whole buffer from its start,
   * resuming in the state saved in ctx->state. Each re2c rule hands the
   * matched span to passthru() or one of the handle_*() helpers, which write
   * to ctx->result. When the scanner runs out of input, YYFILL jumps to
   * `stop`, where the bytes from the start of the current token on are moved
   * to the front of ctx->buf so the next call can continue with them.
   */
  430. static inline void xx_mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen)
  431. {
  432. char *end, *q;
  433. char *xp;
  434. char *start;
  435. size_t rest;
  /* Queue the new chunk behind whatever an earlier call left in the buffer. */
  436. smart_str_appendl(&ctx->buf, newdata, newlen);
  437. YYCURSOR = ZSTR_VAL(ctx->buf.s);
  438. YYLIMIT = ZSTR_VAL(ctx->buf.s) + ZSTR_LEN(ctx->buf.s);
  /* Continue in the state recorded in ctx->state. */
  439. switch (STATE) {
  440. case STATE_PLAIN: goto state_plain;
  441. case STATE_TAG: goto state_tag;
  442. case STATE_NEXT_ARG: goto state_next_arg;
  443. case STATE_ARG: goto state_arg;
  444. case STATE_BEFORE_VAL: goto state_before_val;
  445. case STATE_VAL: goto state_val;
  446. }
  447. state_plain_begin:
  448. STATE = STATE_PLAIN;
  449. state_plain:
  450. start = YYCURSOR;
  451. /*!re2c
  452. "<" { passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; }
  453. N+ { passthru(STD_ARGS); goto state_plain; }
  454. */
  455. state_tag:
  456. start = YYCURSOR;
  457. /*!re2c
  458. alphanamespace+ { handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; }
  459. any { passthru(STD_ARGS); goto state_plain_begin; }
  460. */
  461. state_next_arg_begin:
  462. STATE = STATE_NEXT_ARG;
  463. state_next_arg:
  464. start = YYCURSOR;
  465. /*!re2c
  466. [/]? [>] { passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; }
  467. [ \v\r\t\n]+ { passthru(STD_ARGS); goto state_next_arg; }
  468. alpha { --YYCURSOR; STATE = STATE_ARG; goto state_arg; }
  469. any { passthru(STD_ARGS); goto state_plain_begin; }
  470. */
  471. state_arg:
  472. start = YYCURSOR;
  473. /*!re2c
  474. alpha alphadash* { passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; }
  475. any { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; }
  476. */
  477. state_before_val:
  478. start = YYCURSOR;
  479. /*!re2c
  480. [ ]* "=" [ ]* { passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; }
  481. any { --YYCURSOR; goto state_next_arg_begin; }
  482. */
  483. state_val:
  484. start = YYCURSOR;
  485. /*!re2c
  486. ["] (any\[">])* ["] { handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; }
  487. ['] (any\['>])* ['] { handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; }
  488. (any\[ \r\t\n>'"])+ { handle_val(STD_ARGS, 0, ' '); goto state_next_arg_begin; }
  489. any { passthru(STD_ARGS); goto state_next_arg_begin; }
  490. */
  /* Target of YYFILL: keep everything from `start` on for the next call. */
  491. stop:
  492. if (YYLIMIT < start) {
  493. /* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */
  494. rest = 0;
  495. } else {
  496. rest = YYLIMIT - start;
  497. scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest));
  498. }
  /* Shift the kept tail to the front and shrink the buffer to it. */
  499. if (rest) memmove(ZSTR_VAL(ctx->buf.s), start, rest);
  500. ZSTR_LEN(ctx->buf.s) = rest;
  501. }
  502. PHPAPI char *php_url_scanner_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen, int encode)
  503. {
  504. char *result;
  505. smart_str surl = {0};
  506. smart_str buf = {0};
  507. smart_str url_app = {0};
  508. zend_string *encoded;
  509. smart_str_appendl(&surl, url, urllen);
  510. if (encode) {
  511. encoded = php_raw_url_encode(name, strlen(name));
  512. smart_str_appendl(&url_app, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
  513. zend_string_free(encoded);
  514. } else {
  515. smart_str_appends(&url_app, name);
  516. }
  517. smart_str_appendc(&url_app, '=');
  518. if (encode) {
  519. encoded = php_raw_url_encode(value, strlen(value));
  520. smart_str_appendl(&url_app, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
  521. zend_string_free(encoded);
  522. } else {
  523. smart_str_appends(&url_app, value);
  524. }
  525. append_modified_url(&surl, &buf, &url_app, PG(arg_separator).output);
  526. smart_str_0(&buf);
  527. if (newlen) *newlen = ZSTR_LEN(buf.s);
  528. result = estrndup(ZSTR_VAL(buf.s), ZSTR_LEN(buf.s));
  529. smart_str_free(&url_app);
  530. smart_str_free(&buf);
  531. return result;
  532. }
  533. static char *url_adapt_ext(const char *src, size_t srclen, size_t *newlen, bool do_flush, url_adapt_state_ex_t *ctx)
  534. {
  535. char *retval;
  536. xx_mainloop(ctx, src, srclen);
  537. if (!ctx->result.s) {
  538. smart_str_appendl(&ctx->result, "", 0);
  539. *newlen = 0;
  540. } else {
  541. *newlen = ZSTR_LEN(ctx->result.s);
  542. }
  543. smart_str_0(&ctx->result);
  544. if (do_flush) {
  545. smart_str_append(&ctx->result, ctx->buf.s);
  546. *newlen += ZSTR_LEN(ctx->buf.s);
  547. smart_str_free(&ctx->buf);
  548. smart_str_free(&ctx->val);
  549. smart_str_free(&ctx->attr_val);
  550. }
  551. retval = estrndup(ZSTR_VAL(ctx->result.s), ZSTR_LEN(ctx->result.s));
  552. smart_str_free(&ctx->result);
  553. return retval;
  554. }
  555. static int php_url_scanner_ex_activate(int type)
  556. {
  557. url_adapt_state_ex_t *ctx;
  558. if (type) {
  559. ctx = &BG(url_adapt_session_ex);
  560. } else {
  561. ctx = &BG(url_adapt_output_ex);
  562. }
  563. memset(ctx, 0, XtOffsetOf(url_adapt_state_ex_t, tags));
  564. return SUCCESS;
  565. }
  566. static int php_url_scanner_ex_deactivate(int type)
  567. {
  568. url_adapt_state_ex_t *ctx;
  569. if (type) {
  570. ctx = &BG(url_adapt_session_ex);
  571. } else {
  572. ctx = &BG(url_adapt_output_ex);
  573. }
  574. smart_str_free(&ctx->result);
  575. smart_str_free(&ctx->buf);
  576. smart_str_free(&ctx->tag);
  577. smart_str_free(&ctx->arg);
  578. smart_str_free(&ctx->attr_val);
  579. return SUCCESS;
  580. }
  581. static inline void php_url_scanner_session_handler_impl(char *output, size_t output_len, char **handled_output, size_t *handled_output_len, int mode, int type)
  582. {
  583. size_t len;
  584. url_adapt_state_ex_t *url_state;
  585. if (type) {
  586. url_state = &BG(url_adapt_session_ex);
  587. } else {
  588. url_state = &BG(url_adapt_output_ex);
  589. }
  590. if (ZSTR_LEN(url_state->url_app.s) != 0) {
  591. *handled_output = url_adapt_ext(output, output_len, &len, (bool) (mode & (PHP_OUTPUT_HANDLER_END | PHP_OUTPUT_HANDLER_CONT | PHP_OUTPUT_HANDLER_FLUSH | PHP_OUTPUT_HANDLER_FINAL) ? 1 : 0), url_state);
  592. if (sizeof(unsigned int) < sizeof(size_t)) {
  593. if (len > UINT_MAX)
  594. len = UINT_MAX;
  595. }
  596. *handled_output_len = len;
  597. } else if (ZSTR_LEN(url_state->url_app.s) == 0) {
  598. url_adapt_state_ex_t *ctx = url_state;
  599. if (ctx->buf.s && ZSTR_LEN(ctx->buf.s)) {
  600. smart_str_append(&ctx->result, ctx->buf.s);
  601. smart_str_appendl(&ctx->result, output, output_len);
  602. *handled_output = estrndup(ZSTR_VAL(ctx->result.s), ZSTR_LEN(ctx->result.s));
  603. *handled_output_len = ZSTR_LEN(ctx->buf.s) + output_len;
  604. smart_str_free(&ctx->buf);
  605. smart_str_free(&ctx->result);
  606. } else {
  607. *handled_output = estrndup(output, *handled_output_len = output_len);
  608. }
  609. } else {
  610. *handled_output = NULL;
  611. }
  612. }
  /* Output handler bound to the session rewriter state. */
  static void php_url_scanner_session_handler(char *output, size_t output_len, char **handled_output, size_t *handled_output_len, int mode)
  {
      const int use_session_state = 1;

      php_url_scanner_session_handler_impl(output, output_len, handled_output, handled_output_len, mode, use_session_state);
  }
  /* Output handler bound to the output rewriter state. */
  static void php_url_scanner_output_handler(char *output, size_t output_len, char **handled_output, size_t *handled_output_len, int mode)
  {
      const int use_session_state = 0;

      php_url_scanner_session_handler_impl(output, output_len, handled_output, handled_output_len, mode, use_session_state);
  }
  621. static inline int php_url_scanner_add_var_impl(const char *name, size_t name_len, const char *value, size_t value_len, int encode, int type)
  622. {
  623. smart_str sname = {0};
  624. smart_str svalue = {0};
  625. smart_str hname = {0};
  626. smart_str hvalue = {0};
  627. zend_string *encoded;
  628. url_adapt_state_ex_t *url_state;
  629. php_output_handler_func_t handler;
  630. if (type) {
  631. url_state = &BG(url_adapt_session_ex);
  632. handler = php_url_scanner_session_handler;
  633. } else {
  634. url_state = &BG(url_adapt_output_ex);
  635. handler = php_url_scanner_output_handler;
  636. }
  637. if (!url_state->active) {
  638. php_url_scanner_ex_activate(type);
  639. php_output_start_internal(ZEND_STRL("URL-Rewriter"), handler, 0, PHP_OUTPUT_HANDLER_STDFLAGS);
  640. url_state->active = 1;
  641. }
  642. if (url_state->url_app.s && ZSTR_LEN(url_state->url_app.s) != 0) {
  643. smart_str_appends(&url_state->url_app, PG(arg_separator).output);
  644. }
  645. if (encode) {
  646. encoded = php_raw_url_encode(name, name_len);
  647. smart_str_appendl(&sname, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
  648. encoded = php_raw_url_encode(value, value_len);
  649. smart_str_appendl(&svalue, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
  650. encoded = php_escape_html_entities_ex((const unsigned char *) name, name_len, 0, ENT_QUOTES|ENT_SUBSTITUTE, NULL, /* double_encode */ 0, /* quiet */ 1);
  651. smart_str_appendl(&hname, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
  652. encoded = php_escape_html_entities_ex((const unsigned char *) value, value_len, 0, ENT_QUOTES|ENT_SUBSTITUTE, NULL, /* double_encode */ 0, /* quiet */ 1);
  653. smart_str_appendl(&hvalue, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
  654. } else {
  655. smart_str_appendl(&sname, name, name_len);
  656. smart_str_appendl(&svalue, value, value_len);
  657. smart_str_appendl(&hname, name, name_len);
  658. smart_str_appendl(&hvalue, value, value_len);
  659. }
  660. smart_str_append_smart_str(&url_state->url_app, &sname);
  661. smart_str_appendc(&url_state->url_app, '=');
  662. smart_str_append_smart_str(&url_state->url_app, &svalue);
  663. smart_str_appends(&url_state->form_app, "<input type=\"hidden\" name=\"");
  664. smart_str_append_smart_str(&url_state->form_app, &hname);
  665. smart_str_appends(&url_state->form_app, "\" value=\"");
  666. smart_str_append_smart_str(&url_state->form_app, &hvalue);
  667. smart_str_appends(&url_state->form_app, "\" />");
  668. smart_str_free(&sname);
  669. smart_str_free(&svalue);
  670. smart_str_free(&hname);
  671. smart_str_free(&hvalue);
  672. return SUCCESS;
  673. }
  674. PHPAPI int php_url_scanner_add_session_var(const char *name, size_t name_len, const char *value, size_t value_len, int encode)
  675. {
  676. return php_url_scanner_add_var_impl(name, name_len, value, value_len, encode, 1);
  677. }
  678. PHPAPI int php_url_scanner_add_var(const char *name, size_t name_len, const char *value, size_t value_len, int encode)
  679. {
  680. return php_url_scanner_add_var_impl(name, name_len, value, value_len, encode, 0);
  681. }
  682. static inline void php_url_scanner_reset_vars_impl(int type) {
  683. url_adapt_state_ex_t *url_state;
  684. if (type) {
  685. url_state = &BG(url_adapt_session_ex);
  686. } else {
  687. url_state = &BG(url_adapt_output_ex);
  688. }
  689. if (url_state->form_app.s) {
  690. ZSTR_LEN(url_state->form_app.s) = 0;
  691. }
  692. if (url_state->url_app.s) {
  693. ZSTR_LEN(url_state->url_app.s) = 0;
  694. }
  695. }
  696. PHPAPI int php_url_scanner_reset_session_vars(void)
  697. {
  698. php_url_scanner_reset_vars_impl(1);
  699. return SUCCESS;
  700. }
  701. PHPAPI int php_url_scanner_reset_vars(void)
  702. {
  703. php_url_scanner_reset_vars_impl(0);
  704. return SUCCESS;
  705. }
  706. static inline int php_url_scanner_reset_var_impl(zend_string *name, int encode, int type)
  707. {
  708. char *start, *end, *limit;
  709. size_t separator_len;
  710. smart_str sname = {0};
  711. smart_str hname = {0};
  712. smart_str url_app = {0};
  713. smart_str form_app = {0};
  714. zend_string *encoded;
  715. int ret = SUCCESS;
  716. bool sep_removed = 0;
  717. url_adapt_state_ex_t *url_state;
  718. if (type) {
  719. url_state = &BG(url_adapt_session_ex);
  720. } else {
  721. url_state = &BG(url_adapt_output_ex);
  722. }
  723. /* Short circuit check. Only check url_app. */
  724. if (!url_state->url_app.s || !ZSTR_LEN(url_state->url_app.s)) {
  725. return SUCCESS;
  726. }
  727. if (encode) {
  728. encoded = php_raw_url_encode(ZSTR_VAL(name), ZSTR_LEN(name));
  729. smart_str_appendl(&sname, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
  730. zend_string_free(encoded);
  731. encoded = php_escape_html_entities_ex((const unsigned char *) ZSTR_VAL(name), ZSTR_LEN(name), 0, ENT_QUOTES|ENT_SUBSTITUTE, SG(default_charset), /* double_encode */ 0, /* quiet */ 1);
  732. smart_str_appendl(&hname, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
  733. zend_string_free(encoded);
  734. } else {
  735. smart_str_appendl(&sname, ZSTR_VAL(name), ZSTR_LEN(name));
  736. smart_str_appendl(&hname, ZSTR_VAL(name), ZSTR_LEN(name));
  737. }
  738. smart_str_0(&sname);
  739. smart_str_0(&hname);
  740. smart_str_append_smart_str(&url_app, &sname);
  741. smart_str_appendc(&url_app, '=');
  742. smart_str_0(&url_app);
  743. smart_str_appends(&form_app, "<input type=\"hidden\" name=\"");
  744. smart_str_append_smart_str(&form_app, &hname);
  745. smart_str_appends(&form_app, "\" value=\"");
  746. smart_str_0(&form_app);
  747. /* Short circuit check. Only check url_app. */
  748. start = (char *) php_memnstr(ZSTR_VAL(url_state->url_app.s),
  749. ZSTR_VAL(url_app.s), ZSTR_LEN(url_app.s),
  750. ZSTR_VAL(url_state->url_app.s) + ZSTR_LEN(url_state->url_app.s));
  751. if (!start) {
  752. ret = FAILURE;
  753. goto finish;
  754. }
  755. /* Get end of url var */
  756. limit = ZSTR_VAL(url_state->url_app.s) + ZSTR_LEN(url_state->url_app.s);
  757. end = start + ZSTR_LEN(url_app.s);
  758. separator_len = strlen(PG(arg_separator).output);
  759. while (end < limit) {
  760. if (!memcmp(end, PG(arg_separator).output, separator_len)) {
  761. end += separator_len;
  762. sep_removed = 1;
  763. break;
  764. }
  765. end++;
  766. }
  767. /* Remove all when this is the only rewrite var */
  768. if (ZSTR_LEN(url_state->url_app.s) == end - start) {
  769. php_url_scanner_reset_vars_impl(type);
  770. goto finish;
  771. }
  772. /* Check preceding separator */
  773. if (!sep_removed
  774. && (size_t)(start - PG(arg_separator).output) >= separator_len
  775. && !memcmp(start - separator_len, PG(arg_separator).output, separator_len)) {
  776. start -= separator_len;
  777. }
  778. /* Remove partially */
  779. memmove(start, end,
  780. ZSTR_LEN(url_state->url_app.s) - (end - ZSTR_VAL(url_state->url_app.s)));
  781. ZSTR_LEN(url_state->url_app.s) -= end - start;
  782. ZSTR_VAL(url_state->url_app.s)[ZSTR_LEN(url_state->url_app.s)] = '\0';
  783. /* Remove form var */
  784. start = (char *) php_memnstr(ZSTR_VAL(url_state->form_app.s),
  785. ZSTR_VAL(form_app.s), ZSTR_LEN(form_app.s),
  786. ZSTR_VAL(url_state->form_app.s) + ZSTR_LEN(url_state->form_app.s));
  787. if (!start) {
  788. /* Should not happen */
  789. ret = FAILURE;
  790. php_url_scanner_reset_vars_impl(type);
  791. goto finish;
  792. }
  793. /* Get end of form var */
  794. limit = ZSTR_VAL(url_state->form_app.s) + ZSTR_LEN(url_state->form_app.s);
  795. end = start + ZSTR_LEN(form_app.s);
  796. while (end < limit) {
  797. if (*end == '>') {
  798. end += 1;
  799. break;
  800. }
  801. end++;
  802. }
  803. /* Remove partially */
  804. memmove(start, end,
  805. ZSTR_LEN(url_state->form_app.s) - (end - ZSTR_VAL(url_state->form_app.s)));
  806. ZSTR_LEN(url_state->form_app.s) -= end - start;
  807. ZSTR_VAL(url_state->form_app.s)[ZSTR_LEN(url_state->form_app.s)] = '\0';
  808. finish:
  809. smart_str_free(&url_app);
  810. smart_str_free(&form_app);
  811. smart_str_free(&sname);
  812. smart_str_free(&hname);
  813. return ret;
  814. }
  815. PHPAPI int php_url_scanner_reset_session_var(zend_string *name, int encode)
  816. {
  817. return php_url_scanner_reset_var_impl(name, encode, 1);
  818. }
  819. PHPAPI int php_url_scanner_reset_var(zend_string *name, int encode)
  820. {
  821. return php_url_scanner_reset_var_impl(name, encode, 0);
  822. }
  /* Module startup hook: invokes REGISTER_INI_ENTRIES() for this file's INI table and reports SUCCESS. */
  823. PHP_MINIT_FUNCTION(url_scanner)
  824. {
  825. REGISTER_INI_ENTRIES();
  826. return SUCCESS;
  827. }
  /* Module shutdown hook: invokes UNREGISTER_INI_ENTRIES() and reports SUCCESS. */
  828. PHP_MSHUTDOWN_FUNCTION(url_scanner)
  829. {
  830. UNREGISTER_INI_ENTRIES();
  831. return SUCCESS;
  832. }
  833. PHP_RINIT_FUNCTION(url_scanner)
  834. {
  835. BG(url_adapt_session_ex).active = 0;
  836. BG(url_adapt_session_ex).tag_type = 0;
  837. BG(url_adapt_session_ex).attr_type = 0;
  838. BG(url_adapt_output_ex).active = 0;
  839. BG(url_adapt_output_ex).tag_type = 0;
  840. BG(url_adapt_output_ex).attr_type = 0;
  841. return SUCCESS;
  842. }
  843. PHP_RSHUTDOWN_FUNCTION(url_scanner)
  844. {
  845. if (BG(url_adapt_session_ex).active) {
  846. php_url_scanner_ex_deactivate(1);
  847. BG(url_adapt_session_ex).active = 0;
  848. BG(url_adapt_session_ex).tag_type = 0;
  849. BG(url_adapt_session_ex).attr_type = 0;
  850. }
  851. smart_str_free(&BG(url_adapt_session_ex).form_app);
  852. smart_str_free(&BG(url_adapt_session_ex).url_app);
  853. if (BG(url_adapt_output_ex).active) {
  854. php_url_scanner_ex_deactivate(0);
  855. BG(url_adapt_output_ex).active = 0;
  856. BG(url_adapt_output_ex).tag_type = 0;
  857. BG(url_adapt_output_ex).attr_type = 0;
  858. }
  859. smart_str_free(&BG(url_adapt_output_ex).form_app);
  860. smart_str_free(&BG(url_adapt_output_ex).url_app);
  861. return SUCCESS;
  862. }