url_scanner_ex.re 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2016 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: Sascha Schumann <sascha@schumann.cx> |
  16. +----------------------------------------------------------------------+
  17. */
  18. /* $Id$ */
  19. #include "php.h"
  20. #ifdef HAVE_UNISTD_H
  21. #include <unistd.h>
  22. #endif
  23. #ifdef HAVE_LIMITS_H
  24. #include <limits.h>
  25. #endif
  26. #include <stdio.h>
  27. #include <stdlib.h>
  28. #include <string.h>
  29. #include "php_ini.h"
  30. #include "php_globals.h"
  31. #define STATE_TAG SOME_OTHER_STATE_TAG
  32. #include "basic_functions.h"
  33. #include "url.h"
  34. #undef STATE_TAG
  35. #define url_scanner url_scanner_ex
  36. #include "php_smart_str.h"
  37. static PHP_INI_MH(OnUpdateTags)
  38. {
  39. url_adapt_state_ex_t *ctx;
  40. char *key;
  41. char *lasts;
  42. char *tmp;
  43. ctx = &BG(url_adapt_state_ex);
  44. tmp = estrndup(new_value, new_value_length);
  45. if (ctx->tags)
  46. zend_hash_destroy(ctx->tags);
  47. else {
  48. ctx->tags = malloc(sizeof(HashTable));
  49. if (!ctx->tags) {
  50. return FAILURE;
  51. }
  52. }
  53. zend_hash_init(ctx->tags, 0, NULL, NULL, 1);
  54. for (key = php_strtok_r(tmp, ",", &lasts);
  55. key;
  56. key = php_strtok_r(NULL, ",", &lasts)) {
  57. char *val;
  58. val = strchr(key, '=');
  59. if (val) {
  60. char *q;
  61. int keylen;
  62. *val++ = '\0';
  63. for (q = key; *q; q++)
  64. *q = tolower(*q);
  65. keylen = q - key;
  66. /* key is stored withOUT NUL
  67. val is stored WITH NUL */
  68. zend_hash_add(ctx->tags, key, keylen, val, strlen(val)+1, NULL);
  69. }
  70. }
  71. efree(tmp);
  72. return SUCCESS;
  73. }
/*
 * INI settings of this module.  "url_rewriter.tags" is changeable at
 * PHP_INI_ALL level and every change is routed through OnUpdateTags().
 * The default lists the tag=attribute pairs a, area, frame (URL rewriting)
 * plus form and fieldset with an empty attribute name.
 */
PHP_INI_BEGIN()
	STD_PHP_INI_ENTRY("url_rewriter.tags", "a=href,area=href,frame=src,form=,fieldset=", PHP_INI_ALL, OnUpdateTags, url_adapt_state_ex, php_basic_globals, basic_globals)
PHP_INI_END()
/*
 * Named re2c character classes shared by the scanners below:
 *   any            - every byte value
 *   N              - any byte except '<'
 *   alpha          - ASCII letters
 *   alphanamespace - ASCII letters and ':'
 *   alphadash      - ASCII letters and '-'
 */
/*!re2c
any = [\000-\377];
N = (any\[<]);
alpha = [a-zA-Z];
alphanamespace = [a-zA-Z:];
alphadash = ([a-zA-Z] | "-");
*/
  84. #define YYFILL(n) goto done
  85. #define YYCTYPE unsigned char
  86. #define YYCURSOR p
  87. #define YYLIMIT q
  88. #define YYMARKER r
  89. static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *url_app, const char *separator TSRMLS_DC)
  90. {
  91. register const char *p, *q;
  92. const char *bash = NULL;
  93. const char *sep = "?";
  94. /*
  95. * Don't modify "//example.com" full path, unless
  96. * HTTP_HOST matches.
  97. */
  98. if (url->c[0] == '/' && url->c[1] == '/') {
  99. zval **tmp, **http_host;
  100. size_t target_len, host_len;
  101. if (zend_hash_find(&EG(symbol_table), "_SERVER", sizeof("_SERVER"), (void **)&tmp) == FAILURE
  102. || Z_TYPE_PP(tmp) != IS_ARRAY
  103. || zend_hash_find(Z_ARRVAL_PP(tmp), "HTTP_HOST", sizeof("HTTP_HOST"), (void **)&http_host) == FAILURE
  104. || Z_TYPE_PP(http_host) != IS_STRING) {
  105. smart_str_append(dest, url);
  106. return;
  107. }
  108. /* HTTP_HOST could be "example.com:8888", etc. */
  109. /* Need to find end of URL in buffer */
  110. host_len = strcspn(Z_STRVAL_PP(http_host), ":");
  111. target_len = strcspn(url->c+2, "/\"'?>\r\n");
  112. if (host_len
  113. && host_len == target_len
  114. && strncasecmp(Z_STRVAL_PP(http_host), url->c+2, host_len)) {
  115. smart_str_append(dest, url);
  116. return;
  117. }
  118. }
  119. q = (p = url->c) + url->len;
  120. scan:
  121. /*!re2c
  122. ":" { smart_str_append(dest, url); return; }
  123. "?" { sep = separator; goto scan; }
  124. "#" { bash = p - 1; goto done; }
  125. (any\[:?#])+ { goto scan; }
  126. */
  127. done:
  128. /* Don't modify URLs of the format "#mark" */
  129. if (bash && bash - url->c == 0) {
  130. smart_str_append(dest, url);
  131. return;
  132. }
  133. if (bash)
  134. smart_str_appendl(dest, url->c, bash - url->c);
  135. else
  136. smart_str_append(dest, url);
  137. smart_str_appends(dest, sep);
  138. smart_str_append(dest, url_app);
  139. if (bash)
  140. smart_str_appendl(dest, bash, q - bash);
  141. }
  142. #undef YYFILL
  143. #undef YYCTYPE
  144. #undef YYCURSOR
  145. #undef YYLIMIT
  146. #undef YYMARKER
  147. static inline void tag_arg(url_adapt_state_ex_t *ctx, char quotes, char type TSRMLS_DC)
  148. {
  149. char f = 0;
  150. if (strncasecmp(ctx->arg.c, ctx->lookup_data, ctx->arg.len) == 0)
  151. f = 1;
  152. if (quotes)
  153. smart_str_appendc(&ctx->result, type);
  154. if (f) {
  155. append_modified_url(&ctx->val, &ctx->result, &ctx->url_app, PG(arg_separator).output TSRMLS_CC);
  156. } else {
  157. smart_str_append(&ctx->result, &ctx->val);
  158. }
  159. if (quotes)
  160. smart_str_appendc(&ctx->result, type);
  161. }
  162. enum {
  163. STATE_PLAIN = 0,
  164. STATE_TAG,
  165. STATE_NEXT_ARG,
  166. STATE_ARG,
  167. STATE_BEFORE_VAL,
  168. STATE_VAL
  169. };
/*
 * re2c interface for the scanner in xx_mainloop(): the cursor is `xp`,
 * the input limit is `end`, the backtracking marker is `q`, and YYFILL
 * leaves the scanner through the `stop` label.
 */
#define YYFILL(n) goto stop
#define YYCTYPE unsigned char
#define YYCURSOR xp
#define YYLIMIT end
#define YYMARKER q
/* Shorthand for the scanner state kept in the context */
#define STATE ctx->state

/*
 * Common parameter list (STD_PARA) and matching argument list (STD_ARGS)
 * of the handler functions called from the scanner actions.
 */
#define STD_PARA url_adapt_state_ex_t *ctx, char *start, char *YYCURSOR TSRMLS_DC
#define STD_ARGS ctx, start, xp TSRMLS_CC

/* scdebug((fmt, ...)) prints via printf when SCANNER_DEBUG is set, else expands to nothing */
#if SCANNER_DEBUG
#define scdebug(x) printf x
#else
#define scdebug(x)
#endif
  183. static inline void passthru(STD_PARA)
  184. {
  185. scdebug(("appending %d chars, starting with %c\n", YYCURSOR-start, *start));
  186. smart_str_appendl(&ctx->result, start, YYCURSOR - start);
  187. }
  188. /*
  189. * This function appends a hidden input field after a <form> or
  190. * <fieldset>. The latter is important for XHTML.
  191. */
  192. static void handle_form(STD_PARA)
  193. {
  194. int doit = 0;
  195. if (ctx->form_app.len > 0) {
  196. switch (ctx->tag.len) {
  197. case sizeof("form") - 1:
  198. if (!strncasecmp(ctx->tag.c, "form", sizeof("form") - 1)) {
  199. doit = 1;
  200. }
  201. if (doit && ctx->val.c && ctx->lookup_data && *ctx->lookup_data) {
  202. char *e, *p = zend_memnstr(ctx->val.c, "://", sizeof("://") - 1, ctx->val.c + ctx->val.len);
  203. if (p) {
  204. e = memchr(p, '/', (ctx->val.c + ctx->val.len) - p);
  205. if (!e) {
  206. e = ctx->val.c + ctx->val.len;
  207. }
  208. if ((e - p) && strncasecmp(p, ctx->lookup_data, (e - p))) {
  209. doit = 0;
  210. }
  211. }
  212. }
  213. break;
  214. case sizeof("fieldset") - 1:
  215. if (!strncasecmp(ctx->tag.c, "fieldset", sizeof("fieldset") - 1)) {
  216. doit = 1;
  217. }
  218. break;
  219. }
  220. if (doit)
  221. smart_str_append(&ctx->result, &ctx->form_app);
  222. }
  223. }
  224. /*
  225. * HANDLE_TAG copies the HTML Tag and checks whether we
  226. * have that tag in our table. If we might modify it,
  227. * we continue to scan the tag, otherwise we simply copy the complete
  228. * HTML stuff to the result buffer.
  229. */
  230. static inline void handle_tag(STD_PARA)
  231. {
  232. int ok = 0;
  233. unsigned int i;
  234. ctx->tag.len = 0;
  235. smart_str_appendl(&ctx->tag, start, YYCURSOR - start);
  236. for (i = 0; i < ctx->tag.len; i++)
  237. ctx->tag.c[i] = tolower((int)(unsigned char)ctx->tag.c[i]);
  238. if (zend_hash_find(ctx->tags, ctx->tag.c, ctx->tag.len, (void **) &ctx->lookup_data) == SUCCESS)
  239. ok = 1;
  240. STATE = ok ? STATE_NEXT_ARG : STATE_PLAIN;
  241. }
  242. static inline void handle_arg(STD_PARA)
  243. {
  244. ctx->arg.len = 0;
  245. smart_str_appendl(&ctx->arg, start, YYCURSOR - start);
  246. }
  247. static inline void handle_val(STD_PARA, char quotes, char type)
  248. {
  249. smart_str_setl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2);
  250. tag_arg(ctx, quotes, type TSRMLS_CC);
  251. }
/*
 * Core HTML scanner.
 *
 * Appends `newdata` (`newlen` bytes) to the pending input in ctx->buf and
 * runs the re2c state machine over the whole buffer, starting in the state
 * saved in ctx->state.  Output is written to ctx->result by the handlers
 * (passthru, handle_val, handle_form).  When the scanner leaves through
 * `stop` (YYFILL is defined as `goto stop`), the bytes from the start of
 * the current token to the end of the buffer are moved to the front of
 * ctx->buf so a later call can continue from there.
 */
static inline void xx_mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen TSRMLS_DC)
{
	char *end, *q;
	char *xp;
	char *start;
	int rest;

	smart_str_appendl(&ctx->buf, newdata, newlen);

	YYCURSOR = ctx->buf.c;
	YYLIMIT = ctx->buf.c + ctx->buf.len;

	/* Resume in the saved state; any other value falls through to state_plain_begin */
	switch (STATE) {
		case STATE_PLAIN: goto state_plain;
		case STATE_TAG: goto state_tag;
		case STATE_NEXT_ARG: goto state_next_arg;
		case STATE_ARG: goto state_arg;
		case STATE_BEFORE_VAL: goto state_before_val;
		case STATE_VAL: goto state_val;
	}

/* Plain text: copy everything through, switch to the tag state on '<' */
state_plain_begin:
	STATE = STATE_PLAIN;

state_plain:
	start = YYCURSOR;
/*!re2c
  "<" { passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; }
  N+ { passthru(STD_ARGS); goto state_plain; }
*/

/* Tag name: handle_tag() decides whether the attributes of this tag are scanned */
state_tag:
	start = YYCURSOR;
/*!re2c
  alphanamespace+ { handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; }
  any { passthru(STD_ARGS); goto state_plain_begin; }
*/

/* Between attributes: a closing '>' (optionally preceded by '/') ends the tag */
state_next_arg_begin:
	STATE = STATE_NEXT_ARG;

state_next_arg:
	start = YYCURSOR;
/*!re2c
  [/]? [>] { passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; }
  [ \v\r\t\n]+ { passthru(STD_ARGS); goto state_next_arg; }
  alpha { --YYCURSOR; STATE = STATE_ARG; goto state_arg; }
  any { passthru(STD_ARGS); goto state_plain_begin; }
*/

/* Attribute name: copied through and remembered in ctx->arg */
state_arg:
	start = YYCURSOR;
/*!re2c
  alpha alphadash* { passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; }
  any { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; }
*/

/* After the name: an '=' (with optional spaces) introduces a value; anything else is re-scanned as the next attribute */
state_before_val:
	start = YYCURSOR;
/*!re2c
  [ ]* "=" [ ]* { passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; }
  any { --YYCURSOR; goto state_next_arg_begin; }
*/

/* Attribute value: double-quoted, single-quoted or unquoted; emitted by handle_val() */
state_val:
	start = YYCURSOR;
/*!re2c
  ["] (any\[">])* ["] { handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; }
  ['] (any\['>])* ['] { handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; }
  (any\[ \r\t\n>'"])+ { handle_val(STD_ARGS, 0, ' '); goto state_next_arg_begin; }
  any { passthru(STD_ARGS); goto state_next_arg_begin; }
*/

stop:
	/* Number of bytes from the start of the unfinished token to the end of the buffer */
	rest = YYLIMIT - start;
	scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest));
	/* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */
	if (rest < 0) rest = 0;

	/* Keep the unfinished part at the front of ctx->buf for the next call */
	if (rest) memmove(ctx->buf.c, start, rest);
	ctx->buf.len = rest;
}
  321. char *php_url_scanner_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen TSRMLS_DC)
  322. {
  323. smart_str surl = {0};
  324. smart_str buf = {0};
  325. smart_str url_app = {0};
  326. smart_str_setl(&surl, url, urllen);
  327. smart_str_appends(&url_app, name);
  328. smart_str_appendc(&url_app, '=');
  329. smart_str_appends(&url_app, value);
  330. append_modified_url(&surl, &buf, &url_app, PG(arg_separator).output TSRMLS_CC);
  331. smart_str_0(&buf);
  332. if (newlen) *newlen = buf.len;
  333. smart_str_free(&url_app);
  334. return buf.c;
  335. }
  336. static char *url_adapt_ext(const char *src, size_t srclen, size_t *newlen, zend_bool do_flush TSRMLS_DC)
  337. {
  338. url_adapt_state_ex_t *ctx;
  339. char *retval;
  340. ctx = &BG(url_adapt_state_ex);
  341. xx_mainloop(ctx, src, srclen TSRMLS_CC);
  342. *newlen = ctx->result.len;
  343. if (!ctx->result.c) {
  344. smart_str_appendl(&ctx->result, "", 0);
  345. }
  346. smart_str_0(&ctx->result);
  347. if (do_flush) {
  348. smart_str_appendl(&ctx->result, ctx->buf.c, ctx->buf.len);
  349. *newlen += ctx->buf.len;
  350. smart_str_free(&ctx->buf);
  351. }
  352. retval = ctx->result.c;
  353. ctx->result.c = NULL;
  354. ctx->result.len = 0;
  355. return retval;
  356. }
  357. static int php_url_scanner_ex_activate(TSRMLS_D)
  358. {
  359. url_adapt_state_ex_t *ctx;
  360. ctx = &BG(url_adapt_state_ex);
  361. memset(ctx, 0, ((size_t) &((url_adapt_state_ex_t *)0)->tags));
  362. return SUCCESS;
  363. }
  364. static int php_url_scanner_ex_deactivate(TSRMLS_D)
  365. {
  366. url_adapt_state_ex_t *ctx;
  367. ctx = &BG(url_adapt_state_ex);
  368. smart_str_free(&ctx->result);
  369. smart_str_free(&ctx->buf);
  370. smart_str_free(&ctx->tag);
  371. smart_str_free(&ctx->arg);
  372. return SUCCESS;
  373. }
  374. static void php_url_scanner_output_handler(char *output, uint output_len, char **handled_output, uint *handled_output_len, int mode TSRMLS_DC)
  375. {
  376. size_t len;
  377. if (BG(url_adapt_state_ex).url_app.len != 0) {
  378. *handled_output = url_adapt_ext(output, output_len, &len, (zend_bool) (mode & (PHP_OUTPUT_HANDLER_END | PHP_OUTPUT_HANDLER_CONT | PHP_OUTPUT_HANDLER_FLUSH | PHP_OUTPUT_HANDLER_FINAL) ? 1 : 0) TSRMLS_CC);
  379. if (sizeof(uint) < sizeof(size_t)) {
  380. if (len > UINT_MAX)
  381. len = UINT_MAX;
  382. }
  383. *handled_output_len = len;
  384. } else if (BG(url_adapt_state_ex).url_app.len == 0) {
  385. url_adapt_state_ex_t *ctx = &BG(url_adapt_state_ex);
  386. if (ctx->buf.len) {
  387. smart_str_appendl(&ctx->result, ctx->buf.c, ctx->buf.len);
  388. smart_str_appendl(&ctx->result, output, output_len);
  389. *handled_output = ctx->result.c;
  390. *handled_output_len = ctx->buf.len + output_len;
  391. ctx->result.c = NULL;
  392. ctx->result.len = 0;
  393. smart_str_free(&ctx->buf);
  394. } else {
  395. *handled_output = estrndup(output, *handled_output_len = output_len);
  396. }
  397. } else {
  398. *handled_output = NULL;
  399. }
  400. }
  401. PHPAPI int php_url_scanner_add_var(char *name, int name_len, char *value, int value_len, int urlencode TSRMLS_DC)
  402. {
  403. char *encoded = NULL;
  404. int encoded_len;
  405. smart_str val;
  406. if (! BG(url_adapt_state_ex).active) {
  407. php_url_scanner_ex_activate(TSRMLS_C);
  408. php_output_start_internal(ZEND_STRL("URL-Rewriter"), php_url_scanner_output_handler, 0, PHP_OUTPUT_HANDLER_STDFLAGS TSRMLS_CC);
  409. BG(url_adapt_state_ex).active = 1;
  410. }
  411. if (BG(url_adapt_state_ex).url_app.len != 0) {
  412. smart_str_appends(&BG(url_adapt_state_ex).url_app, PG(arg_separator).output);
  413. }
  414. if (urlencode) {
  415. encoded = php_url_encode(value, value_len, &encoded_len);
  416. smart_str_setl(&val, encoded, encoded_len);
  417. } else {
  418. smart_str_setl(&val, value, value_len);
  419. }
  420. smart_str_appendl(&BG(url_adapt_state_ex).url_app, name, name_len);
  421. smart_str_appendc(&BG(url_adapt_state_ex).url_app, '=');
  422. smart_str_append(&BG(url_adapt_state_ex).url_app, &val);
  423. smart_str_appends(&BG(url_adapt_state_ex).form_app, "<input type=\"hidden\" name=\"");
  424. smart_str_appendl(&BG(url_adapt_state_ex).form_app, name, name_len);
  425. smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" value=\"");
  426. smart_str_append(&BG(url_adapt_state_ex).form_app, &val);
  427. smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" />");
  428. if (urlencode)
  429. efree(encoded);
  430. return SUCCESS;
  431. }
  432. PHPAPI int php_url_scanner_reset_vars(TSRMLS_D)
  433. {
  434. BG(url_adapt_state_ex).form_app.len = 0;
  435. BG(url_adapt_state_ex).url_app.len = 0;
  436. return SUCCESS;
  437. }
  438. PHP_MINIT_FUNCTION(url_scanner)
  439. {
  440. BG(url_adapt_state_ex).tags = NULL;
  441. BG(url_adapt_state_ex).form_app.c = BG(url_adapt_state_ex).url_app.c = 0;
  442. BG(url_adapt_state_ex).form_app.len = BG(url_adapt_state_ex).url_app.len = 0;
  443. REGISTER_INI_ENTRIES();
  444. return SUCCESS;
  445. }
  446. PHP_MSHUTDOWN_FUNCTION(url_scanner)
  447. {
  448. UNREGISTER_INI_ENTRIES();
  449. return SUCCESS;
  450. }
  451. PHP_RINIT_FUNCTION(url_scanner)
  452. {
  453. BG(url_adapt_state_ex).active = 0;
  454. return SUCCESS;
  455. }
  456. PHP_RSHUTDOWN_FUNCTION(url_scanner)
  457. {
  458. if (BG(url_adapt_state_ex).active) {
  459. php_url_scanner_ex_deactivate(TSRMLS_C);
  460. BG(url_adapt_state_ex).active = 0;
  461. }
  462. smart_str_free(&BG(url_adapt_state_ex).form_app);
  463. smart_str_free(&BG(url_adapt_state_ex).url_app);
  464. return SUCCESS;
  465. }