tokenizer.c 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2016 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: Andrei Zmievski <andrei@php.net> |
  16. +----------------------------------------------------------------------+
  17. */
  18. /* $Id$ */
  #ifdef HAVE_CONFIG_H
  #include "config.h"
  #endif

  #include <limits.h>

  #include "php.h"
  #include "php_ini.h"
  #include "ext/standard/info.h"
  #include "php_tokenizer.h"
  #include "zend.h"
  #include "zend_language_scanner.h"
  #include "zend_language_scanner_defs.h"
  #include <zend_language_parser.h>
  /*
   * Shorthands for fields of the language scanner's global state.  In this
   * file zendtext/zendleng are used as the text and length of the token just
   * returned by lex_scan(); zendcursor/zendlimit delimit the input that has
   * not been scanned yet.
   */
  #define zendtext   LANG_SCNG(yy_text)
  #define zendleng   LANG_SCNG(yy_leng)
  #define zendcursor LANG_SCNG(yy_cursor)
  #define zendlimit  LANG_SCNG(yy_limit)
  34. /* {{{ arginfo */
  35. ZEND_BEGIN_ARG_INFO_EX(arginfo_token_get_all, 0, 0, 1)
  36. ZEND_ARG_INFO(0, source)
  37. ZEND_END_ARG_INFO()
  38. ZEND_BEGIN_ARG_INFO_EX(arginfo_token_name, 0, 0, 1)
  39. ZEND_ARG_INFO(0, token)
  40. ZEND_END_ARG_INFO()
  41. /* }}} */
  42. /* {{{ tokenizer_functions[]
  43. *
  44. * Every user visible function must have an entry in tokenizer_functions[].
  45. */
  46. const zend_function_entry tokenizer_functions[] = {
  47. PHP_FE(token_get_all, arginfo_token_get_all)
  48. PHP_FE(token_name, arginfo_token_name)
  49. PHP_FE_END
  50. };
  51. /* }}} */
  52. /* {{{ tokenizer_module_entry
  53. */
  54. zend_module_entry tokenizer_module_entry = {
  55. #if ZEND_MODULE_API_NO >= 20010901
  56. STANDARD_MODULE_HEADER,
  57. #endif
  58. "tokenizer",
  59. tokenizer_functions,
  60. PHP_MINIT(tokenizer),
  61. NULL,
  62. NULL,
  63. NULL,
  64. PHP_MINFO(tokenizer),
  65. #if ZEND_MODULE_API_NO >= 20010901
  66. "0.1", /* Replace with version number for your extension */
  67. #endif
  68. STANDARD_MODULE_PROPERTIES
  69. };
  70. /* }}} */
  /*
   * Emitted only when COMPILE_DL_TOKENIZER is defined (presumably the
   * shared-object build of the extension -- confirm against the build config).
   */
  #if defined(COMPILE_DL_TOKENIZER)
  ZEND_GET_MODULE(tokenizer)
  #endif
  74. /* {{{ PHP_MINIT_FUNCTION
  75. */
  76. PHP_MINIT_FUNCTION(tokenizer)
  77. {
  78. tokenizer_register_constants(INIT_FUNC_ARGS_PASSTHRU);
  79. return SUCCESS;
  80. }
  81. /* }}} */
  /* {{{ PHP_MINFO_FUNCTION
   *
   * Module info hook: prints a one-row, two-column table stating that
   * tokenizer support is enabled.
   */
  PHP_MINFO_FUNCTION(tokenizer)
  {
      php_info_print_table_start();
      php_info_print_table_row(2, "Tokenizer Support", "enabled");
      php_info_print_table_end();
  }
  /* }}} */
  91. static void tokenize(zval *return_value TSRMLS_DC)
  92. {
  93. zval token;
  94. zval *keyword;
  95. int token_type;
  96. zend_bool destroy;
  97. int token_line = 1;
  98. int need_tokens = -1; // for __halt_compiler lexing. -1 = disabled
  99. array_init(return_value);
  100. ZVAL_NULL(&token);
  101. while ((token_type = lex_scan(&token TSRMLS_CC))) {
  102. destroy = 1;
  103. switch (token_type) {
  104. case T_CLOSE_TAG:
  105. if (zendtext[zendleng - 1] != '>') {
  106. CG(zend_lineno)++;
  107. }
  108. case T_OPEN_TAG:
  109. case T_OPEN_TAG_WITH_ECHO:
  110. case T_WHITESPACE:
  111. case T_COMMENT:
  112. case T_DOC_COMMENT:
  113. destroy = 0;
  114. break;
  115. }
  116. if (token_type >= 256) {
  117. MAKE_STD_ZVAL(keyword);
  118. array_init(keyword);
  119. add_next_index_long(keyword, token_type);
  120. if (token_type == T_END_HEREDOC) {
  121. if (CG(increment_lineno)) {
  122. token_line = ++CG(zend_lineno);
  123. CG(increment_lineno) = 0;
  124. }
  125. }
  126. add_next_index_stringl(keyword, (char *)zendtext, zendleng, 1);
  127. add_next_index_long(keyword, token_line);
  128. add_next_index_zval(return_value, keyword);
  129. } else {
  130. add_next_index_stringl(return_value, (char *)zendtext, zendleng, 1);
  131. }
  132. if (destroy && Z_TYPE(token) != IS_NULL) {
  133. zval_dtor(&token);
  134. }
  135. ZVAL_NULL(&token);
  136. // after T_HALT_COMPILER collect the next three non-dropped tokens
  137. if (need_tokens != -1) {
  138. if (token_type != T_WHITESPACE && token_type != T_OPEN_TAG
  139. && token_type != T_COMMENT && token_type != T_DOC_COMMENT
  140. && --need_tokens == 0
  141. ) {
  142. // fetch the rest into a T_INLINE_HTML
  143. if (zendcursor != zendlimit) {
  144. MAKE_STD_ZVAL(keyword);
  145. array_init(keyword);
  146. add_next_index_long(keyword, T_INLINE_HTML);
  147. add_next_index_stringl(keyword, (char *)zendcursor, zendlimit - zendcursor, 1);
  148. add_next_index_long(keyword, token_line);
  149. add_next_index_zval(return_value, keyword);
  150. }
  151. break;
  152. }
  153. } else if (token_type == T_HALT_COMPILER) {
  154. need_tokens = 3;
  155. }
  156. token_line = CG(zend_lineno);
  157. }
  158. }
  159. /* {{{ proto array token_get_all(string source)
  160. */
  161. PHP_FUNCTION(token_get_all)
  162. {
  163. char *source = NULL;
  164. int argc = ZEND_NUM_ARGS();
  165. int source_len;
  166. zval source_z;
  167. zend_lex_state original_lex_state;
  168. if (zend_parse_parameters(argc TSRMLS_CC, "s", &source, &source_len) == FAILURE) {
  169. return;
  170. }
  171. ZVAL_STRINGL(&source_z, source, source_len, 1);
  172. zend_save_lexical_state(&original_lex_state TSRMLS_CC);
  173. if (zend_prepare_string_for_scanning(&source_z, "" TSRMLS_CC) == FAILURE) {
  174. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  175. RETURN_FALSE;
  176. }
  177. LANG_SCNG(yy_state) = yycINITIAL;
  178. tokenize(return_value TSRMLS_CC);
  179. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  180. zval_dtor(&source_z);
  181. }
  182. /* }}} */
  183. /* {{{ proto string token_name(int type)
  184. */
  185. PHP_FUNCTION(token_name)
  186. {
  187. int argc = ZEND_NUM_ARGS();
  188. long type;
  189. if (zend_parse_parameters(argc TSRMLS_CC, "l", &type) == FAILURE) {
  190. return;
  191. }
  192. RETVAL_STRING(get_token_type_name(type), 1);
  193. }
  194. /* }}} */
  195. /*
  196. * Local variables:
  197. * tab-width: 4
  198. * c-basic-offset: 4
  199. * End:
  200. * vim600: noet sw=4 ts=4 fdm=marker
  201. * vim<600: noet sw=4 ts=4
  202. */