tidy.c 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2016 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: John Coggeshall <john@php.net> |
  16. +----------------------------------------------------------------------+
  17. */
  18. /* $Id$ */
  19. #ifdef HAVE_CONFIG_H
  20. #include "config.h"
  21. #endif
  22. #include "php.h"
  23. #include "php_tidy.h"
  24. #if HAVE_TIDY
  25. #include "php_ini.h"
  26. #include "ext/standard/info.h"
  27. #include "tidy.h"
  28. #include "buffio.h"
  29. /* compatibility with older versions of libtidy */
  30. #ifndef TIDY_CALL
  31. #define TIDY_CALL
  32. #endif
  33. #define PHP_TIDY_MODULE_VERSION "2.0"
  34. /* {{{ ext/tidy macros
  35. */
  /*
   * NUL-terminate a tidy buffer in place by overwriting its last byte.
   * A buffer whose size is 0 is left untouched.
   */
  #define FIX_BUFFER(bptr) \
      do { \
          if ((bptr)->size != 0) { \
              (bptr)->bp[(bptr)->size - 1] = '\0'; \
          } \
      } while (0)
  /* Declares `object` and initialises it from getThis(). */
  #define TIDY_SET_CONTEXT \
      zval *object = getThis();

  /*
   * Declares `obj` and resolves the PHPTidyObj for the current call.
   *
   * - If getThis() produced an object, the call must carry no arguments;
   *   on a parse failure the enclosing function returns without a value.
   * - Otherwise the object is read from the arguments ("O", restricted to
   *   tidy_ce_doc); on a parse failure the enclosing function returns FALSE.
   *
   * The macro deliberately ends without a line continuation so that the
   * next #define is not spliced into this replacement list.
   */
  #define TIDY_FETCH_OBJECT \
      PHPTidyObj *obj; \
      TIDY_SET_CONTEXT; \
      if (object) { \
          if (zend_parse_parameters_none() == FAILURE) { \
              return; \
          } \
      } else { \
          if (zend_parse_method_parameters(ZEND_NUM_ARGS() TSRMLS_CC, NULL, "O", &object, tidy_ce_doc) == FAILURE) { \
              RETURN_FALSE; \
          } \
      } \
      obj = (PHPTidyObj *) zend_object_store_get_object(object TSRMLS_CC);
  /*
   * Declares `obj` and resolves the PHPTidyObj behind getThis().
   * The call must carry no arguments; on a parse failure the enclosing
   * function returns without a value.
   *
   * The macro deliberately ends without a line continuation so that the
   * next #define is not spliced into this replacement list.
   */
  #define TIDY_FETCH_ONLY_OBJECT \
      PHPTidyObj *obj; \
      TIDY_SET_CONTEXT; \
      if (zend_parse_parameters_none() == FAILURE) { \
          return; \
      } \
      obj = (PHPTidyObj *) zend_object_store_get_object(object TSRMLS_CC);
  /*
   * Apply a user-supplied configuration (zval **) to a TidyDoc.
   *
   * - NULL: nothing happens.
   * - Array: handed to _php_tidy_apply_config_array().
   * - Anything else: converted to a string and treated as a configuration
   *   file path. The path goes through TIDY_OPEN_BASE_DIR_CHECK (which may
   *   return FALSE from the enclosing function) and is then loaded with
   *   tidyLoadConfig(); -1 raises a warning, 1 raises a notice.
   */
  #define TIDY_APPLY_CONFIG_ZVAL(_doc, _val) \
      if (_val) { \
          if (Z_TYPE_PP(_val) != IS_ARRAY) { \
              convert_to_string_ex(_val); \
              TIDY_OPEN_BASE_DIR_CHECK(Z_STRVAL_PP(_val)); \
              switch (tidyLoadConfig(_doc, Z_STRVAL_PP(_val))) { \
                  case -1: \
                      php_error_docref(NULL TSRMLS_CC, E_WARNING, "Could not load configuration file '%s'", Z_STRVAL_PP(_val)); \
                      break; \
                  case 1: \
                      php_error_docref(NULL TSRMLS_CC, E_NOTICE, "There were errors while parsing the configuration file '%s'", Z_STRVAL_PP(_val)); \
                      break; \
              } \
          } else { \
              _php_tidy_apply_config_array(_doc, HASH_OF(*_val) TSRMLS_CC); \
          } \
      }
  /*
   * Register an internal class and prepare its object handlers.
   *
   *   classname - PHP-visible class name (stringified)
   *   name      - suffix selecting tidy_funcs_<name>, tidy_object_new_<name>,
   *               tidy_ce_<name> and tidy_object_handlers_<name>
   *   parent    - parent class entry passed to the registration call
   *   __flags   - extra bits OR-ed into the new class entry's ce_flags
   *
   * The handler table starts as a copy of the standard handlers with
   * clone_obj cleared.
   */
  #define REGISTER_TIDY_CLASS(classname, name, parent, __flags) \
      { \
          zend_class_entry tidy_class_def; \
          INIT_CLASS_ENTRY(tidy_class_def, #classname, tidy_funcs_##name); \
          tidy_class_def.create_object = tidy_object_new_##name; \
          tidy_ce_##name = zend_register_internal_class_ex(&tidy_class_def, parent, NULL TSRMLS_CC); \
          tidy_ce_##name->ce_flags |= __flags; \
          memcpy(&tidy_object_handlers_##name, zend_get_std_object_handlers(), sizeof(zend_object_handlers)); \
          tidy_object_handlers_##name.clone_obj = NULL; \
      }
  /* Register TIDY_TAG_<tag> as a persistent, case-sensitive long constant with the value TidyTag_<tag>. */
  #define TIDY_TAG_CONST(tag) \
      REGISTER_LONG_CONSTANT("TIDY_TAG_" #tag, TidyTag_##tag, CONST_CS | CONST_PERSISTENT)

  /* Register TIDY_NODETYPE_<name> as a persistent, case-sensitive long constant with the value TidyNode_<type>. */
  #define TIDY_NODE_CONST(name, type) \
      REGISTER_LONG_CONSTANT("TIDY_NODETYPE_" #name, TidyNode_##type, CONST_CS | CONST_PERSISTENT)

  /* Fallback boolean constants, defined only when no earlier header provided them. */
  #if !defined(TRUE)
  #define TRUE 1
  #endif

  #if !defined(FALSE)
  #define FALSE 0
  #endif
  /*
   * Store a string property named <_key> in hash table _table.
   * A new zval is allocated; it holds a copy of _string, or the empty
   * string when _string is NULL.
   */
  #define ADD_PROPERTY_STRING(_table, _key, _string) \
      { \
          zval *tidy_prop_zv; \
          MAKE_STD_ZVAL(tidy_prop_zv); \
          if (!(_string)) { \
              ZVAL_EMPTY_STRING(tidy_prop_zv); \
          } else { \
              ZVAL_STRING(tidy_prop_zv, (char *)_string, 1); \
          } \
          zend_hash_update(_table, #_key, sizeof(#_key), (void *)&tidy_prop_zv, sizeof(zval *), NULL); \
      }
  /*
   * Store a string property named <_key> in hash table _table, copying
   * exactly _len bytes from _string. A NULL _string yields the empty string.
   */
  #define ADD_PROPERTY_STRINGL(_table, _key, _string, _len) \
      { \
          zval *tidy_prop_zv; \
          MAKE_STD_ZVAL(tidy_prop_zv); \
          if (!(_string)) { \
              ZVAL_EMPTY_STRING(tidy_prop_zv); \
          } else { \
              ZVAL_STRINGL(tidy_prop_zv, (char *)_string, _len, 1); \
          } \
          zend_hash_update(_table, #_key, sizeof(#_key), (void *)&tidy_prop_zv, sizeof(zval *), NULL); \
      }
  /* Store an integer property named <_key>, holding _long, in hash table _table. */
  #define ADD_PROPERTY_LONG(_table, _key, _long) \
      { \
          zval *tidy_prop_zv; \
          MAKE_STD_ZVAL(tidy_prop_zv); \
          ZVAL_LONG(tidy_prop_zv, _long); \
          zend_hash_update(_table, #_key, sizeof(#_key), (void *)&tidy_prop_zv, sizeof(zval *), NULL); \
      }
  /* Store a NULL property named <_key> in hash table _table. */
  #define ADD_PROPERTY_NULL(_table, _key) \
      { \
          zval *tidy_prop_zv; \
          MAKE_STD_ZVAL(tidy_prop_zv); \
          ZVAL_NULL(tidy_prop_zv); \
          zend_hash_update(_table, #_key, sizeof(#_key), (void *)&tidy_prop_zv, sizeof(zval *), NULL); \
      }
  /* Store a boolean property named <_key>, holding _bool, in hash table _table. */
  #define ADD_PROPERTY_BOOL(_table, _key, _bool) \
      { \
          zval *tidy_prop_zv; \
          MAKE_STD_ZVAL(tidy_prop_zv); \
          ZVAL_BOOL(tidy_prop_zv, _bool); \
          zend_hash_update(_table, #_key, sizeof(#_key), (void *)&tidy_prop_zv, sizeof(zval *), NULL); \
      }
  /*
   * Return FALSE from the enclosing PHP function when php_check_open_basedir()
   * reports a non-zero result for `filename`.
   *
   * The macro deliberately ends without a line continuation: a trailing
   * backslash would splice the following #define into this function-like
   * macro's replacement list.
   */
  #define TIDY_OPEN_BASE_DIR_CHECK(filename) \
      if (php_check_open_basedir(filename TSRMLS_CC)) { \
          RETURN_FALSE; \
      }
  /*
   * Load the configuration file named by the tidy.default_config setting
   * into _doc. Nothing happens when the setting is NULL or empty; a negative
   * result from tidyLoadConfig() raises a warning.
   */
  #define TIDY_SET_DEFAULT_CONFIG(_doc) \
      if (TG(default_config) != NULL && TG(default_config)[0] != '\0') { \
          if (tidyLoadConfig(_doc, TG(default_config)) < 0) { \
              php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to load Tidy configuration file at '%s'.", TG(default_config)); \
          } \
      }
  147. /* }}} */
  148. /* {{{ ext/tidy structs
  149. */
  /* Short type names for the two extension-private structures. */
  typedef struct _PHPTidyDoc PHPTidyDoc;
  typedef struct _PHPTidyObj PHPTidyObj;

  /* Kind of PHP object backed by a PHPTidyObj. */
  typedef enum {
      is_node = 0,
      is_doc  = 1
  } tidy_obj_type;

  /* Selector for one of the base nodes of a document. */
  typedef enum {
      is_root_node = 0,
      is_html_node = 1,
      is_head_node = 2,
      is_body_node = 3
  } tidy_base_nodetypes;
  162. struct _PHPTidyDoc {
  163. TidyDoc doc;
  164. TidyBuffer *errbuf;
  165. unsigned int ref_count;
  166. unsigned int initialized:1;
  167. };
  168. struct _PHPTidyObj {
  169. zend_object std;
  170. TidyNode node;
  171. tidy_obj_type type;
  172. PHPTidyDoc *ptdoc;
  173. };
  174. /* }}} */
  175. /* {{{ ext/tidy prototypes
  176. */
  177. static char *php_tidy_file_to_mem(char *, zend_bool, int * TSRMLS_DC);
  178. static void tidy_object_free_storage(void * TSRMLS_DC);
  179. static zend_object_value tidy_object_new_node(zend_class_entry * TSRMLS_DC);
  180. static zend_object_value tidy_object_new_doc(zend_class_entry * TSRMLS_DC);
  181. static zval * tidy_instanciate(zend_class_entry *, zval * TSRMLS_DC);
  182. static int tidy_doc_cast_handler(zval *, zval *, int TSRMLS_DC);
  183. static int tidy_node_cast_handler(zval *, zval *, int TSRMLS_DC);
  184. static void tidy_doc_update_properties(PHPTidyObj * TSRMLS_DC);
  185. static void tidy_add_default_properties(PHPTidyObj *, tidy_obj_type TSRMLS_DC);
  186. static void *php_tidy_get_opt_val(PHPTidyDoc *, TidyOption, TidyOptionType * TSRMLS_DC);
  187. static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes);
  188. static int _php_tidy_set_tidy_opt(TidyDoc, char *, zval * TSRMLS_DC);
  189. static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options TSRMLS_DC);
  190. static void _php_tidy_register_nodetypes(INIT_FUNC_ARGS);
  191. static void _php_tidy_register_tags(INIT_FUNC_ARGS);
  192. static PHP_INI_MH(php_tidy_set_clean_output);
  193. static void php_tidy_clean_output_start(const char *name, size_t name_len TSRMLS_DC);
  194. static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags TSRMLS_DC);
  195. static int php_tidy_output_handler(void **nothing, php_output_context *output_context);
  196. static PHP_MINIT_FUNCTION(tidy);
  197. static PHP_MSHUTDOWN_FUNCTION(tidy);
  198. static PHP_RINIT_FUNCTION(tidy);
  199. static PHP_MINFO_FUNCTION(tidy);
  200. static PHP_FUNCTION(tidy_getopt);
  201. static PHP_FUNCTION(tidy_parse_string);
  202. static PHP_FUNCTION(tidy_parse_file);
  203. static PHP_FUNCTION(tidy_clean_repair);
  204. static PHP_FUNCTION(tidy_repair_string);
  205. static PHP_FUNCTION(tidy_repair_file);
  206. static PHP_FUNCTION(tidy_diagnose);
  207. static PHP_FUNCTION(tidy_get_output);
  208. static PHP_FUNCTION(tidy_get_error_buffer);
  209. static PHP_FUNCTION(tidy_get_release);
  210. static PHP_FUNCTION(tidy_get_config);
  211. static PHP_FUNCTION(tidy_get_status);
  212. static PHP_FUNCTION(tidy_get_html_ver);
  213. #if HAVE_TIDYOPTGETDOC
  214. static PHP_FUNCTION(tidy_get_opt_doc);
  215. #endif
  216. static PHP_FUNCTION(tidy_is_xhtml);
  217. static PHP_FUNCTION(tidy_is_xml);
  218. static PHP_FUNCTION(tidy_error_count);
  219. static PHP_FUNCTION(tidy_warning_count);
  220. static PHP_FUNCTION(tidy_access_count);
  221. static PHP_FUNCTION(tidy_config_count);
  222. static PHP_FUNCTION(tidy_get_root);
  223. static PHP_FUNCTION(tidy_get_html);
  224. static PHP_FUNCTION(tidy_get_head);
  225. static PHP_FUNCTION(tidy_get_body);
  226. static TIDY_DOC_METHOD(__construct);
  227. static TIDY_DOC_METHOD(parseFile);
  228. static TIDY_DOC_METHOD(parseString);
  229. static TIDY_NODE_METHOD(hasChildren);
  230. static TIDY_NODE_METHOD(hasSiblings);
  231. static TIDY_NODE_METHOD(isComment);
  232. static TIDY_NODE_METHOD(isHtml);
  233. static TIDY_NODE_METHOD(isText);
  234. static TIDY_NODE_METHOD(isJste);
  235. static TIDY_NODE_METHOD(isAsp);
  236. static TIDY_NODE_METHOD(isPhp);
  237. static TIDY_NODE_METHOD(getParent);
  238. static TIDY_NODE_METHOD(__construct);
  239. /* }}} */
  240. ZEND_DECLARE_MODULE_GLOBALS(tidy)
  241. PHP_INI_BEGIN()
  242. STD_PHP_INI_ENTRY("tidy.default_config", "", PHP_INI_SYSTEM, OnUpdateString, default_config, zend_tidy_globals, tidy_globals)
  243. STD_PHP_INI_ENTRY("tidy.clean_output", "0", PHP_INI_USER, php_tidy_set_clean_output, clean_output, zend_tidy_globals, tidy_globals)
  244. PHP_INI_END()
  245. /* {{{ arginfo */
  246. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_parse_string, 0, 0, 1)
  247. ZEND_ARG_INFO(0, input)
  248. ZEND_ARG_INFO(0, config_options)
  249. ZEND_ARG_INFO(0, encoding)
  250. ZEND_END_ARG_INFO()
  251. ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_error_buffer, 0)
  252. ZEND_END_ARG_INFO()
  253. ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_output, 0)
  254. ZEND_END_ARG_INFO()
  255. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_parse_file, 0, 0, 1)
  256. ZEND_ARG_INFO(0, file)
  257. ZEND_ARG_INFO(0, config_options)
  258. ZEND_ARG_INFO(0, encoding)
  259. ZEND_ARG_INFO(0, use_include_path)
  260. ZEND_END_ARG_INFO()
  261. ZEND_BEGIN_ARG_INFO(arginfo_tidy_clean_repair, 0)
  262. ZEND_END_ARG_INFO()
  263. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_repair_string, 0, 0, 1)
  264. ZEND_ARG_INFO(0, data)
  265. ZEND_ARG_INFO(0, config_file)
  266. ZEND_ARG_INFO(0, encoding)
  267. ZEND_END_ARG_INFO()
  268. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_repair_file, 0, 0, 1)
  269. ZEND_ARG_INFO(0, filename)
  270. ZEND_ARG_INFO(0, config_file)
  271. ZEND_ARG_INFO(0, encoding)
  272. ZEND_ARG_INFO(0, use_include_path)
  273. ZEND_END_ARG_INFO()
  274. ZEND_BEGIN_ARG_INFO(arginfo_tidy_diagnose, 0)
  275. ZEND_END_ARG_INFO()
  276. ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_release, 0)
  277. ZEND_END_ARG_INFO()
  278. #if HAVE_TIDYOPTGETDOC
  279. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_opt_doc, 0, 0, 2)
  280. ZEND_ARG_INFO(0, resource)
  281. ZEND_ARG_INFO(0, optname)
  282. ZEND_END_ARG_INFO()
  283. #endif
  284. ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_config, 0)
  285. ZEND_END_ARG_INFO()
  286. ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_status, 0)
  287. ZEND_END_ARG_INFO()
  288. ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_html_ver, 0)
  289. ZEND_END_ARG_INFO()
  290. ZEND_BEGIN_ARG_INFO(arginfo_tidy_is_xhtml, 0)
  291. ZEND_END_ARG_INFO()
  292. ZEND_BEGIN_ARG_INFO(arginfo_tidy_is_xml, 0)
  293. ZEND_END_ARG_INFO()
  294. ZEND_BEGIN_ARG_INFO(arginfo_tidy_error_count, 0)
  295. ZEND_END_ARG_INFO()
  296. ZEND_BEGIN_ARG_INFO(arginfo_tidy_warning_count, 0)
  297. ZEND_END_ARG_INFO()
  298. ZEND_BEGIN_ARG_INFO(arginfo_tidy_access_count, 0)
  299. ZEND_END_ARG_INFO()
  300. ZEND_BEGIN_ARG_INFO(arginfo_tidy_config_count, 0)
  301. ZEND_END_ARG_INFO()
  302. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_getopt, 0, 0, 1)
  303. ZEND_ARG_INFO(0, option)
  304. ZEND_END_ARG_INFO()
  305. ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_root, 0)
  306. ZEND_END_ARG_INFO()
  307. ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_html, 0)
  308. ZEND_END_ARG_INFO()
  309. ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_head, 0)
  310. ZEND_END_ARG_INFO()
  311. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_body, 0, 0, 1)
  312. ZEND_ARG_INFO(0, tidy)
  313. ZEND_END_ARG_INFO()
  314. /* }}} */
  315. static const zend_function_entry tidy_functions[] = {
  316. PHP_FE(tidy_getopt, arginfo_tidy_getopt)
  317. PHP_FE(tidy_parse_string, arginfo_tidy_parse_string)
  318. PHP_FE(tidy_parse_file, arginfo_tidy_parse_file)
  319. PHP_FE(tidy_get_output, arginfo_tidy_get_output)
  320. PHP_FE(tidy_get_error_buffer, arginfo_tidy_get_error_buffer)
  321. PHP_FE(tidy_clean_repair, arginfo_tidy_clean_repair)
  322. PHP_FE(tidy_repair_string, arginfo_tidy_repair_string)
  323. PHP_FE(tidy_repair_file, arginfo_tidy_repair_file)
  324. PHP_FE(tidy_diagnose, arginfo_tidy_diagnose)
  325. PHP_FE(tidy_get_release, arginfo_tidy_get_release)
  326. PHP_FE(tidy_get_config, arginfo_tidy_get_config)
  327. PHP_FE(tidy_get_status, arginfo_tidy_get_status)
  328. PHP_FE(tidy_get_html_ver, arginfo_tidy_get_html_ver)
  329. PHP_FE(tidy_is_xhtml, arginfo_tidy_is_xhtml)
  330. PHP_FE(tidy_is_xml, arginfo_tidy_is_xml)
  331. PHP_FE(tidy_error_count, arginfo_tidy_error_count)
  332. PHP_FE(tidy_warning_count, arginfo_tidy_warning_count)
  333. PHP_FE(tidy_access_count, arginfo_tidy_access_count)
  334. PHP_FE(tidy_config_count, arginfo_tidy_config_count)
  335. #if HAVE_TIDYOPTGETDOC
  336. PHP_FE(tidy_get_opt_doc, arginfo_tidy_get_opt_doc)
  337. #endif
  338. PHP_FE(tidy_get_root, arginfo_tidy_get_root)
  339. PHP_FE(tidy_get_head, arginfo_tidy_get_head)
  340. PHP_FE(tidy_get_html, arginfo_tidy_get_html)
  341. PHP_FE(tidy_get_body, arginfo_tidy_get_body)
  342. PHP_FE_END
  343. };
  344. static const zend_function_entry tidy_funcs_doc[] = {
  345. TIDY_METHOD_MAP(getOpt, tidy_getopt, NULL)
  346. TIDY_METHOD_MAP(cleanRepair, tidy_clean_repair, NULL)
  347. TIDY_DOC_ME(parseFile, NULL)
  348. TIDY_DOC_ME(parseString, NULL)
  349. TIDY_METHOD_MAP(repairString, tidy_repair_string, NULL)
  350. TIDY_METHOD_MAP(repairFile, tidy_repair_file, NULL)
  351. TIDY_METHOD_MAP(diagnose, tidy_diagnose, NULL)
  352. TIDY_METHOD_MAP(getRelease, tidy_get_release, NULL)
  353. TIDY_METHOD_MAP(getConfig, tidy_get_config, NULL)
  354. TIDY_METHOD_MAP(getStatus, tidy_get_status, NULL)
  355. TIDY_METHOD_MAP(getHtmlVer, tidy_get_html_ver, NULL)
  356. #if HAVE_TIDYOPTGETDOC
  357. TIDY_METHOD_MAP(getOptDoc, tidy_get_opt_doc, NULL)
  358. #endif
  359. TIDY_METHOD_MAP(isXhtml, tidy_is_xhtml, NULL)
  360. TIDY_METHOD_MAP(isXml, tidy_is_xml, NULL)
  361. TIDY_METHOD_MAP(root, tidy_get_root, NULL)
  362. TIDY_METHOD_MAP(head, tidy_get_head, NULL)
  363. TIDY_METHOD_MAP(html, tidy_get_html, NULL)
  364. TIDY_METHOD_MAP(body, tidy_get_body, NULL)
  365. TIDY_DOC_ME(__construct, NULL)
  366. PHP_FE_END
  367. };
  368. static const zend_function_entry tidy_funcs_node[] = {
  369. TIDY_NODE_ME(hasChildren, NULL)
  370. TIDY_NODE_ME(hasSiblings, NULL)
  371. TIDY_NODE_ME(isComment, NULL)
  372. TIDY_NODE_ME(isHtml, NULL)
  373. TIDY_NODE_ME(isText, NULL)
  374. TIDY_NODE_ME(isJste, NULL)
  375. TIDY_NODE_ME(isAsp, NULL)
  376. TIDY_NODE_ME(isPhp, NULL)
  377. TIDY_NODE_ME(getParent, NULL)
  378. TIDY_NODE_PRIVATE_ME(__construct, NULL)
  379. PHP_FE_END
  380. };
  381. static zend_class_entry *tidy_ce_doc, *tidy_ce_node;
  382. static zend_object_handlers tidy_object_handlers_doc;
  383. static zend_object_handlers tidy_object_handlers_node;
  384. zend_module_entry tidy_module_entry = {
  385. STANDARD_MODULE_HEADER,
  386. "tidy",
  387. tidy_functions,
  388. PHP_MINIT(tidy),
  389. PHP_MSHUTDOWN(tidy),
  390. PHP_RINIT(tidy),
  391. NULL,
  392. PHP_MINFO(tidy),
  393. PHP_TIDY_MODULE_VERSION,
  394. PHP_MODULE_GLOBALS(tidy),
  395. NULL,
  396. NULL,
  397. NULL,
  398. STANDARD_MODULE_PROPERTIES_EX
  399. };
  400. #ifdef COMPILE_DL_TIDY
  401. ZEND_GET_MODULE(tidy)
  402. #endif
  403. static void* TIDY_CALL php_tidy_malloc(size_t len)
  404. {
  405. return emalloc(len);
  406. }
  407. static void* TIDY_CALL php_tidy_realloc(void *buf, size_t len)
  408. {
  409. return erealloc(buf, len);
  410. }
  411. static void TIDY_CALL php_tidy_free(void *buf)
  412. {
  413. efree(buf);
  414. }
  415. static void TIDY_CALL php_tidy_panic(ctmbstr msg)
  416. {
  417. TSRMLS_FETCH();
  418. php_error_docref(NULL TSRMLS_CC, E_ERROR, "Could not allocate memory for tidy! (Reason: %s)", (char *)msg);
  419. }
  420. static int _php_tidy_set_tidy_opt(TidyDoc doc, char *optname, zval *value TSRMLS_DC)
  421. {
  422. TidyOption opt = tidyGetOptionByName(doc, optname);
  423. zval conv = *value;
  424. if (!opt) {
  425. php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Unknown Tidy Configuration Option '%s'", optname);
  426. return FAILURE;
  427. }
  428. if (tidyOptIsReadOnly(opt)) {
  429. php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Attempting to set read-only option '%s'", optname);
  430. return FAILURE;
  431. }
  432. switch(tidyOptGetType(opt)) {
  433. case TidyString:
  434. if (Z_TYPE(conv) != IS_STRING) {
  435. zval_copy_ctor(&conv);
  436. convert_to_string(&conv);
  437. }
  438. if (tidyOptSetValue(doc, tidyOptGetId(opt), Z_STRVAL(conv))) {
  439. if (Z_TYPE(conv) != Z_TYPE_P(value)) {
  440. zval_dtor(&conv);
  441. }
  442. return SUCCESS;
  443. }
  444. if (Z_TYPE(conv) != Z_TYPE_P(value)) {
  445. zval_dtor(&conv);
  446. }
  447. break;
  448. case TidyInteger:
  449. if (Z_TYPE(conv) != IS_LONG) {
  450. zval_copy_ctor(&conv);
  451. convert_to_long(&conv);
  452. }
  453. if (tidyOptSetInt(doc, tidyOptGetId(opt), Z_LVAL(conv))) {
  454. return SUCCESS;
  455. }
  456. break;
  457. case TidyBoolean:
  458. if (Z_TYPE(conv) != IS_LONG) {
  459. zval_copy_ctor(&conv);
  460. convert_to_long(&conv);
  461. }
  462. if (tidyOptSetBool(doc, tidyOptGetId(opt), Z_LVAL(conv))) {
  463. return SUCCESS;
  464. }
  465. break;
  466. default:
  467. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to determine type of configuration option");
  468. break;
  469. }
  470. return FAILURE;
  471. }
  472. static void php_tidy_quick_repair(INTERNAL_FUNCTION_PARAMETERS, zend_bool is_file)
  473. {
  474. char *data=NULL, *arg1, *enc = NULL;
  475. int arg1_len, enc_len = 0, data_len = 0;
  476. zend_bool use_include_path = 0;
  477. TidyDoc doc;
  478. TidyBuffer *errbuf;
  479. zval **config = NULL;
  480. if (is_file) {
  481. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "p|Zsb", &arg1, &arg1_len, &config, &enc, &enc_len, &use_include_path) == FAILURE) {
  482. RETURN_FALSE;
  483. }
  484. if (!(data = php_tidy_file_to_mem(arg1, use_include_path, &data_len TSRMLS_CC))) {
  485. RETURN_FALSE;
  486. }
  487. } else {
  488. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|Zsb", &arg1, &arg1_len, &config, &enc, &enc_len, &use_include_path) == FAILURE) {
  489. RETURN_FALSE;
  490. }
  491. data = arg1;
  492. data_len = arg1_len;
  493. }
  494. doc = tidyCreate();
  495. errbuf = emalloc(sizeof(TidyBuffer));
  496. tidyBufInit(errbuf);
  497. if (tidySetErrorBuffer(doc, errbuf) != 0) {
  498. tidyBufFree(errbuf);
  499. efree(errbuf);
  500. tidyRelease(doc);
  501. php_error_docref(NULL TSRMLS_CC, E_ERROR, "Could not set Tidy error buffer");
  502. }
  503. tidyOptSetBool(doc, TidyForceOutput, yes);
  504. tidyOptSetBool(doc, TidyMark, no);
  505. TIDY_SET_DEFAULT_CONFIG(doc);
  506. if (config) {
  507. TIDY_APPLY_CONFIG_ZVAL(doc, config);
  508. }
  509. if(enc_len) {
  510. if (tidySetCharEncoding(doc, enc) < 0) {
  511. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Could not set encoding '%s'", enc);
  512. RETVAL_FALSE;
  513. }
  514. }
  515. if (data) {
  516. TidyBuffer buf;
  517. tidyBufInit(&buf);
  518. tidyBufAttach(&buf, (byte *) data, data_len);
  519. if (tidyParseBuffer(doc, &buf) < 0) {
  520. php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", errbuf->bp);
  521. RETVAL_FALSE;
  522. } else {
  523. if (tidyCleanAndRepair(doc) >= 0) {
  524. TidyBuffer output;
  525. tidyBufInit(&output);
  526. tidySaveBuffer (doc, &output);
  527. FIX_BUFFER(&output);
  528. RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0, 1);
  529. tidyBufFree(&output);
  530. } else {
  531. RETVAL_FALSE;
  532. }
  533. }
  534. }
  535. if (is_file) {
  536. efree(data);
  537. }
  538. tidyBufFree(errbuf);
  539. efree(errbuf);
  540. tidyRelease(doc);
  541. }
  542. static char *php_tidy_file_to_mem(char *filename, zend_bool use_include_path, int *len TSRMLS_DC)
  543. {
  544. php_stream *stream;
  545. char *data = NULL;
  546. if (!(stream = php_stream_open_wrapper(filename, "rb", (use_include_path ? USE_PATH : 0), NULL))) {
  547. return NULL;
  548. }
  549. if ((*len = (int) php_stream_copy_to_mem(stream, (void*) &data, PHP_STREAM_COPY_ALL, 0)) == 0) {
  550. data = estrdup("");
  551. *len = 0;
  552. }
  553. php_stream_close(stream);
  554. return data;
  555. }
  556. static void tidy_object_free_storage(void *object TSRMLS_DC)
  557. {
  558. PHPTidyObj *intern = (PHPTidyObj *)object;
  559. zend_object_std_dtor(&intern->std TSRMLS_CC);
  560. if (intern->ptdoc) {
  561. intern->ptdoc->ref_count--;
  562. if (intern->ptdoc->ref_count <= 0) {
  563. tidyBufFree(intern->ptdoc->errbuf);
  564. efree(intern->ptdoc->errbuf);
  565. tidyRelease(intern->ptdoc->doc);
  566. efree(intern->ptdoc);
  567. }
  568. }
  569. efree(object);
  570. }
  571. static void tidy_object_new(zend_class_entry *class_type, zend_object_handlers *handlers,
  572. zend_object_value *retval, tidy_obj_type objtype TSRMLS_DC)
  573. {
  574. PHPTidyObj *intern;
  575. intern = emalloc(sizeof(PHPTidyObj));
  576. memset(intern, 0, sizeof(PHPTidyObj));
  577. zend_object_std_init(&intern->std, class_type TSRMLS_CC);
  578. object_properties_init(&intern->std, class_type);
  579. switch(objtype) {
  580. case is_node:
  581. break;
  582. case is_doc:
  583. intern->ptdoc = emalloc(sizeof(PHPTidyDoc));
  584. intern->ptdoc->doc = tidyCreate();
  585. intern->ptdoc->ref_count = 1;
  586. intern->ptdoc->initialized = 0;
  587. intern->ptdoc->errbuf = emalloc(sizeof(TidyBuffer));
  588. tidyBufInit(intern->ptdoc->errbuf);
  589. if (tidySetErrorBuffer(intern->ptdoc->doc, intern->ptdoc->errbuf) != 0) {
  590. tidyBufFree(intern->ptdoc->errbuf);
  591. efree(intern->ptdoc->errbuf);
  592. tidyRelease(intern->ptdoc->doc);
  593. efree(intern->ptdoc);
  594. efree(intern);
  595. php_error_docref(NULL TSRMLS_CC, E_ERROR, "Could not set Tidy error buffer");
  596. }
  597. tidyOptSetBool(intern->ptdoc->doc, TidyForceOutput, yes);
  598. tidyOptSetBool(intern->ptdoc->doc, TidyMark, no);
  599. TIDY_SET_DEFAULT_CONFIG(intern->ptdoc->doc);
  600. tidy_add_default_properties(intern, is_doc TSRMLS_CC);
  601. break;
  602. }
  603. retval->handle = zend_objects_store_put(intern, (zend_objects_store_dtor_t)zend_objects_destroy_object, (zend_objects_free_object_storage_t) tidy_object_free_storage, NULL TSRMLS_CC);
  604. retval->handlers = handlers;
  605. }
  606. static zend_object_value tidy_object_new_node(zend_class_entry *class_type TSRMLS_DC)
  607. {
  608. zend_object_value retval;
  609. tidy_object_new(class_type, &tidy_object_handlers_node, &retval, is_node TSRMLS_CC);
  610. return retval;
  611. }
  612. static zend_object_value tidy_object_new_doc(zend_class_entry *class_type TSRMLS_DC)
  613. {
  614. zend_object_value retval;
  615. tidy_object_new(class_type, &tidy_object_handlers_doc, &retval, is_doc TSRMLS_CC);
  616. return retval;
  617. }
  618. static zval * tidy_instanciate(zend_class_entry *pce, zval *object TSRMLS_DC)
  619. {
  620. if (!object) {
  621. ALLOC_ZVAL(object);
  622. }
  623. Z_TYPE_P(object) = IS_OBJECT;
  624. object_init_ex(object, pce);
  625. Z_SET_REFCOUNT_P(object, 1);
  626. Z_SET_ISREF_P(object);
  627. return object;
  628. }
  629. static int tidy_doc_cast_handler(zval *in, zval *out, int type TSRMLS_DC)
  630. {
  631. TidyBuffer output;
  632. PHPTidyObj *obj;
  633. switch(type) {
  634. case IS_LONG:
  635. ZVAL_LONG(out, 0);
  636. break;
  637. case IS_DOUBLE:
  638. ZVAL_DOUBLE(out, 0);
  639. break;
  640. case IS_BOOL:
  641. ZVAL_BOOL(out, TRUE);
  642. break;
  643. case IS_STRING:
  644. obj = (PHPTidyObj *)zend_object_store_get_object(in TSRMLS_CC);
  645. tidyBufInit(&output);
  646. tidySaveBuffer (obj->ptdoc->doc, &output);
  647. ZVAL_STRINGL(out, (char *) output.bp, output.size ? output.size-1 : 0, 1);
  648. tidyBufFree(&output);
  649. break;
  650. default:
  651. return FAILURE;
  652. }
  653. return SUCCESS;
  654. }
  655. static int tidy_node_cast_handler(zval *in, zval *out, int type TSRMLS_DC)
  656. {
  657. TidyBuffer buf;
  658. PHPTidyObj *obj;
  659. switch(type) {
  660. case IS_LONG:
  661. ZVAL_LONG(out, 0);
  662. break;
  663. case IS_DOUBLE:
  664. ZVAL_DOUBLE(out, 0);
  665. break;
  666. case IS_BOOL:
  667. ZVAL_BOOL(out, TRUE);
  668. break;
  669. case IS_STRING:
  670. obj = (PHPTidyObj *)zend_object_store_get_object(in TSRMLS_CC);
  671. tidyBufInit(&buf);
  672. if (obj->ptdoc) {
  673. tidyNodeGetText(obj->ptdoc->doc, obj->node, &buf);
  674. ZVAL_STRINGL(out, (char *) buf.bp, buf.size-1, 1);
  675. } else {
  676. ZVAL_EMPTY_STRING(out);
  677. }
  678. tidyBufFree(&buf);
  679. break;
  680. default:
  681. return FAILURE;
  682. }
  683. return SUCCESS;
  684. }
  685. static void tidy_doc_update_properties(PHPTidyObj *obj TSRMLS_DC)
  686. {
  687. TidyBuffer output;
  688. zval *temp;
  689. tidyBufInit(&output);
  690. tidySaveBuffer (obj->ptdoc->doc, &output);
  691. if (output.size) {
  692. if (!obj->std.properties) {
  693. rebuild_object_properties(&obj->std);
  694. }
  695. MAKE_STD_ZVAL(temp);
  696. ZVAL_STRINGL(temp, (char*)output.bp, output.size-1, TRUE);
  697. zend_hash_update(obj->std.properties, "value", sizeof("value"), (void *)&temp, sizeof(zval *), NULL);
  698. }
  699. tidyBufFree(&output);
  700. if (obj->ptdoc->errbuf->size) {
  701. if (!obj->std.properties) {
  702. rebuild_object_properties(&obj->std);
  703. }
  704. MAKE_STD_ZVAL(temp);
  705. ZVAL_STRINGL(temp, (char*)obj->ptdoc->errbuf->bp, obj->ptdoc->errbuf->size-1, TRUE);
  706. zend_hash_update(obj->std.properties, "errorBuffer", sizeof("errorBuffer"), (void *)&temp, sizeof(zval *), NULL);
  707. }
  708. }
  709. static void tidy_add_default_properties(PHPTidyObj *obj, tidy_obj_type type TSRMLS_DC)
  710. {
  711. TidyBuffer buf;
  712. TidyAttr tempattr;
  713. TidyNode tempnode;
  714. zval *attribute, *children, *temp;
  715. PHPTidyObj *newobj;
  716. switch(type) {
  717. case is_node:
  718. if (!obj->std.properties) {
  719. rebuild_object_properties(&obj->std);
  720. }
  721. tidyBufInit(&buf);
  722. tidyNodeGetText(obj->ptdoc->doc, obj->node, &buf);
  723. ADD_PROPERTY_STRINGL(obj->std.properties, value, buf.bp, buf.size ? buf.size-1 : 0);
  724. tidyBufFree(&buf);
  725. ADD_PROPERTY_STRING(obj->std.properties, name, tidyNodeGetName(obj->node));
  726. ADD_PROPERTY_LONG(obj->std.properties, type, tidyNodeGetType(obj->node));
  727. ADD_PROPERTY_LONG(obj->std.properties, line, tidyNodeLine(obj->node));
  728. ADD_PROPERTY_LONG(obj->std.properties, column, tidyNodeColumn(obj->node));
  729. ADD_PROPERTY_BOOL(obj->std.properties, proprietary, tidyNodeIsProp(obj->ptdoc->doc, obj->node));
  730. switch(tidyNodeGetType(obj->node)) {
  731. case TidyNode_Root:
  732. case TidyNode_DocType:
  733. case TidyNode_Text:
  734. case TidyNode_Comment:
  735. break;
  736. default:
  737. ADD_PROPERTY_LONG(obj->std.properties, id, tidyNodeGetId(obj->node));
  738. }
  739. tempattr = tidyAttrFirst(obj->node);
  740. MAKE_STD_ZVAL(attribute);
  741. if (tempattr) {
  742. char *name, *val;
  743. array_init(attribute);
  744. do {
  745. name = (char *)tidyAttrName(tempattr);
  746. val = (char *)tidyAttrValue(tempattr);
  747. if (name && val) {
  748. add_assoc_string(attribute, name, val, TRUE);
  749. }
  750. } while((tempattr = tidyAttrNext(tempattr)));
  751. } else {
  752. ZVAL_NULL(attribute);
  753. }
  754. zend_hash_update(obj->std.properties, "attribute", sizeof("attribute"), (void *)&attribute, sizeof(zval *), NULL);
  755. tempnode = tidyGetChild(obj->node);
  756. MAKE_STD_ZVAL(children);
  757. if (tempnode) {
  758. array_init(children);
  759. do {
  760. MAKE_STD_ZVAL(temp);
  761. tidy_instanciate(tidy_ce_node, temp TSRMLS_CC);
  762. newobj = (PHPTidyObj *) zend_object_store_get_object(temp TSRMLS_CC);
  763. newobj->node = tempnode;
  764. newobj->type = is_node;
  765. newobj->ptdoc = obj->ptdoc;
  766. newobj->ptdoc->ref_count++;
  767. tidy_add_default_properties(newobj, is_node TSRMLS_CC);
  768. add_next_index_zval(children, temp);
  769. } while((tempnode = tidyGetNext(tempnode)));
  770. } else {
  771. ZVAL_NULL(children);
  772. }
  773. zend_hash_update(obj->std.properties, "child", sizeof("child"), (void *)&children, sizeof(zval *), NULL);
  774. break;
  775. case is_doc:
  776. if (!obj->std.properties) {
  777. rebuild_object_properties(&obj->std);
  778. }
  779. ADD_PROPERTY_NULL(obj->std.properties, errorBuffer);
  780. ADD_PROPERTY_NULL(obj->std.properties, value);
  781. break;
  782. }
  783. }
  784. static void *php_tidy_get_opt_val(PHPTidyDoc *ptdoc, TidyOption opt, TidyOptionType *type TSRMLS_DC)
  785. {
  786. *type = tidyOptGetType(opt);
  787. switch (*type) {
  788. case TidyString: {
  789. char *val = (char *) tidyOptGetValue(ptdoc->doc, tidyOptGetId(opt));
  790. if (val) {
  791. return (void *) estrdup(val);
  792. } else {
  793. return (void *) estrdup("");
  794. }
  795. }
  796. break;
  797. case TidyInteger:
  798. return (void *) tidyOptGetInt(ptdoc->doc, tidyOptGetId(opt));
  799. break;
  800. case TidyBoolean:
  801. return (void *) tidyOptGetBool(ptdoc->doc, tidyOptGetId(opt));
  802. break;
  803. }
  804. /* should not happen */
  805. return NULL;
  806. }
  807. static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes node_type)
  808. {
  809. PHPTidyObj *newobj;
  810. TidyNode node;
  811. TIDY_FETCH_OBJECT;
  812. switch (node_type) {
  813. case is_root_node:
  814. node = tidyGetRoot(obj->ptdoc->doc);
  815. break;
  816. case is_html_node:
  817. node = tidyGetHtml(obj->ptdoc->doc);
  818. break;
  819. case is_head_node:
  820. node = tidyGetHead(obj->ptdoc->doc);
  821. break;
  822. case is_body_node:
  823. node = tidyGetBody(obj->ptdoc->doc);
  824. break;
  825. default:
  826. RETURN_NULL();
  827. break;
  828. }
  829. if (!node) {
  830. RETURN_NULL();
  831. }
  832. tidy_instanciate(tidy_ce_node, return_value TSRMLS_CC);
  833. newobj = (PHPTidyObj *) zend_object_store_get_object(return_value TSRMLS_CC);
  834. newobj->type = is_node;
  835. newobj->ptdoc = obj->ptdoc;
  836. newobj->node = node;
  837. newobj->ptdoc->ref_count++;
  838. tidy_add_default_properties(newobj, is_node TSRMLS_CC);
  839. }
  840. static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options TSRMLS_DC)
  841. {
  842. char *opt_name;
  843. zval **opt_val;
  844. ulong opt_indx;
  845. uint opt_name_len;
  846. zend_bool clear_str;
  847. for (zend_hash_internal_pointer_reset(ht_options);
  848. zend_hash_get_current_data(ht_options, (void *) &opt_val) == SUCCESS;
  849. zend_hash_move_forward(ht_options)) {
  850. switch (zend_hash_get_current_key_ex(ht_options, &opt_name, &opt_name_len, &opt_indx, FALSE, NULL)) {
  851. case HASH_KEY_IS_STRING:
  852. clear_str = 0;
  853. break;
  854. case HASH_KEY_IS_LONG:
  855. continue; /* ignore numeric keys */
  856. default:
  857. php_error_docref(NULL TSRMLS_CC, E_ERROR, "Could not retrieve key from option array");
  858. return FAILURE;
  859. }
  860. _php_tidy_set_tidy_opt(doc, opt_name, *opt_val TSRMLS_CC);
  861. if (clear_str) {
  862. efree(opt_name);
  863. }
  864. }
  865. return SUCCESS;
  866. }
  867. static int php_tidy_parse_string(PHPTidyObj *obj, char *string, int len, char *enc TSRMLS_DC)
  868. {
  869. TidyBuffer buf;
  870. if(enc) {
  871. if (tidySetCharEncoding(obj->ptdoc->doc, enc) < 0) {
  872. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Could not set encoding '%s'", enc);
  873. return FAILURE;
  874. }
  875. }
  876. obj->ptdoc->initialized = 1;
  877. tidyBufInit(&buf);
  878. tidyBufAttach(&buf, (byte *) string, len);
  879. if (tidyParseBuffer(obj->ptdoc->doc, &buf) < 0) {
  880. php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", obj->ptdoc->errbuf->bp);
  881. return FAILURE;
  882. }
  883. tidy_doc_update_properties(obj TSRMLS_CC);
  884. return SUCCESS;
  885. }
  886. static PHP_MINIT_FUNCTION(tidy)
  887. {
  888. tidySetMallocCall(php_tidy_malloc);
  889. tidySetReallocCall(php_tidy_realloc);
  890. tidySetFreeCall(php_tidy_free);
  891. tidySetPanicCall(php_tidy_panic);
  892. REGISTER_INI_ENTRIES();
  893. REGISTER_TIDY_CLASS(tidy, doc, NULL, 0);
  894. REGISTER_TIDY_CLASS(tidyNode, node, NULL, ZEND_ACC_FINAL_CLASS);
  895. tidy_object_handlers_doc.cast_object = tidy_doc_cast_handler;
  896. tidy_object_handlers_node.cast_object = tidy_node_cast_handler;
  897. _php_tidy_register_tags(INIT_FUNC_ARGS_PASSTHRU);
  898. _php_tidy_register_nodetypes(INIT_FUNC_ARGS_PASSTHRU);
  899. php_output_handler_alias_register(ZEND_STRL("ob_tidyhandler"), php_tidy_output_handler_init TSRMLS_CC);
  900. return SUCCESS;
  901. }
  902. static PHP_RINIT_FUNCTION(tidy)
  903. {
  904. php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler") TSRMLS_CC);
  905. return SUCCESS;
  906. }
  907. static PHP_MSHUTDOWN_FUNCTION(tidy)
  908. {
  909. UNREGISTER_INI_ENTRIES();
  910. return SUCCESS;
  911. }
  912. static PHP_MINFO_FUNCTION(tidy)
  913. {
  914. php_info_print_table_start();
  915. php_info_print_table_header(2, "Tidy support", "enabled");
  916. php_info_print_table_row(2, "libTidy Release", (char *)tidyReleaseDate());
  917. php_info_print_table_row(2, "Extension Version", PHP_TIDY_MODULE_VERSION " ($Id$)");
  918. php_info_print_table_end();
  919. DISPLAY_INI_ENTRIES();
  920. }
  921. static PHP_INI_MH(php_tidy_set_clean_output)
  922. {
  923. int status;
  924. zend_bool value;
  925. if (new_value_length==2 && strcasecmp("on", new_value)==0) {
  926. value = (zend_bool) 1;
  927. } else if (new_value_length==3 && strcasecmp("yes", new_value)==0) {
  928. value = (zend_bool) 1;
  929. } else if (new_value_length==4 && strcasecmp("true", new_value)==0) {
  930. value = (zend_bool) 1;
  931. } else {
  932. value = (zend_bool) atoi(new_value);
  933. }
  934. if (stage == PHP_INI_STAGE_RUNTIME) {
  935. status = php_output_get_status(TSRMLS_C);
  936. if (value && (status & PHP_OUTPUT_WRITTEN)) {
  937. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot enable tidy.clean_output - there has already been output");
  938. return FAILURE;
  939. }
  940. if (status & PHP_OUTPUT_SENT) {
  941. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot change tidy.clean_output - headers already sent");
  942. return FAILURE;
  943. }
  944. }
  945. status = OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
  946. if (stage == PHP_INI_STAGE_RUNTIME && value) {
  947. if (!php_output_handler_started(ZEND_STRL("ob_tidyhandler") TSRMLS_CC)) {
  948. php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler") TSRMLS_CC);
  949. }
  950. }
  951. return status;
  952. }
  953. /*
  954. * NOTE: tidy does not support iterative/cumulative parsing, so chunk-sized output handler is not possible
  955. */
  956. static void php_tidy_clean_output_start(const char *name, size_t name_len TSRMLS_DC)
  957. {
  958. php_output_handler *h;
  959. if (TG(clean_output) && (h = php_tidy_output_handler_init(name, name_len, 0, PHP_OUTPUT_HANDLER_STDFLAGS TSRMLS_CC))) {
  960. php_output_handler_start(h TSRMLS_CC);
  961. }
  962. }
  963. static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags TSRMLS_DC)
  964. {
  965. if (chunk_size) {
  966. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot use a chunk size for ob_tidyhandler");
  967. return NULL;
  968. }
  969. if (!TG(clean_output)) {
  970. TG(clean_output) = 1;
  971. }
  972. return php_output_handler_create_internal(handler_name, handler_name_len, php_tidy_output_handler, chunk_size, flags TSRMLS_CC);
  973. }
  974. static int php_tidy_output_handler(void **nothing, php_output_context *output_context)
  975. {
  976. int status = FAILURE;
  977. TidyDoc doc;
  978. TidyBuffer inbuf, outbuf, errbuf;
  979. PHP_OUTPUT_TSRMLS(output_context);
  980. if (TG(clean_output) && (output_context->op & PHP_OUTPUT_HANDLER_START) && (output_context->op & PHP_OUTPUT_HANDLER_FINAL)) {
  981. doc = tidyCreate();
  982. tidyBufInit(&errbuf);
  983. if (0 == tidySetErrorBuffer(doc, &errbuf)) {
  984. tidyOptSetBool(doc, TidyForceOutput, yes);
  985. tidyOptSetBool(doc, TidyMark, no);
  986. TIDY_SET_DEFAULT_CONFIG(doc);
  987. tidyBufInit(&inbuf);
  988. tidyBufAttach(&inbuf, (byte *) output_context->in.data, output_context->in.used);
  989. if (0 <= tidyParseBuffer(doc, &inbuf) && 0 <= tidyCleanAndRepair(doc)) {
  990. tidyBufInit(&outbuf);
  991. tidySaveBuffer(doc, &outbuf);
  992. FIX_BUFFER(&outbuf);
  993. output_context->out.data = (char *) outbuf.bp;
  994. output_context->out.used = outbuf.size ? outbuf.size-1 : 0;
  995. output_context->out.free = 1;
  996. status = SUCCESS;
  997. }
  998. }
  999. tidyRelease(doc);
  1000. tidyBufFree(&errbuf);
  1001. }
  1002. return status;
  1003. }
  1004. /* {{{ proto bool tidy_parse_string(string input [, mixed config_options [, string encoding]])
  1005. Parse a document stored in a string */
  1006. static PHP_FUNCTION(tidy_parse_string)
  1007. {
  1008. char *input, *enc = NULL;
  1009. int input_len, enc_len = 0;
  1010. zval **options = NULL;
  1011. PHPTidyObj *obj;
  1012. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|Zs", &input, &input_len, &options, &enc, &enc_len) == FAILURE) {
  1013. RETURN_FALSE;
  1014. }
  1015. tidy_instanciate(tidy_ce_doc, return_value TSRMLS_CC);
  1016. obj = (PHPTidyObj *) zend_object_store_get_object(return_value TSRMLS_CC);
  1017. TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
  1018. if(php_tidy_parse_string(obj, input, input_len, enc TSRMLS_CC) == FAILURE) {
  1019. zval_dtor(return_value);
  1020. INIT_ZVAL(*return_value);
  1021. RETURN_FALSE;
  1022. }
  1023. }
  1024. /* }}} */
  1025. /* {{{ proto string tidy_get_error_buffer()
  1026. Return warnings and errors which occurred parsing the specified document*/
  1027. static PHP_FUNCTION(tidy_get_error_buffer)
  1028. {
  1029. TIDY_FETCH_OBJECT;
  1030. if (obj->ptdoc->errbuf && obj->ptdoc->errbuf->bp) {
  1031. RETURN_STRINGL((char*)obj->ptdoc->errbuf->bp, obj->ptdoc->errbuf->size-1, 1);
  1032. } else {
  1033. RETURN_FALSE;
  1034. }
  1035. }
  1036. /* }}} */
  1037. /* {{{ proto string tidy_get_output()
  1038. Return a string representing the parsed tidy markup */
  1039. static PHP_FUNCTION(tidy_get_output)
  1040. {
  1041. TidyBuffer output;
  1042. TIDY_FETCH_OBJECT;
  1043. tidyBufInit(&output);
  1044. tidySaveBuffer(obj->ptdoc->doc, &output);
  1045. FIX_BUFFER(&output);
  1046. RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0, 1);
  1047. tidyBufFree(&output);
  1048. }
  1049. /* }}} */
  1050. /* {{{ proto boolean tidy_parse_file(string file [, mixed config_options [, string encoding [, bool use_include_path]]])
  1051. Parse markup in file or URI */
  1052. static PHP_FUNCTION(tidy_parse_file)
  1053. {
  1054. char *inputfile, *enc = NULL;
  1055. int input_len, contents_len, enc_len = 0;
  1056. zend_bool use_include_path = 0;
  1057. char *contents;
  1058. zval **options = NULL;
  1059. PHPTidyObj *obj;
  1060. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "p|Zsb", &inputfile, &input_len,
  1061. &options, &enc, &enc_len, &use_include_path) == FAILURE) {
  1062. RETURN_FALSE;
  1063. }
  1064. tidy_instanciate(tidy_ce_doc, return_value TSRMLS_CC);
  1065. obj = (PHPTidyObj *) zend_object_store_get_object(return_value TSRMLS_CC);
  1066. if (!(contents = php_tidy_file_to_mem(inputfile, use_include_path, &contents_len TSRMLS_CC))) {
  1067. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot Load '%s' into memory%s", inputfile, (use_include_path) ? " (Using include path)" : "");
  1068. RETURN_FALSE;
  1069. }
  1070. TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
  1071. if(php_tidy_parse_string(obj, contents, contents_len, enc TSRMLS_CC) == FAILURE) {
  1072. zval_dtor(return_value);
  1073. INIT_ZVAL(*return_value);
  1074. RETVAL_FALSE;
  1075. }
  1076. efree(contents);
  1077. }
  1078. /* }}} */
  1079. /* {{{ proto boolean tidy_clean_repair()
  1080. Execute configured cleanup and repair operations on parsed markup */
  1081. static PHP_FUNCTION(tidy_clean_repair)
  1082. {
  1083. TIDY_FETCH_OBJECT;
  1084. if (tidyCleanAndRepair(obj->ptdoc->doc) >= 0) {
  1085. tidy_doc_update_properties(obj TSRMLS_CC);
  1086. RETURN_TRUE;
  1087. }
  1088. RETURN_FALSE;
  1089. }
  1090. /* }}} */
  1091. /* {{{ proto boolean tidy_repair_string(string data [, mixed config_file [, string encoding]])
  1092. Repair a string using an optionally provided configuration file */
  1093. static PHP_FUNCTION(tidy_repair_string)
  1094. {
  1095. php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, FALSE);
  1096. }
  1097. /* }}} */
  1098. /* {{{ proto boolean tidy_repair_file(string filename [, mixed config_file [, string encoding [, bool use_include_path]]])
  1099. Repair a file using an optionally provided configuration file */
  1100. static PHP_FUNCTION(tidy_repair_file)
  1101. {
  1102. php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, TRUE);
  1103. }
  1104. /* }}} */
  1105. /* {{{ proto boolean tidy_diagnose()
  1106. Run configured diagnostics on parsed and repaired markup. */
  1107. static PHP_FUNCTION(tidy_diagnose)
  1108. {
  1109. TIDY_FETCH_OBJECT;
  1110. if (obj->ptdoc->initialized && tidyRunDiagnostics(obj->ptdoc->doc) >= 0) {
  1111. tidy_doc_update_properties(obj TSRMLS_CC);
  1112. RETURN_TRUE;
  1113. }
  1114. RETURN_FALSE;
  1115. }
  1116. /* }}} */
  1117. /* {{{ proto string tidy_get_release()
  1118. Get release date (version) for Tidy library */
  1119. static PHP_FUNCTION(tidy_get_release)
  1120. {
  1121. if (zend_parse_parameters_none() == FAILURE) {
  1122. return;
  1123. }
  1124. RETURN_STRING((char *)tidyReleaseDate(), 1);
  1125. }
  1126. /* }}} */
  #if HAVE_TIDYOPTGETDOC
  /* {{{ proto string tidy_get_opt_doc(tidy resource, string optname)
     Returns the documentation for the given option name, or FALSE when the
     option is unknown or has no documentation */
  static PHP_FUNCTION(tidy_get_opt_doc)
  {
      PHPTidyObj *obj;
      char *optval, *optname;
      int optname_len;
      int parsed;
      TidyOption opt;

      TIDY_SET_CONTEXT;

      /* Method call: only the option name is passed. Function call: the tidy
       * object comes first. */
      if (object) {
          parsed = zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &optname, &optname_len);
      } else {
          parsed = zend_parse_method_parameters(ZEND_NUM_ARGS() TSRMLS_CC, NULL, "Os", &object, tidy_ce_doc, &optname, &optname_len);
      }
      if (parsed == FAILURE) {
          RETURN_FALSE;
      }

      obj = (PHPTidyObj *) zend_object_store_get_object(object TSRMLS_CC);

      opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
      if (!opt) {
          php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown Tidy Configuration Option '%s'", optname);
          RETURN_FALSE;
      }

      optval = (char *) tidyOptGetDoc(obj->ptdoc->doc, opt);
      if (!optval) {
          RETURN_FALSE;
      }

      RETURN_STRING(optval, 1);
  }
  /* }}} */
  #endif
  1159. /* {{{ proto array tidy_get_config()
  1160. Get current Tidy configuration */
  1161. static PHP_FUNCTION(tidy_get_config)
  1162. {
  1163. TidyIterator itOpt;
  1164. char *opt_name;
  1165. void *opt_value;
  1166. TidyOptionType optt;
  1167. TIDY_FETCH_OBJECT;
  1168. itOpt = tidyGetOptionList(obj->ptdoc->doc);
  1169. array_init(return_value);
  1170. while (itOpt) {
  1171. TidyOption opt = tidyGetNextOption(obj->ptdoc->doc, &itOpt);
  1172. opt_name = (char *)tidyOptGetName(opt);
  1173. opt_value = php_tidy_get_opt_val(obj->ptdoc, opt, &optt TSRMLS_CC);
  1174. switch (optt) {
  1175. case TidyString:
  1176. add_assoc_string(return_value, opt_name, (char*)opt_value, 0);
  1177. break;
  1178. case TidyInteger:
  1179. add_assoc_long(return_value, opt_name, (long)opt_value);
  1180. break;
  1181. case TidyBoolean:
  1182. add_assoc_bool(return_value, opt_name, (long)opt_value);
  1183. break;
  1184. }
  1185. }
  1186. return;
  1187. }
  1188. /* }}} */
  1189. /* {{{ proto int tidy_get_status()
  1190. Get status of specified document. */
  1191. static PHP_FUNCTION(tidy_get_status)
  1192. {
  1193. TIDY_FETCH_OBJECT;
  1194. RETURN_LONG(tidyStatus(obj->ptdoc->doc));
  1195. }
  1196. /* }}} */
  1197. /* {{{ proto int tidy_get_html_ver()
  1198. Get the Detected HTML version for the specified document. */
  1199. static PHP_FUNCTION(tidy_get_html_ver)
  1200. {
  1201. TIDY_FETCH_OBJECT;
  1202. RETURN_LONG(tidyDetectedHtmlVersion(obj->ptdoc->doc));
  1203. }
  1204. /* }}} */
  1205. /* {{{ proto boolean tidy_is_xhtml()
  1206. Indicates if the document is a XHTML document. */
  1207. static PHP_FUNCTION(tidy_is_xhtml)
  1208. {
  1209. TIDY_FETCH_OBJECT;
  1210. RETURN_BOOL(tidyDetectedXhtml(obj->ptdoc->doc));
  1211. }
  1212. /* }}} */
  1213. /* {{{ proto boolean tidy_is_xml()
  1214. Indicates if the document is a generic (non HTML/XHTML) XML document. */
  1215. static PHP_FUNCTION(tidy_is_xml)
  1216. {
  1217. TIDY_FETCH_OBJECT;
  1218. RETURN_BOOL(tidyDetectedGenericXml(obj->ptdoc->doc));
  1219. }
  1220. /* }}} */
  1221. /* {{{ proto int tidy_error_count()
  1222. Returns the Number of Tidy errors encountered for specified document. */
  1223. static PHP_FUNCTION(tidy_error_count)
  1224. {
  1225. TIDY_FETCH_OBJECT;
  1226. RETURN_LONG(tidyErrorCount(obj->ptdoc->doc));
  1227. }
  1228. /* }}} */
  1229. /* {{{ proto int tidy_warning_count()
  1230. Returns the Number of Tidy warnings encountered for specified document. */
  1231. static PHP_FUNCTION(tidy_warning_count)
  1232. {
  1233. TIDY_FETCH_OBJECT;
  1234. RETURN_LONG(tidyWarningCount(obj->ptdoc->doc));
  1235. }
  1236. /* }}} */
  1237. /* {{{ proto int tidy_access_count()
  1238. Returns the Number of Tidy accessibility warnings encountered for specified document. */
  1239. static PHP_FUNCTION(tidy_access_count)
  1240. {
  1241. TIDY_FETCH_OBJECT;
  1242. RETURN_LONG(tidyAccessWarningCount(obj->ptdoc->doc));
  1243. }
  1244. /* }}} */
  1245. /* {{{ proto int tidy_config_count()
  1246. Returns the Number of Tidy configuration errors encountered for specified document. */
  1247. static PHP_FUNCTION(tidy_config_count)
  1248. {
  1249. TIDY_FETCH_OBJECT;
  1250. RETURN_LONG(tidyConfigErrorCount(obj->ptdoc->doc));
  1251. }
  1252. /* }}} */
  1253. /* {{{ proto mixed tidy_getopt(string option)
  1254. Returns the value of the specified configuration option for the tidy document. */
  1255. static PHP_FUNCTION(tidy_getopt)
  1256. {
  1257. PHPTidyObj *obj;
  1258. char *optname;
  1259. void *optval;
  1260. int optname_len;
  1261. TidyOption opt;
  1262. TidyOptionType optt;
  1263. TIDY_SET_CONTEXT;
  1264. if (object) {
  1265. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &optname, &optname_len) == FAILURE) {
  1266. RETURN_FALSE;
  1267. }
  1268. } else {
  1269. if (zend_parse_method_parameters(ZEND_NUM_ARGS() TSRMLS_CC, NULL, "Os", &object, tidy_ce_doc, &optname, &optname_len) == FAILURE) {
  1270. RETURN_FALSE;
  1271. }
  1272. }
  1273. obj = (PHPTidyObj *) zend_object_store_get_object(object TSRMLS_CC);
  1274. opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
  1275. if (!opt) {
  1276. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown Tidy Configuration Option '%s'", optname);
  1277. RETURN_FALSE;
  1278. }
  1279. optval = php_tidy_get_opt_val(obj->ptdoc, opt, &optt TSRMLS_CC);
  1280. switch (optt) {
  1281. case TidyString:
  1282. RETURN_STRING((char *)optval, 0);
  1283. break;
  1284. case TidyInteger:
  1285. RETURN_LONG((long)optval);
  1286. break;
  1287. case TidyBoolean:
  1288. if (optval) {
  1289. RETURN_TRUE;
  1290. } else {
  1291. RETURN_FALSE;
  1292. }
  1293. break;
  1294. default:
  1295. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to determine type of configuration option");
  1296. break;
  1297. }
  1298. RETURN_FALSE;
  1299. }
  1300. /* }}} */
  1301. static TIDY_DOC_METHOD(__construct)
  1302. {
  1303. char *inputfile = NULL, *enc = NULL;
  1304. int input_len = 0, enc_len = 0, contents_len = 0;
  1305. zend_bool use_include_path = 0;
  1306. char *contents;
  1307. zval **options = NULL;
  1308. PHPTidyObj *obj;
  1309. TIDY_SET_CONTEXT;
  1310. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|pZsb", &inputfile, &input_len,
  1311. &options, &enc, &enc_len, &use_include_path) == FAILURE) {
  1312. RETURN_FALSE;
  1313. }
  1314. obj = (PHPTidyObj *)zend_object_store_get_object(object TSRMLS_CC);
  1315. if (inputfile) {
  1316. if (!(contents = php_tidy_file_to_mem(inputfile, use_include_path, &contents_len TSRMLS_CC))) {
  1317. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot Load '%s' into memory%s", inputfile, (use_include_path) ? " (Using include path)" : "");
  1318. return;
  1319. }
  1320. TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
  1321. php_tidy_parse_string(obj, contents, contents_len, enc TSRMLS_CC);
  1322. efree(contents);
  1323. }
  1324. }
  1325. static TIDY_DOC_METHOD(parseFile)
  1326. {
  1327. char *inputfile, *enc = NULL;
  1328. int input_len, enc_len = 0, contents_len = 0;
  1329. zend_bool use_include_path = 0;
  1330. char *contents;
  1331. zval **options = NULL;
  1332. PHPTidyObj *obj;
  1333. TIDY_SET_CONTEXT;
  1334. obj = (PHPTidyObj *)zend_object_store_get_object(object TSRMLS_CC);
  1335. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "p|Zsb", &inputfile, &input_len,
  1336. &options, &enc, &enc_len, &use_include_path) == FAILURE) {
  1337. RETURN_FALSE;
  1338. }
  1339. if (!(contents = php_tidy_file_to_mem(inputfile, use_include_path, &contents_len TSRMLS_CC))) {
  1340. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot Load '%s' into memory%s", inputfile, (use_include_path) ? " (Using include path)" : "");
  1341. RETURN_FALSE;
  1342. }
  1343. TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
  1344. if(php_tidy_parse_string(obj, contents, contents_len, enc TSRMLS_CC) == FAILURE) {
  1345. RETVAL_FALSE;
  1346. } else {
  1347. RETVAL_TRUE;
  1348. }
  1349. efree(contents);
  1350. }
  1351. static TIDY_DOC_METHOD(parseString)
  1352. {
  1353. char *input, *enc = NULL;
  1354. int input_len, enc_len = 0;
  1355. zval **options = NULL;
  1356. PHPTidyObj *obj;
  1357. TIDY_SET_CONTEXT;
  1358. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|Zs", &input, &input_len, &options, &enc, &enc_len) == FAILURE) {
  1359. RETURN_FALSE;
  1360. }
  1361. obj = (PHPTidyObj *)zend_object_store_get_object(object TSRMLS_CC);
  1362. TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
  1363. if(php_tidy_parse_string(obj, input, input_len, enc TSRMLS_CC) == SUCCESS) {
  1364. RETURN_TRUE;
  1365. }
  1366. RETURN_FALSE;
  1367. }
  1368. /* {{{ proto TidyNode tidy_get_root()
  1369. Returns a TidyNode Object representing the root of the tidy parse tree */
  1370. static PHP_FUNCTION(tidy_get_root)
  1371. {
  1372. php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_root_node);
  1373. }
  1374. /* }}} */
  1375. /* {{{ proto TidyNode tidy_get_html()
  1376. Returns a TidyNode Object starting from the <HTML> tag of the tidy parse tree */
  1377. static PHP_FUNCTION(tidy_get_html)
  1378. {
  1379. php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_html_node);
  1380. }
  1381. /* }}} */
  1382. /* {{{ proto TidyNode tidy_get_head()
  1383. Returns a TidyNode Object starting from the <HEAD> tag of the tidy parse tree */
  1384. static PHP_FUNCTION(tidy_get_head)
  1385. {
  1386. php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_head_node);
  1387. }
  1388. /* }}} */
  1389. /* {{{ proto TidyNode tidy_get_body(resource tidy)
  1390. Returns a TidyNode Object starting from the <BODY> tag of the tidy parse tree */
  1391. static PHP_FUNCTION(tidy_get_body)
  1392. {
  1393. php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_body_node);
  1394. }
  1395. /* }}} */
  1396. /* {{{ proto boolean tidyNode::hasChildren()
  1397. Returns true if this node has children */
  1398. static TIDY_NODE_METHOD(hasChildren)
  1399. {
  1400. TIDY_FETCH_ONLY_OBJECT;
  1401. if (tidyGetChild(obj->node)) {
  1402. RETURN_TRUE;
  1403. } else {
  1404. RETURN_FALSE;
  1405. }
  1406. }
  1407. /* }}} */
  1408. /* {{{ proto boolean tidyNode::hasSiblings()
  1409. Returns true if this node has siblings */
  1410. static TIDY_NODE_METHOD(hasSiblings)
  1411. {
  1412. TIDY_FETCH_ONLY_OBJECT;
  1413. if (obj->node && tidyGetNext(obj->node)) {
  1414. RETURN_TRUE;
  1415. } else {
  1416. RETURN_FALSE;
  1417. }
  1418. }
  1419. /* }}} */
  1420. /* {{{ proto boolean tidyNode::isComment()
  1421. Returns true if this node represents a comment */
  1422. static TIDY_NODE_METHOD(isComment)
  1423. {
  1424. TIDY_FETCH_ONLY_OBJECT;
  1425. if (tidyNodeGetType(obj->node) == TidyNode_Comment) {
  1426. RETURN_TRUE;
  1427. } else {
  1428. RETURN_FALSE;
  1429. }
  1430. }
  1431. /* }}} */
  1432. /* {{{ proto boolean tidyNode::isHtml()
  1433. Returns true if this node is part of a HTML document */
  1434. static TIDY_NODE_METHOD(isHtml)
  1435. {
  1436. TIDY_FETCH_ONLY_OBJECT;
  1437. if (tidyNodeGetType(obj->node) & (TidyNode_Start | TidyNode_End | TidyNode_StartEnd)) {
  1438. RETURN_TRUE;
  1439. }
  1440. RETURN_FALSE;
  1441. }
  1442. /* }}} */
  1443. /* {{{ proto boolean tidyNode::isText()
  1444. Returns true if this node represents text (no markup) */
  1445. static TIDY_NODE_METHOD(isText)
  1446. {
  1447. TIDY_FETCH_ONLY_OBJECT;
  1448. if (tidyNodeGetType(obj->node) == TidyNode_Text) {
  1449. RETURN_TRUE;
  1450. } else {
  1451. RETURN_FALSE;
  1452. }
  1453. }
  1454. /* }}} */
  1455. /* {{{ proto boolean tidyNode::isJste()
  1456. Returns true if this node is JSTE */
  1457. static TIDY_NODE_METHOD(isJste)
  1458. {
  1459. TIDY_FETCH_ONLY_OBJECT;
  1460. if (tidyNodeGetType(obj->node) == TidyNode_Jste) {
  1461. RETURN_TRUE;
  1462. } else {
  1463. RETURN_FALSE;
  1464. }
  1465. }
  1466. /* }}} */
  1467. /* {{{ proto boolean tidyNode::isAsp()
  1468. Returns true if this node is ASP */
  1469. static TIDY_NODE_METHOD(isAsp)
  1470. {
  1471. TIDY_FETCH_ONLY_OBJECT;
  1472. if (tidyNodeGetType(obj->node) == TidyNode_Asp) {
  1473. RETURN_TRUE;
  1474. } else {
  1475. RETURN_FALSE;
  1476. }
  1477. }
  1478. /* }}} */
  1479. /* {{{ proto boolean tidyNode::isPhp()
  1480. Returns true if this node is PHP */
  1481. static TIDY_NODE_METHOD(isPhp)
  1482. {
  1483. TIDY_FETCH_ONLY_OBJECT;
  1484. if (tidyNodeGetType(obj->node) == TidyNode_Php) {
  1485. RETURN_TRUE;
  1486. } else {
  1487. RETURN_FALSE;
  1488. }
  1489. }
  1490. /* }}} */
  1491. /* {{{ proto tidyNode tidyNode::getParent()
  1492. Returns the parent node if available or NULL */
  1493. static TIDY_NODE_METHOD(getParent)
  1494. {
  1495. TidyNode parent_node;
  1496. PHPTidyObj *newobj;
  1497. TIDY_FETCH_ONLY_OBJECT;
  1498. parent_node = tidyGetParent(obj->node);
  1499. if(parent_node) {
  1500. tidy_instanciate(tidy_ce_node, return_value TSRMLS_CC);
  1501. newobj = (PHPTidyObj *) zend_object_store_get_object(return_value TSRMLS_CC);
  1502. newobj->node = parent_node;
  1503. newobj->type = is_node;
  1504. newobj->ptdoc = obj->ptdoc;
  1505. newobj->ptdoc->ref_count++;
  1506. tidy_add_default_properties(newobj, is_node TSRMLS_CC);
  1507. } else {
  1508. ZVAL_NULL(return_value);
  1509. }
  1510. }
  1511. /* }}} */
  1512. /* {{{ proto void tidyNode::__construct()
  1513. __constructor for tidyNode. */
  1514. static TIDY_NODE_METHOD(__construct)
  1515. {
  1516. php_error_docref(NULL TSRMLS_CC, E_ERROR, "You should not create a tidyNode manually");
  1517. }
  1518. /* }}} */
/*
 * Registers the tidy node-type constants by invoking TIDY_NODE_CONST once
 * per node type.
 *
 * TIDY_NODE_CONST is defined elsewhere in this file. NOTE(review): presumably
 * the first argument is the suffix of the PHP-visible constant name and the
 * second the suffix of the matching TidyNode_* value (cf. TidyNode_Jste,
 * TidyNode_Asp, TidyNode_Php used by the tidyNode::is*() methods) -- confirm
 * against the macro definition.
 *
 * Takes the arguments described by INIT_FUNC_ARGS and returns nothing.
 */
static void _php_tidy_register_nodetypes(INIT_FUNC_ARGS)
{
	TIDY_NODE_CONST(ROOT, Root);
	TIDY_NODE_CONST(DOCTYPE, DocType);
	TIDY_NODE_CONST(COMMENT, Comment);
	TIDY_NODE_CONST(PROCINS, ProcIns);
	TIDY_NODE_CONST(TEXT, Text);
	TIDY_NODE_CONST(START, Start);
	TIDY_NODE_CONST(END, End);
	TIDY_NODE_CONST(STARTEND, StartEnd);
	TIDY_NODE_CONST(CDATA, CDATA);
	TIDY_NODE_CONST(SECTION, Section);
	/* Embedded server-side code node types. */
	TIDY_NODE_CONST(ASP, Asp);
	TIDY_NODE_CONST(JSTE, Jste);
	TIDY_NODE_CONST(PHP, Php);
	TIDY_NODE_CONST(XMLDECL, XmlDecl);
}
/*
 * Registers the tidy tag constants by invoking TIDY_TAG_CONST once per tag.
 *
 * TIDY_TAG_CONST is defined elsewhere in this file. NOTE(review): presumably
 * it registers one PHP-visible constant per tag name, backed by the matching
 * TidyTag_* value from libtidy -- confirm against the macro definition.
 *
 * UNKNOWN comes first; the remaining tags are listed in alphabetical order.
 * Keep that order when adding entries so the list stays easy to scan.
 *
 * Takes the arguments described by INIT_FUNC_ARGS and returns nothing.
 */
static void _php_tidy_register_tags(INIT_FUNC_ARGS)
{
	TIDY_TAG_CONST(UNKNOWN);
	/* A */
	TIDY_TAG_CONST(A);
	TIDY_TAG_CONST(ABBR);
	TIDY_TAG_CONST(ACRONYM);
	TIDY_TAG_CONST(ADDRESS);
	TIDY_TAG_CONST(ALIGN);
	TIDY_TAG_CONST(APPLET);
	TIDY_TAG_CONST(AREA);
	/* B */
	TIDY_TAG_CONST(B);
	TIDY_TAG_CONST(BASE);
	TIDY_TAG_CONST(BASEFONT);
	TIDY_TAG_CONST(BDO);
	TIDY_TAG_CONST(BGSOUND);
	TIDY_TAG_CONST(BIG);
	TIDY_TAG_CONST(BLINK);
	TIDY_TAG_CONST(BLOCKQUOTE);
	TIDY_TAG_CONST(BODY);
	TIDY_TAG_CONST(BR);
	TIDY_TAG_CONST(BUTTON);
	/* C */
	TIDY_TAG_CONST(CAPTION);
	TIDY_TAG_CONST(CENTER);
	TIDY_TAG_CONST(CITE);
	TIDY_TAG_CONST(CODE);
	TIDY_TAG_CONST(COL);
	TIDY_TAG_CONST(COLGROUP);
	TIDY_TAG_CONST(COMMENT);
	/* D */
	TIDY_TAG_CONST(DD);
	TIDY_TAG_CONST(DEL);
	TIDY_TAG_CONST(DFN);
	TIDY_TAG_CONST(DIR);
	TIDY_TAG_CONST(DIV);
	TIDY_TAG_CONST(DL);
	TIDY_TAG_CONST(DT);
	/* E - F */
	TIDY_TAG_CONST(EM);
	TIDY_TAG_CONST(EMBED);
	TIDY_TAG_CONST(FIELDSET);
	TIDY_TAG_CONST(FONT);
	TIDY_TAG_CONST(FORM);
	TIDY_TAG_CONST(FRAME);
	TIDY_TAG_CONST(FRAMESET);
	/* H */
	TIDY_TAG_CONST(H1);
	TIDY_TAG_CONST(H2);
	TIDY_TAG_CONST(H3);
	TIDY_TAG_CONST(H4);
	TIDY_TAG_CONST(H5);
	TIDY_TAG_CONST(H6);
	TIDY_TAG_CONST(HEAD);
	TIDY_TAG_CONST(HR);
	TIDY_TAG_CONST(HTML);
	/* I */
	TIDY_TAG_CONST(I);
	TIDY_TAG_CONST(IFRAME);
	TIDY_TAG_CONST(ILAYER);
	TIDY_TAG_CONST(IMG);
	TIDY_TAG_CONST(INPUT);
	TIDY_TAG_CONST(INS);
	TIDY_TAG_CONST(ISINDEX);
	/* K - L */
	TIDY_TAG_CONST(KBD);
	TIDY_TAG_CONST(KEYGEN);
	TIDY_TAG_CONST(LABEL);
	TIDY_TAG_CONST(LAYER);
	TIDY_TAG_CONST(LEGEND);
	TIDY_TAG_CONST(LI);
	TIDY_TAG_CONST(LINK);
	TIDY_TAG_CONST(LISTING);
	/* M */
	TIDY_TAG_CONST(MAP);
	TIDY_TAG_CONST(MARQUEE);
	TIDY_TAG_CONST(MENU);
	TIDY_TAG_CONST(META);
	TIDY_TAG_CONST(MULTICOL);
	/* N */
	TIDY_TAG_CONST(NOBR);
	TIDY_TAG_CONST(NOEMBED);
	TIDY_TAG_CONST(NOFRAMES);
	TIDY_TAG_CONST(NOLAYER);
	TIDY_TAG_CONST(NOSAVE);
	TIDY_TAG_CONST(NOSCRIPT);
	/* O */
	TIDY_TAG_CONST(OBJECT);
	TIDY_TAG_CONST(OL);
	TIDY_TAG_CONST(OPTGROUP);
	TIDY_TAG_CONST(OPTION);
	/* P - Q */
	TIDY_TAG_CONST(P);
	TIDY_TAG_CONST(PARAM);
	TIDY_TAG_CONST(PLAINTEXT);
	TIDY_TAG_CONST(PRE);
	TIDY_TAG_CONST(Q);
	/* R */
	TIDY_TAG_CONST(RB);
	TIDY_TAG_CONST(RBC);
	TIDY_TAG_CONST(RP);
	TIDY_TAG_CONST(RT);
	TIDY_TAG_CONST(RTC);
	TIDY_TAG_CONST(RUBY);
	/* S */
	TIDY_TAG_CONST(S);
	TIDY_TAG_CONST(SAMP);
	TIDY_TAG_CONST(SCRIPT);
	TIDY_TAG_CONST(SELECT);
	TIDY_TAG_CONST(SERVER);
	TIDY_TAG_CONST(SERVLET);
	TIDY_TAG_CONST(SMALL);
	TIDY_TAG_CONST(SPACER);
	TIDY_TAG_CONST(SPAN);
	TIDY_TAG_CONST(STRIKE);
	TIDY_TAG_CONST(STRONG);
	TIDY_TAG_CONST(STYLE);
	TIDY_TAG_CONST(SUB);
	TIDY_TAG_CONST(SUP);
	/* T */
	TIDY_TAG_CONST(TABLE);
	TIDY_TAG_CONST(TBODY);
	TIDY_TAG_CONST(TD);
	TIDY_TAG_CONST(TEXTAREA);
	TIDY_TAG_CONST(TFOOT);
	TIDY_TAG_CONST(TH);
	TIDY_TAG_CONST(THEAD);
	TIDY_TAG_CONST(TITLE);
	TIDY_TAG_CONST(TR);
	TIDY_TAG_CONST(TT);
	/* U - X */
	TIDY_TAG_CONST(U);
	TIDY_TAG_CONST(UL);
	TIDY_TAG_CONST(VAR);
	TIDY_TAG_CONST(WBR);
	TIDY_TAG_CONST(XMP);
}
  1658. #endif
  1659. /*
  1660. * Local variables:
  1661. * tab-width: 4
  1662. * c-basic-offset: 4
  1663. * End:
  1664. * vim600: noet sw=4 ts=4 fdm=marker
  1665. * vim<600: noet sw=4 ts=4
  1666. */